Files
journal/parse.py
2021-06-19 11:50:19 +03:00

190 lines
5.1 KiB
Python

from pathlib import Path
from datetime import datetime
import re
import json
# Stem (file name without extension) of the journal file being processed.
# It is reassigned by the top-level loop at the bottom of this module; none of
# the parsing functions visible here read it.
curr_day = ''

# Matches a "YYYY-MM-DD HH:MM:SS" stamp at the start of any line.  The pattern
# is wrapped in a capture group so that `entry_re.split()` keeps the stamps in
# its result, interleaved with the text between them.
entry_re = re.compile(
    r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})',
    flags=re.MULTILINE,
)
def parse_godword(godword):
    """Break the ``godword`` header section into its whitespace-separated words."""
    words = godword.split()
    return words
def parse_habits(habits):
    """Parse the ``habits`` header section into a ``{name: done}`` mapping.

    Every non-blank line consists of a marker token followed by the habit
    name.  A habit counts as done when the second character of the marker is
    ``x`` (e.g. ``[x]``); any other character there means not done.

    Whitespace-only lines are skipped instead of aborting the whole parse.

    Raises:
        ValueError: a line holds a marker but no habit name.
        IndexError: a marker is shorter than two characters.
    """
    result = {}
    for line in habits.splitlines():
        if not line.strip():
            # Nothing to unpack on a blank line.
            continue
        marker, name = line.split(maxsplit=1)
        result[name.strip()] = marker[1] == 'x'
    return result
def parse_notifications(notifications):
    """Parse the ``notifications`` header section, one record per line.

    The first two whitespace-separated tokens of a line form the ``source``
    (with any surrounding ``[`` / ``]`` characters stripped); the remaining
    tokens, re-joined with single spaces, form the ``message``.
    """
    def to_record(line):
        tokens = line.split()
        source = ' '.join(tokens[:2]).strip('[]')
        message = ' '.join(tokens[2:])
        return {'source': source, 'message': message}

    return [to_record(line) for line in notifications.splitlines()]
# Dispatch table for header sections: lower-cased section name -> the function
# that parses that section's body.
header_modules = dict(
    godword=parse_godword,
    habits=parse_habits,
    notifications=parse_notifications,
)
def parse_header(header):
    """Parse the text that precedes the first timestamped entry of a file.

    The header is cut into blocks on runs of blank lines.  The first block is
    the title and is not part of the result.  Every following block is a
    module: its first line names it (case-insensitive, optional trailing
    colon) and the rest of the block is handed to the matching parser in
    ``header_modules``.

    Returns:
        A dict mapping each module name to its parsed value.  An empty or
        title-only header yields ``{}``.

    Raises:
        KeyError: a module name has no parser in ``header_modules``.
    """
    blocks = [b.strip() for b in re.split(r'\n{2,}', header) if b.strip()]

    result = {}
    # blocks[0] (when present) is the title; slicing keeps an empty header
    # from failing on unpacking.
    for module in blocks[1:]:
        # partition() tolerates a module that is just a name line: the body
        # is then the empty string.
        name, _, value = module.partition('\n')
        name = name.strip().lower().removesuffix(':')
        result[name] = header_modules[name](value)
    return result
def parse_timestamp(timestamp):
    """Convert a ``YYYY-MM-DD HH:MM:SS`` string into a naive ``datetime``.

    Raises ``ValueError`` (from ``strptime``) when the text does not match.
    """
    layout = '%Y-%m-%d %H:%M:%S'
    return datetime.strptime(timestamp, layout)
def parse_post(block):
    """Parse a ``@post`` block into its optional timestamp and content.

    After the tag, the block may start with a ``YYYY-MM-DD HH:MM:SS`` stamp;
    whatever follows (or the whole remainder when there is no stamp) is the
    content.

    Returns:
        A dict that holds ``'content'`` when the remaining text is non-empty
        and ``'timestamp'`` (integer epoch seconds; the naive stamp is
        interpreted in the local timezone) when a leading stamp was found.
    """
    # Remove the tag together with the whitespace after it, so a tag on its
    # own line is handled like one followed by a space.  The tag is only
    # removed when it is the complete first token.
    body = re.sub(r'\A@post(?:\s+|\Z)', '', block)

    timestamp = None
    try:
        # A stamp is exactly 19 characters long.
        timestamp = int(parse_timestamp(body[:19]).timestamp())
    except (ValueError, OverflowError, OSError):
        # No usable leading stamp: treat the entire body as content.
        pass
    else:
        body = body[19:]

    content = body.strip()
    result = {}
    if content:
        result['content'] = content
    # Compare against None so that a stamp equal to epoch 0 is not dropped.
    if timestamp is not None:
        result['timestamp'] = timestamp
    return result
def parse_notes(block):
    """Parse a three-line ``@notes`` block: tag line, source line, title line.

    Raises ``ValueError`` when the block does not have exactly three lines.
    """
    _tag, source, title = block.splitlines()
    return dict(source=source, title=title)
def parse_diet(block):
    """Parse a ``@diet <amount>[g] <food>`` block.

    The amount is an integer with an optional ``g`` suffix.  Everything after
    the amount is the food name, so names made of several words are accepted;
    the words are re-joined with single spaces.

    Returns:
        ``{'amount': int, 'food': str}``

    Raises:
        ValueError: the amount or the food name is missing, or the amount is
            not an integer.
    """
    _tag, amount, *food_words = block.split()
    if not food_words:
        raise ValueError(f'@diet block is missing the food name: {block!r}')
    return {
        'amount': int(amount.removesuffix('g')),
        'food': ' '.join(food_words),
    }
def parse_timer(block):
    """Parse a ``@start`` / ``@stop`` / ``@done`` block.

    The whitespace-separated fields after the tag are read as follows:

    * three or more fields: the first one is the timer name and the rest is
      parsed as a ``YYYY-MM-DD HH:MM:SS`` stamp;
    * exactly two fields: both are parsed as the stamp;
    * zero or one field: nothing is extracted.

    Returns a dict with ``'name'`` and/or ``'timestamp'`` (integer epoch
    seconds) for whichever of the two was found.
    """
    _tag, *fields = block.split()
    parsed = {}

    if len(fields) > 2:
        label, *fields = fields
        if label:
            parsed['name'] = label

    if len(fields) > 1:
        stamp = int(parse_timestamp(' '.join(fields)).timestamp())
        if stamp:
            parsed['timestamp'] = stamp

    return parsed
def parse_exercise(block):
    """Parse an ``@exercise`` block.

    Only the ``walk`` kind is understood, in the form
    ``@exercise walk <N>min <D>km <S>steps``.  Any other kind, as well as a
    tag without any fields, yields ``{'kind': 'INVALID'}``.

    Raises:
        ValueError: a ``walk`` block does not have exactly three fields after
            the kind, or one of them is not a number.
    """
    _tag, *parts = block.split()
    # `parts` is empty for a bare tag; check before indexing into it.
    if parts and parts[0] == 'walk':
        kind, minutes, distance, steps = parts
        return {
            'kind': kind,
            'minutes': int(minutes.removesuffix('min')),
            'distance': float(distance.removesuffix('km')),
            'steps': int(steps.removesuffix('steps')),
        }
    return {'kind': 'INVALID'}
def parse_notify(block):
    """Parse a ``@notify <day> <message...>`` block.

    The second token is the day; all remaining tokens, joined with single
    spaces, are the message.  Raises ``ValueError`` when the day is missing.
    """
    _tag, day, *words = block.split()
    message = ' '.join(words)
    return {'day': day.strip(), 'message': message}
def create_entry_module_parser(name, handler=None):
    """Build the parser for entry blocks tagged ``@<name>``.

    The returned callable takes the raw block text and returns
    ``{'type': name}`` merged with whatever ``handler`` returns for that text
    (keys from the handler win on conflict).

    When no handler is supplied, the default one stores the block text, minus
    a leading ``'@<name> '`` prefix, under ``'value'``.
    """
    if not handler:
        prefix = f'@{name} '

        def handler(b):
            return {'value': b.removeprefix(prefix)}

    def parse(b):
        return {'type': name} | handler(b)

    return parse
# Dispatch table for entry blocks: tag name (without the leading "@") -> parser
# built by create_entry_module_parser.  A handler of None selects that
# function's default handler, which keeps the text after the tag as 'value'.
entry_modules = {
    tag: create_entry_module_parser(tag, handler)
    for tag, handler in (
        ('hide', lambda _: {}),
        ('post', parse_post),
        ('info', None),
        ('notes', parse_notes),
        ('behavior', None),
        ('diet', parse_diet),
        ('task', None),
        ('start', parse_timer),
        ('stop', parse_timer),
        ('done', parse_timer),
        ('exercise', parse_exercise),
        ('notify', parse_notify),
    )
}
def parse_entry(entry):
    """Parse one journal entry given as a ``(timestamp, content)`` pair.

    The content is cut into paragraphs on runs of blank lines.  Inside a
    paragraph, every ``@`` that follows a space or a newline starts a new
    block.  Blocks beginning with ``@`` are dispatched through
    ``entry_modules`` by their tag; all other blocks are kept as plain
    strings.

    Returns:
        ``{'timestamp': <int epoch seconds>, 'blocks': [<dict or str>, ...]}``

    Raises:
        KeyError: a block's tag has no parser in ``entry_modules``.
    """
    def split_into_blocks(text):
        pieces = []
        for paragraph in re.split(r'\n{2,}', text):
            paragraph = paragraph.strip()
            if paragraph:
                # Normalise " @" to "\n@" so one split catches both forms.
                # The split consumes the "@", so it is put back on every
                # piece except the first.
                head, *tagged = paragraph.replace(' @', '\n@').split('\n@')
                pieces.append(head)
                pieces.extend('@' + chunk for chunk in tagged)
        return pieces

    raw_timestamp, content = entry
    epoch = int(parse_timestamp(raw_timestamp.strip()).timestamp())

    blocks = []
    for piece in split_into_blocks(content):
        if piece[0] != '@':
            blocks.append(piece)
            continue
        # The tag is the first token without its leading "@".
        tag = piece.split()[0][1:]
        blocks.append(entry_modules[tag](piece))

    return {'timestamp': epoch, 'blocks': blocks}
# Parse every "*.md" file in ~/workspace/journal, in sorted path order, into
# one dict keyed by the file stem.
result = {}
journal_dir = Path.home() / 'workspace' / 'journal'
for fpath in sorted(journal_dir.glob('*.md')):
    curr_day = fpath.stem
    # entry_re has a capture group, so split() returns
    # [header, stamp1, text1, stamp2, text2, ...].
    # Read as UTF-8 explicitly instead of relying on the locale default.
    header, *tmp = entry_re.split(fpath.read_text(encoding='utf-8'))
    # Pair every stamp with the text that follows it.
    entries = list(zip(tmp[::2], tmp[1::2]))
    result[fpath.stem] = {
        'header': parse_header(header),
        'entries': [parse_entry(e) for e in entries],
    }

# Write the result next to this script.  ensure_ascii=False emits non-ASCII
# characters verbatim, so the output file must be opened as UTF-8 explicitly.
script_path = Path(__file__).parent
with open(script_path / 'journal.json', 'w', encoding='utf-8') as fp:
    json.dump(result, fp, indent=4, ensure_ascii=False)