"""Convert Markdown journal files into a single ``journal.json`` document.

Every ``*.md`` file in ``~/workspace/journal`` is split into a header (the
text before the first timestamp line) and a list of timestamped entries.
Headers and entries are parsed into plain dicts/lists and the combined
result, keyed by file stem, is written to ``journal.json`` in the current
working directory.
"""

from pathlib import Path
from datetime import datetime
import re
import json

# A line starting with ``YYYY-MM-DD HH:MM:SS`` opens a new entry.  The capture
# group makes ``entry_re.split`` keep the timestamps in its output.
entry_re = re.compile(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})', re.MULTILINE)

# Stem of the journal file currently being processed (set by the main loop).
curr_day = ''

# ``[c] name`` habit line where ``c`` is any single character, including a
# space.  A plain ``str.split`` cannot handle ``[ ]`` because the space inside
# the brackets is itself a split point.
_habit_re = re.compile(r'\[(.)\]\s+(.+)')


def parse_godword(godword):
    """Return the whitespace-separated words of the ``godword`` section."""
    return godword.split()


def parse_habits(habits):
    """Parse habit checkbox lines into a ``{name: done}`` mapping.

    Each non-blank line is expected to look like ``[x] name``; a habit is
    done when the character inside the brackets is ``x``.  Blank lines are
    skipped.  Lines that do not match the checkbox pattern fall back to the
    historical rule: the first whitespace-separated token is the marker and
    its second character decides the state.

    Raises:
        ValueError: a non-checkbox line has no name after the marker.
        IndexError: a non-checkbox line has a one-character marker.
    """
    result = {}
    for habit in habits.splitlines():
        habit = habit.strip()
        if not habit:
            continue
        checkbox = _habit_re.fullmatch(habit)
        if checkbox:
            mark, name = checkbox.groups()
        else:
            value, name = habit.split(maxsplit=1)
            mark = value[1]
        result[name.strip()] = mark == 'x'
    return result


def parse_notifications(notifications):
    """Parse notification lines into ``{'source', 'message'}`` dicts.

    The first two whitespace-separated tokens of a line form the source
    (with surrounding ``[``/``]`` characters stripped); the remaining tokens
    form the message.
    """
    result = []
    for notification in notifications.splitlines():
        parts = notification.split()
        result.append({
            'source': ' '.join(parts[0:2]).strip('[]'),
            'message': ' '.join(parts[2:]),
        })
    return result


# Header section name (lower-cased, without the trailing colon) -> parser.
header_modules = {
    'godword': parse_godword,
    'habits': parse_habits,
    'notifications': parse_notifications,
}


def parse_header(header):
    """Parse the text preceding the first entry of a journal file.

    The header is split on blank lines.  The first block is treated as the
    title and ignored; every following block consists of a name line
    (``Name:``) followed by the section body, which is handed to the matching
    parser in ``header_modules``.  An empty header yields an empty dict and a
    section without a body is parsed as an empty string.

    Raises:
        KeyError: a section name has no parser in ``header_modules``.
    """
    def split_into_blocks(text):
        return [b.strip() for b in re.split(r'\n{2,}', text) if b.strip() != '']

    result = {}
    # Slicing (rather than ``title, *modules = ...``) tolerates files that
    # start directly with an entry and therefore have no header at all.
    for module in split_into_blocks(header)[1:]:
        # ``partition`` tolerates a section that has a name line but no body.
        name, _, value = module.partition('\n')
        name = name.lower().removesuffix(':')
        result[name] = header_modules[name](value)
    return result


def parse_timestamp(timestamp):
    """Parse a ``YYYY-MM-DD HH:MM:SS`` string into a naive ``datetime``.

    Raises:
        ValueError: the string does not match the format.
    """
    return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')


def parse_post(block):
    """Parse an ``@post`` block into optional ``content`` and ``timestamp``.

    If the text after ``@post `` starts with a ``YYYY-MM-DD HH:MM:SS``
    timestamp, it is converted to integer epoch seconds (the naive datetime
    is interpreted as local time) and removed from the content.  Keys whose
    value would be empty are omitted from the result.
    """
    block = block.removeprefix('@post ')
    try:
        timestamp = int(parse_timestamp(block[:19]).timestamp())
    except (ValueError, OverflowError, OSError):
        # No leading timestamp (or one the platform cannot convert): the
        # whole block is content.  Narrower than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        timestamp = None
    else:
        block = block[19:]
    content = block.strip()
    result = {}
    if content:
        result['content'] = content
    if timestamp is not None:  # epoch 0 is a valid timestamp
        result['timestamp'] = timestamp
    return result


def parse_notes(block):
    """Parse a three-line ``@notes`` block: tag line, source line, title line.

    Raises:
        ValueError: the block does not have exactly three lines.
    """
    _tag, source, title = block.splitlines()
    return {'source': source, 'title': title}


def parse_diet(block):
    """Parse ``@diet <amount>g <food>`` into an int amount and a food name.

    Raises:
        ValueError: the block does not have exactly three tokens, or the
            amount is not an integer (optionally suffixed with ``g``).
    """
    _tag, amount, food = block.split()
    return {'amount': int(amount.removesuffix('g')), 'food': food}


def parse_timer(block):
    """Parse ``@start``/``@stop``/``@done`` blocks.

    Accepted token layouts after the tag:

    * ``<name> <date> <time>`` -> ``name`` and ``timestamp``
    * ``<date> <time>``        -> ``timestamp`` only
    * anything shorter         -> empty result

    NOTE(review): a lone token after the tag (e.g. ``@start work``) is
    dropped, not treated as a name -- confirm this is intended.

    Raises:
        ValueError: the tokens expected to form a timestamp do not parse.
    """
    _tag, *rest = block.split()
    name = None
    timestamp = None
    if len(rest) > 2:
        name, *rest = rest
    if len(rest) > 1:
        timestamp = int(parse_timestamp(' '.join(rest)).timestamp())
    result = {}
    if name:
        result['name'] = name
    if timestamp is not None:  # epoch 0 is a valid timestamp
        result['timestamp'] = timestamp
    return result


def parse_exercise(block):
    """Parse an ``@exercise`` block.

    Only ``walk <N>min <D>km <S>steps`` is understood; any other kind, or a
    block without a kind, yields ``{'kind': 'INVALID'}``.

    Raises:
        ValueError: a ``walk`` block has the wrong number of tokens or
            non-numeric values.
    """
    _tag, *parts = block.split()
    # ``parts`` is empty for a bare ``@exercise``; treat it like any other
    # unrecognised kind rather than failing on ``parts[0]``.
    if parts and parts[0] == 'walk':
        kind, minutes, distance, steps = parts
        return {
            'kind': kind,
            'minutes': int(minutes.removesuffix('min')),
            'distance': float(distance.removesuffix('km')),
            'steps': int(steps.removesuffix('steps')),
        }
    return {'kind': 'INVALID'}


def parse_notify(block):
    """Parse ``@notify <day> <message...>`` into ``day`` and ``message``.

    Raises:
        ValueError: the block has no token after the tag.
    """
    _tag, day, *rest = block.split()
    return {'day': day.strip(), 'message': ' '.join(rest)}


def create_entry_module_parser(name, handler=None):
    """Build a parser for ``@<name>`` blocks.

    The returned callable takes the raw block text and returns
    ``{'type': name}`` merged with the dict produced by ``handler``.  Without
    a handler, the block text minus its ``@<name> `` prefix is stored under
    ``'value'``.
    """
    if not handler:
        def handler(b):
            return {'value': b.removeprefix(f'@{name} ')}
    return lambda b: {'type': name} | handler(b)


# Entry tag (without the leading ``@``) -> block parser.
entry_modules = {
    'hide': create_entry_module_parser('hide', lambda _: {}),
    'post': create_entry_module_parser('post', parse_post),
    'info': create_entry_module_parser('info'),
    'notes': create_entry_module_parser('notes', parse_notes),
    'behavior': create_entry_module_parser('behavior'),
    'diet': create_entry_module_parser('diet', parse_diet),
    'task': create_entry_module_parser('task'),
    'start': create_entry_module_parser('start', parse_timer),
    'stop': create_entry_module_parser('stop', parse_timer),
    'done': create_entry_module_parser('done', parse_timer),
    'exercise': create_entry_module_parser('exercise', parse_exercise),
    'notify': create_entry_module_parser('notify', parse_notify),
}


def parse_entry(entry):
    """Parse one ``(timestamp, content)`` pair into an entry dict.

    Returns ``{'timestamp': <epoch seconds>, 'blocks': [...]}`` where each
    block is either a plain string (free text) or the dict produced by the
    matching parser in ``entry_modules`` (text starting with ``@tag``).

    Raises:
        ValueError: the timestamp does not parse.
        KeyError: a block starts with an ``@tag`` that is not registered in
            ``entry_modules``.
    """
    def split_into_blocks(text):
        pieces = []
        for block in re.split(r'\n{2,}', text):
            block = block.strip()
            if not block:
                continue
            # A paragraph may hold several modules, separated by " @" or by a
            # newline followed by "@".  Newlines inside a module are kept
            # because multi-line modules (e.g. @notes) depend on them.
            modules = block.replace(' @', '\n@').split('\n@')
            for i, module in enumerate(modules):
                # Splitting consumed the "@" of every module but the first.
                pieces.append(module if i == 0 else '@' + module)
        return pieces

    timestamp, content = entry
    result = {
        'timestamp': int(parse_timestamp(timestamp.strip()).timestamp()),
        'blocks': [],
    }
    for b in split_into_blocks(content):
        if b[0] == '@':
            tag = b.split()[0][1:]
            result['blocks'].append(entry_modules[tag](b))
        else:
            result['blocks'].append(b)
    return result


result = {}
for fpath in sorted((Path.home() / 'workspace' / 'journal').glob('*.md')):
    curr_day = fpath.stem
    # ``entry_re`` has one capture group, so after the header the split
    # alternates timestamp, body, timestamp, body, ...
    header, *tmp = entry_re.split(fpath.read_text())
    entries = list(zip(tmp[::2], tmp[1::2]))
    result[fpath.stem] = {
        'header': parse_header(header),
        'entries': [parse_entry(e) for e in entries],
    }

# ``ensure_ascii=False`` writes non-ASCII characters verbatim, so the file
# encoding is pinned to UTF-8 instead of depending on the locale.
with open('journal.json', 'w', encoding='utf-8') as fp:
    json.dump(result, fp, indent=4, ensure_ascii=False)