# journal/parse.py

from pathlib import Path
from datetime import datetime
import re
import json

# Matches a "YYYY-MM-DD HH:MM:SS" timestamp at the start of any line. The
# capture group makes re.split() keep each timestamp in its output list.
entry_re = re.compile(
    r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})',
    flags=re.M,
)

# Stem of the journal file currently being processed (empty until the file
# loop assigns it).
curr_day = ''


def parse_godword(godword):
    """Return the whitespace-separated words of the godword text as a list."""
    words = godword.split()
    return words

def parse_habits(habits):
    """Parse a habit checklist into a ``{name: done}`` mapping.

    Each line is expected to be a one-character checkbox followed by the
    habit name, e.g. ``[x] run`` (done) or ``[ ] read`` / ``[-] read`` (not
    done). A habit counts as done only when the checkbox character is a
    lowercase ``x``.

    Whitespace-only lines are skipped. Lines that do not start with a
    ``[?]`` checkbox followed by a name fall back to a plain whitespace
    split (first token is the marker, the rest is the name), so malformed
    lines still raise ``ValueError`` / ``IndexError``.
    """
    result = {}
    for habit in habits.splitlines():
        line = habit.strip()
        if not line:
            continue

        # Read the checkbox by position rather than by whitespace split:
        # splitting "[ ] name" on whitespace breaks the box into "[" and "]".
        if line[0] == '[' and line[2:3] == ']' and line[3:].strip():
            mark, name = line[1], line[3:]
        else:
            value, name = line.split(maxsplit=1)
            mark = value[1]

        result[name.strip()] = mark == 'x'
    return result

def parse_notifications(notifications):
    """Turn each notification line into a ``{'source', 'message'}`` dict.

    The first two whitespace-separated tokens of a line form the source
    (with any leading/trailing square brackets removed); the remaining
    tokens, re-joined with single spaces, form the message.
    """
    def to_record(line):
        tokens = line.split()
        source_tokens, message_tokens = tokens[:2], tokens[2:]
        return {
            'source': ' '.join(source_tokens).strip('[]'),
            'message': ' '.join(message_tokens),
        }

    return [to_record(line) for line in notifications.splitlines()]


# Dispatch table: lower-cased header module heading -> parser for its body.
header_modules = dict(
    godword=parse_godword,
    habits=parse_habits,
    notifications=parse_notifications,
)

def parse_header(header):
    """Parse the part of a journal file that precedes the first entry.

    The header is split into blocks on blank lines. The first block is the
    title and is ignored; every following block is a module whose first
    line is its heading (case-insensitive, optional trailing ``:``) and
    whose remaining lines are passed to the matching parser in
    ``header_modules``.

    Returns a dict mapping module name to its parsed value. An empty header
    yields ``{}``, and a heading with no body is parsed as an empty body.

    Raises:
        KeyError: if a heading has no parser in ``header_modules``.
    """
    blocks = [b.strip() for b in re.split(r'\n{2,}', header) if b.strip()]

    result = {}
    # blocks[0] is the title; slicing (rather than unpacking) keeps an empty
    # header from raising.
    for module in blocks[1:]:
        # partition() tolerates a heading with nothing under it.
        name, _, value = module.partition('\n')
        name = name.strip().lower().removesuffix(':')
        result[name] = header_modules[name](value)

    return result

def parse_timestamp(timestamp):
    """Parse a ``YYYY-MM-DD HH:MM:SS`` string into a naive ``datetime``.

    Raises ``ValueError`` (from ``strptime``) when the text does not match.
    """
    layout = '%Y-%m-%d %H:%M:%S'
    parsed = datetime.strptime(timestamp, layout)
    return parsed

def parse_post(block):
    """Parse an ``@post`` block into its optional timestamp and content.

    After the ``@post `` prefix is removed, the first 19 characters are
    tried as a ``YYYY-MM-DD HH:MM:SS`` timestamp. If they parse, they are
    removed from the content and stored as an integer Unix timestamp;
    otherwise the whole remainder is treated as content.

    Returns a dict with ``'content'`` (when non-empty after stripping)
    and/or ``'timestamp'`` (when one was found).
    """
    block = block.removeprefix('@post ')
    try:
        timestamp = int(parse_timestamp(block[:19]).timestamp())
    except (ValueError, OverflowError, OSError):
        # ValueError: the text is not a timestamp. OverflowError/OSError:
        # the platform cannot convert that date. Anything else is a real
        # bug and must propagate rather than be swallowed.
        timestamp = None
    else:
        block = block[19:]

    content = block.strip()

    result = {}
    if content:
        result['content'] = content
    # Compare against None so an epoch value of 0 is still recorded.
    if timestamp is not None:
        result['timestamp'] = timestamp
    return result

def parse_notes(block):
    """Parse a three-line ``@notes`` block: tag line, source line, title line.

    Raises ``ValueError`` if the block does not have exactly three lines.
    """
    lines = block.splitlines()
    _, source, title = lines
    return dict(source=source, title=title)

def parse_diet(block):
    """Parse an ``@diet <amount>g <food...>`` block.

    The amount is an integer with an optional ``g`` suffix. Everything after
    it is the food name, so multi-word foods ("greek yogurt") are accepted
    and re-joined with single spaces.

    Returns ``{'amount': int, 'food': str}``.

    Raises:
        ValueError: if the amount or the food is missing, or the amount is
            not an integer.
    """
    # Requiring `first` keeps the original failure when no food is given.
    _, amount, first, *more = block.split()
    return {
        'amount': int(amount.removesuffix('g')),
        'food': ' '.join([first, *more]),
    }

def parse_timer(block):
    """Parse a timer block (``@start`` / ``@stop`` / ``@done``).

    Accepted shapes after the tag:
      * nothing                      -> ``{}``
      * ``name``                     -> ``{'name': name}``
      * ``date time``                -> ``{'timestamp': ts}``
      * ``name date time``           -> ``{'name': name, 'timestamp': ts}``

    The timestamp is stored as an integer Unix timestamp.

    Raises:
        ValueError: if the tokens taken as the timestamp do not match
            ``YYYY-MM-DD HH:MM:SS``.
    """
    tag, *rest = block.split()

    name = None
    timestamp = None
    # Exactly two tokens are "date time". Any other non-empty count starts
    # with the name, including a lone name, which used to be dropped.
    if rest and len(rest) != 2:
        name, *rest = rest
    if len(rest) > 1:
        timestamp = int(parse_timestamp(' '.join(rest)).timestamp())

    result = {}
    if name:
        result['name'] = name
    # Compare against None so an epoch value of 0 is still recorded.
    if timestamp is not None:
        result['timestamp'] = timestamp
    return result

def parse_exercise(block):
    """Parse an ``@exercise`` block.

    Only the ``walk`` kind is understood, in the form
    ``walk <N>min <D>km <S>steps``; it yields integer minutes, float
    distance and integer steps. Any other kind yields
    ``{'kind': 'INVALID'}``.
    """
    _, *fields = block.split()

    # Guard clause: anything that is not a walk is reported as invalid.
    if fields[0] != 'walk':
        return {'kind': 'INVALID'}

    kind, minutes, distance, steps = fields
    return dict(
        kind=kind,
        minutes=int(minutes.removesuffix('min')),
        distance=float(distance.removesuffix('km')),
        steps=int(steps.removesuffix('steps')),
    )

def parse_notify(block):
    """Parse ``@notify <day> <message...>`` into ``{'day', 'message'}``.

    The message is every token after the day, re-joined with single spaces.
    Raises ``ValueError`` if the day token is missing.
    """
    _, day, *words = block.split()
    message = ' '.join(words)
    return dict(day=day.strip(), message=message)

def create_entry_module_parser(name, handler=None):
    """Build a parser for ``@<name>`` blocks.

    The returned callable takes the raw block text and returns
    ``{'type': name}`` merged with whatever ``handler(block)`` returns
    (handler keys win on conflict). Without a handler, the block text minus
    its ``@<name> `` prefix is stored under ``'value'``.
    """
    def keep_raw_value(b):
        return {'value': b.removeprefix(f'@{name} ')}

    chosen = handler or keep_raw_value

    def parser(b):
        base = {'type': name}
        return base | chosen(b)

    return parser

# Dispatch table: entry tag (without the leading '@') -> block parser.
# A handler of None means the block text is kept verbatim under 'value'.
entry_modules = {
    tag: create_entry_module_parser(tag, handler)
    for tag, handler in (
        ('hide', lambda _: {}),
        ('post', parse_post),
        ('info', None),
        ('notes', parse_notes),
        ('behavior', None),
        ('diet', parse_diet),
        ('task', None),
        ('start', parse_timer),
        ('stop', parse_timer),
        ('done', parse_timer),
        ('exercise', parse_exercise),
        ('notify', parse_notify),
    )
}

def parse_entry(entry):
    """Parse one ``(timestamp, content)`` pair into an entry dict.

    Returns ``{'timestamp': int, 'blocks': [...]}`` where the timestamp is
    an integer Unix timestamp and each block is either a plain string or
    the dict produced by the matching parser in ``entry_modules``.

    Raises ``ValueError`` for a malformed timestamp and ``KeyError`` for an
    ``@tag`` that has no parser.
    """
    def split_into_blocks(text):
        # Paragraphs are separated by blank lines. Within a paragraph, every
        # '@' preceded by a space or newline starts a new piece.
        pieces = []
        for paragraph in re.split(r'\n{2,}', text):
            paragraph = paragraph.strip()
            if not paragraph:
                continue

            head, *tail = paragraph.replace(' @', '\n@').split('\n@')
            pieces.append(head)
            # The split consumed the '@' of every later piece; restore it.
            pieces.extend('@' + fragment for fragment in tail)
        return pieces

    timestamp, content = entry

    parsed = {
        'timestamp': int(parse_timestamp(timestamp.strip()).timestamp()),
        'blocks': [],
    }
    blocks = parsed['blocks']

    for b in split_into_blocks(content):
        if b[0] != '@':
            blocks.append(b)
            continue
        tag = b.split()[0][1:]
        blocks.append(entry_modules[tag](b))

    return parsed

# Parsed journal, keyed by file stem.
result = {}

journal_dir = Path.home() / 'workspace' / 'journal'

# sorted() already returns a list; files are processed in path order.
for fpath in sorted(journal_dir.glob('*.md')):
    curr_day = fpath.stem

    # Splitting on the captured timestamp yields
    # [header, ts1, body1, ts2, body2, ...]. Read as UTF-8 explicitly rather
    # than relying on the platform's locale encoding.
    header, *tmp = entry_re.split(fpath.read_text(encoding='utf-8'))
    entries = list(zip(tmp[::2], tmp[1::2]))

    result[fpath.stem] = {
        'header': parse_header(header),
        'entries': [parse_entry(e) for e in entries],
    }

script_path = Path(__file__).parent

# ensure_ascii=False writes non-ASCII characters verbatim, so the file must
# be opened as UTF-8 or the write can fail on non-UTF-8 locales.
with open(script_path / 'journal.json', 'w', encoding='utf-8') as fp:
    json.dump(result, fp, indent=4, ensure_ascii=False)