diff --git a/parse.py b/parse.py
index 1d93a72..8ebe94c 100644
--- a/parse.py
+++ b/parse.py
@@ -1,9 +1,10 @@
+import enum
 from pathlib import Path
 from datetime import datetime
 import re
 import json
 
-entry_re = re.compile(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})', re.MULTILINE)
+entry_re = re.compile(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ?', re.MULTILINE)
 
 curr_day = ''
 
@@ -143,25 +144,137 @@ entry_modules = {
     'notify': create_entry_module_parser('notify', parse_notify),
 }
 
+from functools import reduce, partial
+
+def split_keep(delims, string):
+    res = []
+    buf = []
+    for c in string:
+        if c in delims:
+            if buf:
+                res.append(''.join(buf))
+            res.append(c)
+            buf = []
+        else:
+            buf.append(c)
+    if buf:
+        res.append(''.join(buf))
+    return res
+
+assert split_keep(['@', '\n'], 'hello @world\n\nabout') == ['hello ', '@', 'world', '\n', '\n', 'about']
+
+def merge_chars(chars, l):
+    res = []
+    for i in l:
+        if i in chars and res and all(c == i for c in res[-1]):
+            res[-1] += i
+        else:
+            res.append(i)
+    return res
+
+assert merge_chars('\n', ['\n', '\n', 'hello', 'world', '\n', '\n']) == ['\n\n', 'hello', 'world', '\n\n']
+
+def attach_to_next(c, l):
+    l = l.copy()
+
+    try:
+        while True:
+            i = l.index(c)
+            l[i+1] = c + l[i+1]
+            l.pop(i)
+    except (ValueError, IndexError):
+        pass
+
+    return l
+
+assert attach_to_next('@', ['aoeu', '@', 'oeu']) == ['aoeu', '@oeu']
+
+def attach_to_prev_if(pred, l):
+    res = []
+
+    for i, curr in enumerate(l):
+        prev = l[i-1] if i-1 >= 0 else None
+        if prev and pred(prev, curr):
+            res[-1] += curr
+        else:
+            res.append(curr)
+
+    return res
+
+assert attach_to_prev_if(lambda p, c: p[-1] != '\n' and c[0] == '@', ['aoeu', '@oeu']) == ['aoeu@oeu']
+
+def merge_notes_block(l):
+    res = []
+
+    i = 0
+    while i < len(l):
+        if l[i] == '@notes':
+            # notes nl source nl title
+            res.append('\n'.join([l[i], l[i+2], l[i+4]]))
+            i += 5
+        else:
+            res.append(l[i])
+            i += 1
+
+    return res
+
+def merge_wrapped_lines(l):
+    res = []
+
+    i = 0
+    while i < len(l):
+        curr = l[i]
+        prev = l[i-1] if i > 0 else None
+        nxt = l[i+1] if i+1 < len(l) else None
+
+        if prev and nxt and curr == '\n':
+            len_prev = len(prev)
+            if i == 1:
+                len_prev += len('2020-02-02 02:02:02 ')
+
+            if not nxt[0].isspace():
+                next_word = nxt.split()[0]
+                if len_prev + len(next_word) >= 80:
+                    res[-1] += ' ' + nxt
+                    i += 2
+                    continue
+
+        res.append(curr)
+        i += 1
+
+    return res
+
+def apply(f, x):
+    return f(x)
+
+def flip(f):
+    return lambda a1, a2: f(a2, a1)
+
 def parse_entry(entry):
     result = {}
 
     def split_into_blocks(text):
-        result = []
-
-        for block in re.split(r'\n{2,}', text):
-            block = block.strip()
-            if not block:
-                continue
-
-            for i, module in enumerate(block.replace(' @', '\n@').split('\n@')):
-                #module = module.strip().replace('\n', ' ')
-                if i == 0:
-                    result.append(module)
-                else:
-                    result.append('@'+module)
-
-        return result
+        r = reduce(flip(apply), [
+            # split the text into sections by newline and tag symbol, keeping the separators
+            partial(split_keep, ('\n', '@')),
+            # merge sequential newlines together into a single whitespace block
+            partial(merge_chars, '\n'),
+            # attach escaped tag symbols
+            partial(attach_to_prev_if, lambda p, c: c == '@' and p[-1] == '\\'),
+            # attach tag symbols
+            partial(attach_to_next, '@'),
+            # glue an inline tag chunk onto the preceding text unless a newline separates them
+            partial(attach_to_prev_if, lambda p, c: p[-1] != '\n' and not (p[0] == '@' and p[-1] == ' ') and c[0] == '@'),
+            # collapse the three lines of a @notes block (tag, source, title) into one chunk
+            merge_notes_block,
+            # strip all non-whitespace blocks
+            partial(map, lambda s: s if s.isspace() else s.rstrip()), list,
+            # re-join lines that were hard-wrapped at the 80-column margin
+            merge_wrapped_lines,
+            # remove trailing whitespace block
+            lambda b: b if b and not all(c == '\n' for c in b[-1]) else b[:-1],
+        ], text)
+        return r
 
     timestamp, content = entry
 
@@ -169,7 +282,7 @@ def parse_entry(entry):
     result['blocks'] = []
 
     for b in split_into_blocks(content):
-        if b[0] == '@':
+        if b.startswith('@'):
             tag = b.split()[0][1:]
             result['blocks'].append(entry_modules[tag](b))
         else:
@@ -177,20 +290,23 @@ def parse_entry(entry):
 
     return result
 
-result = {}
-
-for fpath in list(sorted((Path.home() / 'workspace' / 'journal').glob('*.md'))):
-    curr_day = fpath.stem
-
-    header, *tmp = entry_re.split(fpath.read_text())
+def parse_page(text):
+    header, *tmp = entry_re.split(text)
     entries = list(zip(tmp[::2], tmp[1::2]))
 
-    result[fpath.stem] = {
+    return {
         'header': parse_header(header),
         'entries': [parse_entry(e) for e in entries],
     }
 
-script_path = Path(__file__).parent
+if __name__ == '__main__':
+    result = {}
+
-
-with open(script_path / 'journal.json', 'w') as fp:
-    json.dump(result, fp, indent=4, ensure_ascii=False)
+    for fpath in list(sorted((Path.home() / 'workspace' / 'journal').glob('*.md'))):
+        day = parse_page(fpath.read_text())
+        result[fpath.stem] = day
+
+    script_path = Path(__file__).parent
+
+    with open(script_path / 'journal.json', 'w') as fp:
+        json.dump(result, fp, indent=4, ensure_ascii=False)