olari
2021-06-01 11:38:57 +03:00
parent a10d3e1326
commit 6e07d120a2
3 changed files with 150 additions and 86 deletions

.gitignore vendored (2 changed lines)

@@ -1,3 +1,5 @@
 **/*.csv
 .ipynb_checkpoints/
 __pycache__/
+diet
+journal.json

foods (10 changed lines)

@@ -1,4 +1,14 @@
+Kaurakeksi
+Energy 461kcal
+Fat 20g
+SaturatedFat 8.5g
+Carbs 56g
+Sugar 31g
+Fiber 4.8g
+Protein 7.9g
+Salt 0.2g
+
 Cereal
 Energy 373kcal
 Fat 1.4g
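Note: the foods file holds blank-line-separated records, each a food name followed by "Nutrient value" lines. No reader for it is part of this commit (the `diet` tool ignored in .gitignore presumably consumes it), so the following is only a minimal sketch of how such a file could be loaded under that assumed layout; the helper name load_foods is hypothetical and not in the repository:

from pathlib import Path

def load_foods(path='foods'):
    # Blank lines separate food records; the first line of a record is the
    # food name, the remaining lines are "Nutrient value" pairs kept here
    # as raw strings.
    foods = {}
    for record in Path(path).read_text().strip().split('\n\n'):
        name, *nutrients = record.strip().splitlines()
        foods[name.strip()] = dict(line.split(maxsplit=1) for line in nutrients)
    return foods

if __name__ == '__main__':
    # Assuming the foods file from this commit sits in the working directory,
    # this prints something like {'Energy': '461kcal', 'Fat': '20g', ...}.
    print(load_foods().get('Kaurakeksi'))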

parse.py (224 changed lines)

@@ -1,110 +1,162 @@
-from datetime import datetime, timedelta
 from pathlib import Path
-from subprocess import run
-from typing import Union
-import sys
+from datetime import datetime
 import re
+import json
+from functools import reduce

-JOURNAL_PATH = '/Users/olari/workspace/journal'
-entry_re = re.compile(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
+entry_re = re.compile(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})', re.MULTILINE)
+curr_day = ''

-def get_daily_file_paths() -> list[Path]:
-    return list(sorted(Path(JOURNAL_PATH).glob('*.md')))
-
-def resolve_relative_time_expression(now: datetime, expr: str) -> datetime:
-    weekdays = [
-        'monday', 'tuesday', 'wednesday', 'thursday',
-        'friday', 'saturday', 'sunday'
-    ]
-    if expr == 'today':
-        return now
-    elif expr == 'tomorrow':
-        return now + timedelta(days=1)
-    elif expr == 'yesterday':
-        return now - timedelta(days=1)
-    elif expr in weekdays:
-        return now - timedelta(days=now.weekday()) + weekdays.index(expr)
-    else:
-        return None
-
-def parse_godword(content: str) -> list[str]:
-    return content.split()
+def parse_godword(godword):
+    return godword.split()

-def parse_habits(content: str) -> dict[str, bool]:
-    return {
-        line[4:]: line[1] == 'x'
-        for line in content.splitlines()
-    }
+def parse_habits(habits):
+    result = {}
+    for habit in habits.splitlines():
+        value, name = habit.split(maxsplit=1)
+        name = name.strip()
+        result[name] = value[1] == 'x'
+    return result

 header_modules = {
-    'godword': (None, parse_godword),
-    'habits': (None, parse_habits),
+    'godword': parse_godword,
+    'habits': parse_habits,
 }

-def parse_header_module(content: str) -> tuple[str, Union[list, dict]]:
-    name, *content = content.splitlines()
-    name = name.removesuffix(':').lower()
-    content = '\n'.join(content)
-    _, parse = header_modules[name]
-    return name, parse(content)
-
 def parse_header(header):
-    title, *modules = header.split('\n\n')
-    title = title.removeprefix('# ')
-    return {
-        'title': title,
-        'modules': dict(parse_header_module(module) for module in modules)
-    }
+    result = {}

-def parse_diet(content):
-    pass
+    def split_into_blocks(text):
+        return [b.strip() for b in re.split(r'\n{2,}', text) if b.strip() != '']

-def parse_content(content):
-    content = content.strip()
-    return {
-        'blocks': [b.replace('\n', ' ') for b in content.split('\n\n')]
-    }
+    title, *modules = split_into_blocks(header)
+    for module in modules:
+        name, value = module.split('\n', maxsplit=1)
+        name = name.lower().removesuffix(':')
+        result[name] = header_modules[name](value)
+    return result

 def parse_timestamp(timestamp):
     return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')

+def parse_post(block):
+    block = block.removeprefix('@post ')
+    try:
+        timestamp = int(parse_timestamp(block[:19]).timestamp())
+        block = block[19:]
+    except:
+        timestamp = None
+    content = block
+    return {'timestamp': timestamp, 'content': content}
+
+def parse_notes(block):
+    tag, source, title = block.splitlines()
+    return {'source': source, 'title': title}
+
+def parse_diet(block):
+    tag, amount, food = block.split()
+    amount = float(amount.removesuffix('g'))
+    return {'amount': amount, 'food': food}
+
+def parse_timer(block):
+    tag, *rest = block.split()
+    name = None
+    timestamp = None
+    if len(rest) > 2:
+        name, *rest = rest
+    if len(rest) > 1:
+        timestamp = int(parse_timestamp(' '.join(rest)).timestamp())
+    result = {}
+    if name:
+        result['name'] = name
+    if timestamp:
+        result['timestamp'] = timestamp
+    return result
+
+def parse_exercise(block):
+    tag, *parts = block.split()
+    if parts[0] == 'walk':
+        kind, minutes, distance, steps = parts
+        return {
+            'kind': kind,
+            'minutes': int(minutes.removesuffix('min')),
+            'distance': float(distance.removesuffix('km')),
+            'steps': int(steps.removesuffix('steps')),
+        }
+    return {'kind': 'INVALID'}
+
+def create_entry_module_parser(name, handler=None):
+    handler = handler or (lambda b: {'value': b.removeprefix(f'@{name} ')})
+    return lambda b: {'type': name} | handler(b)
+
+entry_modules = {
+    'hide': create_entry_module_parser('hide', lambda _: {}),
+    'post': create_entry_module_parser('post', parse_post),
+    'info': create_entry_module_parser('info'),
+    'notes': create_entry_module_parser('notes', parse_notes),
+    'behavior': create_entry_module_parser('behavior'),
+    'diet': create_entry_module_parser('diet', parse_diet),
+    'task': create_entry_module_parser('task'),
+    'start': create_entry_module_parser('start', parse_timer),
+    'stop': create_entry_module_parser('stop', parse_timer),
+    'done': create_entry_module_parser('done', parse_timer),
+    'exercise': create_entry_module_parser('exercise', parse_exercise),
+}
+
 def parse_entry(entry):
-    return {
-        'timestamp': int(parse_timestamp(entry[:19]).timestamp()),
-        'content': parse_content(entry[19:]),
-    }
-
-def parse_file(fpath):
-    header = {}
-    entries = []
-    buf = []
-    for i, line in enumerate(fpath.read_text().splitlines()):
-        if entry_re.match(line):
-            if not header:
-                header = parse_header('\n'.join([c.strip() for c in buf]))
-            else:
-                entries.append(parse_entry('\n'.join(buf)))
-            buf = [line]
-        else:
-            buf.append(line)
-    return {
-        'header': header,
-        'entries': entries,
-    }
-
-def parse_journal():
     result = {}
-    for fpath in get_daily_file_paths()[-5:]:
-        info = parse_file(fpath)
-        result[info['header']['title']] = info
-    return result
-
-import json
-open('journal.json', 'w').write(json.dumps(parse_journal()))
+
+    def split_into_blocks(text):
+        result = []
+        for block in re.split(r'\n{2,}', text):
+            block = block.strip()
+            if not block:
+                continue
+            for i, module in enumerate(block.replace(' @', '\n@').split('\n@')):
+                #module = module.strip().replace('\n', ' ')
+                if i == 0:
+                    result.append(module)
+                else:
+                    result.append('@'+module)
+        return result
+
+    timestamp, content = entry
+    result['timestamp'] = int(parse_timestamp(timestamp.strip()).timestamp())
+    result['blocks'] = []
+    for b in split_into_blocks(content):
+        if b[0] == '@':
+            tag = b.split()[0][1:]
+            result['blocks'].append(entry_modules[tag](b))
+        else:
+            result['blocks'].append(b)
+    return result
+
+result = {}
+for fpath in list(sorted((Path.home() / 'workspace' / 'journal').glob('*.md'))):
+    curr_day = fpath.stem
+    header, *tmp = entry_re.split(fpath.read_text())
+    entries = list(zip(tmp[::2], tmp[1::2]))
+    result[fpath.stem] = {
+        'header': parse_header(header),
+        'entries': [parse_entry(e) for e in entries],
+    }
+
+with open('journal.json', 'w') as fp:
+    json.dump(result, fp, indent=4, ensure_ascii=False)
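Note: judging from the new code, a day file in ~/workspace/journal appears to consist of a header (a '# title' block plus module blocks such as 'Habits:' and 'Godword:'), followed by entries introduced by a 'YYYY-MM-DD HH:MM:SS' line; blocks inside an entry that start with one of the tags in entry_modules (@post, @info, @notes, @behavior, @diet, @task, @start, @stop, @done, @exercise, @hide) are dispatched to their parsers, and anything else is kept as free text. The script relies on str.removesuffix and the dict merge operator |, so it needs Python 3.9 or newer. As an illustration of the output shape, the snippet below mirrors what entry_modules['diet'] produces for a single block; the sample block is made up, with the food name taken from the foods file above:

block = '@diet 100g Kaurakeksi'           # hypothetical journal block
tag, amount, food = block.split()         # same steps as parse_diet above
parsed = {
    'type': tag[1:],                      # 'diet'
    'amount': float(amount.removesuffix('g')),
    'food': food,
}
print(parsed)  # {'type': 'diet', 'amount': 100.0, 'food': 'Kaurakeksi'}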