# journal/analyze.py

from pathlib import Path
from datetime import datetime
from collections import Counter
from functools import reduce
import re
import string

import sys

def parse_foods_file():
    """Load the food and recipe tables from ``~/projects/open-journal/foods``.

    The file has a foods section and an optional recipes section separated
    by a ``---`` marker. Each section is a sequence of stanzas separated by
    blank lines; the first line of a stanza is its name.

    * Food stanza: each following line is ``<macro> <number>[g|kcal]``
      (for example ``Protein 12g``). Values are used as amounts per 100 g
      when recipes are evaluated.
    * Recipe stanza: each following line is ``<food> <number>[g]``. A
      ``TOTAL <number>[g]`` line overrides the summed ingredient weight
      and ends the stanza.

    A line consisting of ``...`` is a placeholder. In a food it is stored
    as an ``INVALID`` key with value 0.0; in a recipe it is skipped and the
    same ``INVALID`` marker is recorded.

    Returns:
        A ``(foods, recipes)`` tuple. ``foods`` maps a food name to a dict
        of macro name -> value. ``recipes`` maps a recipe name to a dict of
        macro name -> summed value for the whole recipe, plus a ``TOTAL``
        key holding the recipe weight.

    Raises:
        KeyError: a recipe references a food that is not defined.
        ValueError: a line is not of the form ``<name> <number>``.
    """
    placeholder = '...'

    path = Path.home() / 'projects' / 'open-journal' / 'foods'
    text = path.read_text()
    # partition() rather than unpacking split(): a file without the '---'
    # marker is read as "foods only" instead of failing on unpacking.
    foods_text, _, recipes_text = text.partition('---')

    def parse_macro(macro):
        """Turn a ``<name> <number>[g|kcal]`` line into ``(name, float)``."""
        if macro == placeholder:
            return ('INVALID', 0.0)

        name, value = macro.split()
        value = float(value.removesuffix('g').removesuffix('kcal'))
        return (name, value)

    def split_stanzas(section):
        """Return the non-empty stanzas of *section*, each as a list of lines."""
        # Skipping empty chunks avoids a bogus '' entry for an empty section
        # or for runs of more than one blank line between stanzas.
        return [
            stanza.strip().split('\n')
            for stanza in section.strip().split('\n\n')
            if stanza.strip()
        ]

    foods = {
        name: dict(parse_macro(line) for line in macro_lines)
        for name, *macro_lines in split_stanzas(foods_text)
    }

    def evaluate_ingredients(ingredients):
        """Sum the macros of a recipe's ingredients, scaled by their weight."""
        result = {}
        total_weight = 0.0

        for ingredient in ingredients:
            if ingredient == placeholder:
                # Mark the recipe as incomplete the same way foods are marked,
                # instead of looking up a food literally named 'INVALID'.
                result.setdefault('INVALID', 0.0)
                continue

            name, grams = parse_macro(ingredient)
            if name == 'TOTAL':
                # Explicit weight; nothing after this line is read.
                result['TOTAL'] = grams
                break

            total_weight += grams

            try:
                food = foods[name]
            except KeyError:
                raise KeyError(
                    f'recipe ingredient {name!r} is not a known food'
                ) from None

            # Food values are per 100 g, so scale by grams / 100.
            for macro, per_100g in food.items():
                result[macro] = result.get(macro, 0.0) + per_100g * (grams / 100.0)

        # Without an explicit TOTAL line the recipe weighs what went into it.
        result.setdefault('TOTAL', total_weight)
        return result

    recipes = {
        name: evaluate_ingredients(ingredient_lines)
        for name, *ingredient_lines in split_stanzas(recipes_text)
    }

    return foods, recipes

# Parsed once at start-up; the CLI lookup here and the journal scan further
# down both read these tables.
foods, recipes = parse_foods_file()

# CLI mode: ``analyze.py <grams>[g] <name>`` prints the macros for that amount
# of a recipe or food and exits without scanning the journal.
if len(sys.argv) > 1:
    from pprint import pprint

    if len(sys.argv) != 3:
        sys.exit(f'usage: {sys.argv[0]} <grams>[g] <food-or-recipe>')

    value, name = sys.argv[1:]
    value = float(value.removesuffix('g'))

    if name in recipes:
        food = recipes[name]

        # 0 grams means "the whole recipe".
        if value == 0.0:
            value = food['TOTAL']

        # Recipe values are totals for the whole recipe, so scale by the
        # requested share of its weight.
        food = {k: v*(value/food['TOTAL']) for k,v in food.items()}
    elif name in foods:
        # 0 grams means one 100 g reference portion.
        if value == 0.0:
            value = 100

        # Food values are per 100 g.
        food = {k: v*(value/100.0) for k,v in foods[name].items()}
    else:
        # Unknown name: report it and stop with a non-zero status instead of
        # dropping into the debugger and then failing on unbound names.
        sys.exit(f'ERROR: Invalid diet entry: {sys.argv[1]} {name}')

    pprint(food)

    sys.exit(0)


# An entry starts with a ``YYYY-MM-DD HH:MM:SS `` stamp at the beginning of a
# line; splitting on this pattern yields alternating timestamps and bodies.
entry_re = re.compile(r'^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) ', re.MULTILINE)
# Diet annotation inside an entry, e.g. ``@diet 150g rice``.
diet_re = re.compile(r'@diet (\d+g) ([a-zA-Z]+)')

# Characters kept when splitting an entry into words; built once rather than
# once per character of every entry.
word_chars = frozenset(string.ascii_letters + "'")

# Running totals across all days.
total_entries = 0
total_words = 0
word_frequency = Counter()

# Each table starts with its header row; data rows are appended below.
total_csv = [['day', 'entries', 'words']]
daily_csv = [['day', 'entries', 'words', 'calories', 'protein']]
entry_csv = [['timestamp', 'words']]
words_csv = [['word', 'count']]

diet_csv = [[
    'timestamp', 'name', 'grams', 'calories', 'carbs', 'fat', 'protein',
    'saturated_fat', 'sugar', 'fiber'
]]

# One markdown file per day; the file stem is used as the day label.
for fpath in sorted((Path.home() / 'workspace' / 'journal').glob('*.md')):
    day = fpath.stem
    # Text before the first timestamp is the file header and is not analysed.
    header, *tmp = entry_re.split(fpath.read_text())
    entries = list(zip(tmp[::2], tmp[1::2]))

    daily_entries = len(entries)
    daily_words = 0
    daily_calories = 0.0
    daily_protein = 0.0

    for (timestamp, content) in sorted(entries, key=lambda x: x[0]):
        # The stamp carries no timezone, so it is interpreted as local time.
        timestamp = int(datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S').timestamp())

        # Unwrap hard line breaks inside a paragraph; keep one newline
        # between paragraphs.
        content = '\n'.join(
            part.replace('\n', ' ')
            for part in content.split('\n\n')
        )

        for diet in diet_re.finditer(content):
            value, name = diet.groups()
            value = float(value.removesuffix('g'))

            if name in recipes:
                food = recipes[name]

                # 0 grams means "the whole recipe".
                if value == 0.0:
                    value = food['TOTAL']

                food = {k: v*(value/food['TOTAL']) for k,v in food.items()}
            elif name in foods:
                # 0 grams means one 100 g reference portion; kept as a float
                # so the grams column has a single numeric format.
                if value == 0.0:
                    value = 100.0

                food = {k: v*(value/100.0) for k,v in foods[name].items()}
            else:
                # Report and skip unknown names so an unattended run is not
                # stopped by a debugger prompt.
                print(f'ERROR: Invalid diet entry: {content}')
                continue

            diet_csv.append((
                timestamp,
                name,
                value,
                round(food.get('Energy', 0.0), 2),
                round(food.get('Carbs', 0.0), 2),
                round(food.get('Fat', 0.0), 2),
                round(food.get('Protein', 0.0), 2),
                round(food.get('SaturatedFat', 0.0), 2),
                round(food.get('Sugar', 0.0), 2),
                round(food.get('Fiber', 0.0), 2),
            ))

            daily_calories += food.get('Energy', 0.0)
            daily_protein += food.get('Protein', 0.0)

        # Lower-case the entry and treat everything except ASCII letters and
        # apostrophes as a word separator.
        words = ''.join(
            c if c in word_chars else ' '
            for c in content.lower()
        ).split()

        word_frequency.update(words)

        entry_words = len(words)
        daily_words += entry_words

        entry_csv.append([timestamp, entry_words])

    daily_csv.append([day, daily_entries, daily_words, daily_calories,
        daily_protein])

    total_entries += daily_entries
    total_words += daily_words

    # Cumulative counts up to and including this day.
    total_csv.append([day, total_entries, total_words])

words_csv += word_frequency.most_common()

def write_csv(fname, csv):
    """Write the rows of *csv* to the file *fname* as comma-separated text.

    Every cell is converted with ``str`` and the cells of a row are joined
    with commas. Rows are joined with newlines, and no newline follows the
    last row. No quoting or escaping is applied to cell values.
    """
    with open(fname, 'w') as out:
        rendered_rows = [','.join(map(str, row)) for row in csv]
        out.write('\n'.join(rendered_rows))

# Write every accumulated table to its CSV file in the current directory.
for fname, table in (
    ('total.csv', total_csv),
    ('daily.csv', daily_csv),
    ('entry.csv', entry_csv),
    ('words.csv', words_csv),
    ('diet.csv', diet_csv),
):
    write_csv(fname, table)