"""Flatten the raw JSON dumps in ``raw/`` into a single ``data.json``.

Every ``raw/*.json`` file is expected to hold an object whose ``'value'`` key
is a list of items.  Each item is reduced to a flat record (see
``convert_item``) and the records of all files, visited in sorted filename
order, are written to ``data.json`` as one JSON array.
"""
import json
from pathlib import Path

RAW_DIR = Path('raw/')
OUTPUT_FILE = Path('data.json')


def convert_item(item: dict, index: int) -> dict:
    """Return the flat record for one raw *item*.

    Args:
        item: One entry of a raw file's ``'value'`` list.  Must provide
            ``'title'``, ``'metaInfo'`` and ``'metaInfoCollections'``.
        index: Running position of the item across all raw files; stored
            unchanged under ``'index'``.

    Returns:
        A dict with the keys ``title``, ``composer``, ``year``, ``type``,
        ``genre``, ``key``, ``pages``, ``level`` and ``index``.

    Raises:
        ValueError: If the item does not have exactly one ``levelinfo``
            entry, or has three or more ``tags``.
        KeyError: If an expected field is missing.
    """
    meta = item['metaInfo']
    collections = item['metaInfoCollections']

    # Explicit raises instead of ``assert`` so the sanity checks are still
    # enforced when Python runs with -O.
    level_info = collections['levelinfo']
    if len(level_info) != 1:
        raise ValueError(
            f"item {item.get('title')!r}: expected exactly one levelinfo "
            f"entry, got {len(level_info)}"
        )
    tags = collections['tags']
    if len(tags) >= 3:  # just type and usually key
        raise ValueError(
            f"item {item.get('title')!r}: expected fewer than 3 tags, "
            f"got {len(tags)}"
        )

    return {
        'title': item['title'],
        # Several contributors are joined into one display string.
        'composer': ' & '.join(by['title'] for by in collections['pieceBys']),
        # An empty/missing year value is encoded as -1 in the output.
        'year': int(meta['year']) if meta['year'] else -1,
        'type': meta['scoretype'].removeprefix('label_score-'),
        'genre': meta['genreName'].removeprefix('label_oktavgenre-'),
        'key': meta['tone'].removeprefix('label_'),
        'pages': int(meta['pageExtent']),
        'level': int(level_info[0]['metaInfo']['level']),
        'index': index,
    }


def iter_raw_items(raw_dir):
    """Yield every entry of the ``'value'`` list of each ``*.json`` file.

    Files directly inside *raw_dir* are visited in sorted path order.
    """
    for part in sorted(Path(raw_dir).glob('*.json')):
        # Hand json the raw bytes: it detects UTF-8/16/32 itself, so decoding
        # no longer depends on the platform's locale encoding.
        yield from json.loads(part.read_bytes())['value']


def build_records(raw_dir) -> list:
    """Return the flat records for all items found under *raw_dir*.

    The ``'index'`` of each record counts items across all files, starting
    at 0.
    """
    return [
        convert_item(item, index)
        for index, item in enumerate(iter_raw_items(raw_dir))
    ]


result = build_records(RAW_DIR)
OUTPUT_FILE.write_text(json.dumps(result), encoding='utf-8')