"""Write one JSON manifest per dataset split.

For each of the ``train`` and ``valid`` splits, this script lists the files
matching ``shard/<split>/*`` and writes a one-element JSON list describing
them to ``json/<split>.json``. All paths are relative to the current working
directory.
"""
import glob
import json
import os
from sys import argv  # not used below; kept in case other tooling relies on it


def build_manifest(split):
    """Return the manifest (a one-element list) for ``split``.

    Args:
        split: Name of the sub-directory of ``shard/`` to list.

    Returns:
        A list holding a single dict with the keys ``source`` (paths matching
        ``shard/<split>/*``), ``source_lang``, ``weight`` and ``name``.
    """
    # glob.glob() yields matches in arbitrary order; sorting keeps the
    # generated file identical across runs over the same set of shards.
    shard_paths = sorted(glob.glob(f'shard/{split}/*'))
    return [{'source': shard_paths, 'source_lang': 'en', 'weight': 1.0, 'name': '16gb-en'}]


# Create the output directory up front so open() below does not fail with
# FileNotFoundError on a fresh checkout.
os.makedirs('json', exist_ok=True)

for split in ['train', 'valid']:
    with open(f'json/{split}.json', 'w', encoding='utf-8') as f:
        data = build_manifest(split)
        json.dump(data, f, indent=4)