Fairseq
English
File size: 278 Bytes
f5feb4c
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
import glob
import json
import os
from sys import argv

def build_manifest(split, shard_root='shard'):
    """Return the manifest entries for one data split.

    Args:
        split: Name of the split sub-directory (e.g. ``'train'``).
        shard_root: Directory that contains one sub-directory per split.

    Returns:
        A one-element list holding a dict with the matched shard paths
        under ``'source'`` plus the fixed ``'source_lang'``, ``'weight'``
        and ``'name'`` fields. ``'source'`` is empty when nothing matches.
    """
    # glob.glob yields matches in arbitrary (filesystem-dependent) order;
    # sorting keeps the generated JSON reproducible across machines and runs.
    shards = sorted(glob.glob(f'{shard_root}/{split}/*'))
    return [{'source': shards, 'source_lang': 'en', 'weight': 1.0, 'name': '16gb-en'}]


# Create the output directory up front so a fresh checkout does not fail
# with FileNotFoundError when opening the first JSON file for writing.
os.makedirs('json', exist_ok=True)

# Write one manifest file per split: json/train.json and json/valid.json.
for split in ['train', 'valid']:
    with open(f'json/{split}.json', 'w') as f:
        json.dump(build_manifest(split), f, indent=4)