Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| import datasets | |
| from pathlib import Path | |
# Free-text description passed to ``datasets.DatasetInfo`` by the builder.
_DESCRIPTION: str = "Newsroom validation dataset"

# Feature (column) names of every generated example.
_DOCUMENT: str = "document"
_ID: str = "id"
class NewsroomDatasetValidation(datasets.GeneratorBasedBuilder):
    """Dataset builder that reads Newsroom records from local JSON Lines files.

    Every example exposes two string features: ``document`` (taken from the
    ``"sentence"`` field of a record) and ``id`` (taken from its ``"id"``
    field). Two splits are produced, ``train`` and ``validation``, read from
    ``train.jsonl`` and ``val.jsonl`` in the user-supplied data directory.
    """

    VERSION = datasets.Version("1.0.0")

    def _info(self):
        """Return the dataset metadata: description plus the two string features."""
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    _DOCUMENT: datasets.Value("string"),
                    _ID: datasets.Value("string"),
                }
            ),
        )

    def _split_generators(self, dl_manager):
        """Returns SplitGenerators.

        Args:
            dl_manager: Download manager handed in by ``datasets``; only its
                data directory is used, nothing is downloaded.

        Returns:
            A list with the TRAIN split (``train.jsonl``) and the VALIDATION
            split (``val.jsonl``), both resolved inside the data directory.
        """
        # ``manual_dir`` is the public accessor for the directory the user
        # passed as ``data_dir``; avoid reaching into the private ``_data_dir``.
        # NOTE(review): presumably this is None when no ``data_dir`` is given,
        # in which case ``os.path.join`` below raises TypeError -- confirm.
        data_dir = dl_manager.manual_dir
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "path": os.path.join(data_dir, "train.jsonl"),
                    "name": "train",
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
                    "path": os.path.join(data_dir, "val.jsonl"),
                    "name": "validation",
                },
            ),
        ]

    def _generate_examples(self, path=None, name=None):
        """Yields examples.

        Args:
            path: Path to a UTF-8 JSON Lines file. Each non-blank line must be
                a JSON object with ``"id"`` and ``"sentence"`` keys.
            name: Split label forwarded through ``gen_kwargs``; accepted for
                interface compatibility but not used here.

        Yields:
            ``(key, example)`` pairs, where ``key`` is the record's ``"id"``
            value and ``example`` holds that id and the record's
            ``"sentence"`` value under the ``id`` / ``document`` features.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record lacks the ``"id"`` or ``"sentence"`` key.
        """
        with open(path, encoding="utf-8") as f:
            for line in f:
                # Blank lines (e.g. an extra empty line at the end of the
                # file) carry no record and would make json.loads fail.
                if not line.strip():
                    continue
                record = json.loads(line)
                example_id = record["id"]
                yield example_id, {
                    _ID: example_id,
                    _DOCUMENT: record["sentence"],
                }