Spaces:
Running
Running
Ensure utf8
Browse files
Not using utf8 by default should be illegal
- dataset.py +6 -5
dataset.py
CHANGED
|
@@ -27,7 +27,7 @@ class DatasetReader(IterableDataset):
|
|
| 27 |
)
|
| 28 |
|
| 29 |
def __iter__(self):
|
| 30 |
-
file_itr = open(self.filename, "r")
|
| 31 |
mapped_itr = map(self.preprocess, file_itr)
|
| 32 |
return mapped_itr
|
| 33 |
|
|
@@ -56,10 +56,11 @@ class ParallelTextReader(IterableDataset):
|
|
| 56 |
return pred, [gold]
|
| 57 |
|
| 58 |
def __iter__(self):
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
|
|
|
| 63 |
|
| 64 |
def __len__(self):
|
| 65 |
return self.num_sentences
|
|
|
|
| 27 |
)
|
| 28 |
|
| 29 |
def __iter__(self):
|
| 30 |
+
file_itr = open(self.filename, "r", encoding="utf8")
|
| 31 |
mapped_itr = map(self.preprocess, file_itr)
|
| 32 |
return mapped_itr
|
| 33 |
|
|
|
|
| 56 |
return pred, [gold]
|
| 57 |
|
| 58 |
def __iter__(self):
|
| 59 |
+
with open(self.pred_path, "r", encoding="utf8") as pred_itr, open(
|
| 60 |
+
self.gold_path, "r", encoding="utf8"
|
| 61 |
+
) as gold_itr:
|
| 62 |
+
mapped_itr = map(self.preprocess, pred_itr, gold_itr)
|
| 63 |
+
return mapped_itr
|
| 64 |
|
| 65 |
def __len__(self):
|
| 66 |
return self.num_sentences
|