Spaces:
Running
Running
Fix count lines
Browse files
- dataset.py +1 -9
dataset.py
CHANGED
|
@@ -1,17 +1,9 @@
|
|
| 1 |
from torch.utils.data import IterableDataset
|
| 2 |
|
| 3 |
|
| 4 |
-
def blocks(files, size=65536):
|
| 5 |
-
while True:
|
| 6 |
-
b = files.read(size)
|
| 7 |
-
if not b:
|
| 8 |
-
break
|
| 9 |
-
yield b
|
| 10 |
-
|
| 11 |
-
|
| 12 |
def count_lines(input_path: str) -> int:
|
| 13 |
with open(input_path, "r", encoding="utf8") as f:
|
| 14 |
-
return sum(
|
| 15 |
|
| 16 |
|
| 17 |
class DatasetReader(IterableDataset):
|
|
|
|
| 1 |
from torch.utils.data import IterableDataset
|
| 2 |
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
def count_lines(input_path: str) -> int:
|
| 5 |
with open(input_path, "r", encoding="utf8") as f:
|
| 6 |
+
return sum(1 for _ in f)
|
| 7 |
|
| 8 |
|
| 9 |
class DatasetReader(IterableDataset):
|