Spaces:
Runtime error
Runtime error
Add progress bar to serialize
Browse files- app/utils.py +9 -2
app/utils.py
CHANGED
|
@@ -11,16 +11,23 @@ if TYPE_CHECKING:
|
|
| 11 |
__all__ = ["serialize", "deserialize"]
|
| 12 |
|
| 13 |
|
| 14 |
-
def serialize(data: Sequence[str], path: Path, max_size: int = 100000) -> None:
|
| 15 |
"""Serialize data to a file
|
| 16 |
|
| 17 |
Args:
|
| 18 |
data: The data to serialize
|
| 19 |
path: The path to save the serialized data
|
| 20 |
max_size: The maximum size a chunk can be (in elements)
|
|
|
|
| 21 |
"""
|
| 22 |
# first file is path, next chunks have ".1", ".2", etc. appended
|
| 23 |
-
for i, chunk in enumerate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
fd = path.with_suffix(f".{i}.pkl" if i else ".pkl")
|
| 25 |
with fd.open("wb") as f:
|
| 26 |
joblib.dump(chunk, f, compress=3)
|
|
|
|
| 11 |
__all__ = ["serialize", "deserialize"]
|
| 12 |
|
| 13 |
|
| 14 |
+
def serialize(data: Sequence[str], path: Path, max_size: int = 100000, show_progress: bool = False) -> None:
    """Serialize data to one or more compressed chunk files.

    The data is split into consecutive slices of at most ``max_size``
    elements and each slice is dumped to its own file. File names are
    derived from ``path`` with ``Path.with_suffix``: the first chunk uses
    the suffix ``.pkl`` and every later chunk ``i`` (1, 2, ...) uses the
    suffix ``.{i}.pkl``. Empty ``data`` produces no files.

    Args:
        data: The data to serialize
        path: The path to save the serialized data
        max_size: The maximum size a chunk can be (in elements)
        show_progress: Whether to show a progress bar

    Raises:
        ValueError: If ``max_size`` is smaller than 1
    """
    # Fail loudly up front: a zero step makes ``range`` raise an opaque error
    # and a negative one would silently write nothing at all.
    if max_size < 1:
        msg = f"max_size must be a positive integer, got {max_size}"
        raise ValueError(msg)

    # Ceiling division; tqdm needs the total because a generator has no length.
    n_chunks = (len(data) + max_size - 1) // max_size

    # Slice lazily so only one chunk copy is alive at a time instead of
    # materialising a full second copy of the data before the first write.
    chunks = (data[start : start + max_size] for start in range(0, len(data), max_size))

    # first chunk gets the ".pkl" suffix, next chunks get ".1.pkl", ".2.pkl", etc.
    for i, chunk in enumerate(
        tqdm(
            chunks,
            total=n_chunks,
            unit="chunk",
            disable=not show_progress,
        ),
    ):
        fd = path.with_suffix(f".{i}.pkl" if i else ".pkl")
        with fd.open("wb") as f:
            joblib.dump(chunk, f, compress=3)
|