Tymec committed on
Commit
632adc4
1 Parent(s): ac221ce

Add progress bar to serialize

Browse files
Files changed (1) hide show
  1. app/utils.py +9 -2
app/utils.py CHANGED
@@ -11,16 +11,23 @@ if TYPE_CHECKING:
11
  __all__ = ["serialize", "deserialize"]
12
 
13
 
14
- def serialize(data: Sequence[str], path: Path, max_size: int = 100000) -> None:
15
  """Serialize data to a file
16
 
17
  Args:
18
  data: The data to serialize
19
  path: The path to save the serialized data
20
  max_size: The maximum size a chunk can be (in elements)
 
21
  """
22
  # first file is path, next chunks have ".1", ".2", etc. appended
23
- for i, chunk in enumerate(tqdm([data[i : i + max_size] for i in range(0, len(data), max_size)])):
 
 
 
 
 
 
24
  fd = path.with_suffix(f".{i}.pkl" if i else ".pkl")
25
  with fd.open("wb") as f:
26
  joblib.dump(chunk, f, compress=3)
 
11
  __all__ = ["serialize", "deserialize"]
12
 
13
 
14
+ def serialize(data: Sequence[str], path: Path, max_size: int = 100000, show_progress: bool = False) -> None:
15
  """Serialize data to a file
16
 
17
  Args:
18
  data: The data to serialize
19
  path: The path to save the serialized data
20
  max_size: The maximum size a chunk can be (in elements)
21
+ show_progress: Whether to show a progress bar
22
  """
23
  # first file is path, next chunks have ".1", ".2", etc. appended
24
+ for i, chunk in enumerate(
25
+ tqdm(
26
+ [data[i : i + max_size] for i in range(0, len(data), max_size)],
27
+ unit="chunk",
28
+ disable=not show_progress,
29
+ ),
30
+ ):
31
  fd = path.with_suffix(f".{i}.pkl" if i else ".pkl")
32
  with fd.open("wb") as f:
33
  joblib.dump(chunk, f, compress=3)