Spaces:
Build error
Build error
meg-huggingface
committed on
Commit
·
4f4c0c4
1
Parent(s):
9f6cc2b
tokenized df bug
Browse files
data_measurements/dataset_statistics.py
CHANGED
|
@@ -455,7 +455,7 @@ class DatasetStatisticsCacheClass:
|
|
| 455 |
self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
|
| 456 |
else:
|
| 457 |
logs.info("Calculating vocab afresh")
|
| 458 |
-
if
|
| 459 |
self.tokenized_df = self.do_tokenization()
|
| 460 |
if save:
|
| 461 |
logs.info("Writing out.")
|
|
|
|
| 455 |
self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
|
| 456 |
else:
|
| 457 |
logs.info("Calculating vocab afresh")
|
| 458 |
+
if self.tokenized_df is None:
|
| 459 |
self.tokenized_df = self.do_tokenization()
|
| 460 |
if save:
|
| 461 |
logs.info("Writing out.")
|