Correct typos in datasets.py (#639)
Browse files — src/axolotl/datasets.py (+2 −2)
src/axolotl/datasets.py
CHANGED
@@ -22,7 +22,7 @@ class TokenizedPromptDataset(Dataset):
     """
     Dataset that returns tokenized prompts from a stream of text files.
         Args:
-            prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for
+            prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for processing the data.
             dataset (dataset.Dataset): Dataset with text files.
     """

@@ -55,7 +55,7 @@ class ConstantLengthDataset(IterableDataset):
     """
     Iterable dataset that returns constant length chunks of tokens from stream of text files.
         Args:
-            tokenizer (Tokenizer): The processor used for
+            tokenizer (Tokenizer): The processor used for processing the data.
             dataset (dataset.Dataset): Dataset with text files.
             seq_length (int): Length of token sequences to return.
     """