Added pretraining datasets from OpenLLaMA v2 to README
Browse files
README.md
CHANGED
@@ -6,6 +6,9 @@ datasets:
|
|
6 |
- sahil2801/code_instructions_120k
|
7 |
- medalpaca/medical_meadow_mediqa
|
8 |
- kaiokendev/SuperCOT-dataset
|
|
|
|
|
|
|
9 |
language:
|
10 |
- en
|
11 |
library_name: transformers
|
@@ -163,4 +166,4 @@ special_tokens:
|
|
163 |
eos_token: "</s>"
|
164 |
unk_token: "<unk>"
|
165 |
```
|
166 |
-
</details>
|
|
|
6 |
- sahil2801/code_instructions_120k
|
7 |
- medalpaca/medical_meadow_mediqa
|
8 |
- kaiokendev/SuperCOT-dataset
|
9 |
+
- tiiuae/falcon-refinedweb
|
10 |
+
- bigcode/starcoderdata
|
11 |
+
- togethercomputer/RedPajama-Data-1T
|
12 |
language:
|
13 |
- en
|
14 |
library_name: transformers
|
|
|
166 |
eos_token: "</s>"
|
167 |
unk_token: "<unk>"
|
168 |
```
|
169 |
+
</details>
|