Update model
Browse files
- README.md +5 -6
- config.json +2 -3
- model.safetensors +1 -1
- tokenizer.json +1 -6
README.md
CHANGED
@@ -73,7 +73,7 @@ widget:
|
|
73 |
- [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-reddit-instruct-curated)] [euclaise/reddit-instruct-curated](https://huggingface.co/datasets/euclaise/reddit-instruct-curated)
|
74 |
- [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-aya_dataset)] [CohereForAI/aya_dataset](https://huggingface.co/datasets/CohereForAI/aya_dataset)
|
75 |
- [HuggingFaceH4/ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
|
76 |
-
- License: [Apache License 2.0](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-
|
77 |
|
78 |
## Recommended Prompt Format
|
79 |
|
@@ -91,7 +91,7 @@ widget:
|
|
91 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
|
92 |
import torch
|
93 |
|
94 |
-
model_path = "Felladrin/TinyMistral-248M-Chat-
|
95 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
96 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
97 |
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
|
@@ -178,7 +178,6 @@ llamafactory-cli train \
|
|
178 |
--preprocessing_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
|
179 |
--dataloader_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
|
180 |
--finetuning_type full \
|
181 |
-
--template default \
|
182 |
--flash_attn auto \
|
183 |
--enable_liger_kernel True \
|
184 |
--dataset_dir data \
|
@@ -188,15 +187,15 @@ llamafactory-cli train \
|
|
188 |
--num_train_epochs 2.0 \
|
189 |
--per_device_train_batch_size 4 \
|
190 |
--gradient_accumulation_steps 4 \
|
191 |
-
--lr_scheduler_type
|
192 |
--max_grad_norm 1.0 \
|
193 |
--logging_steps 10 \
|
194 |
--save_steps 50 \
|
195 |
--save_total_limit 1 \
|
196 |
--warmup_ratio 0.1 \
|
197 |
--packing False \
|
198 |
-
--report_to
|
199 |
-
--output_dir ~/TinyMistral-248M-Chat-
|
200 |
--pure_bf16 True \
|
201 |
--plot_loss True \
|
202 |
--trust_remote_code True \
|
|
|
73 |
- [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-reddit-instruct-curated)] [euclaise/reddit-instruct-curated](https://huggingface.co/datasets/euclaise/reddit-instruct-curated)
|
74 |
- [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-aya_dataset)] [CohereForAI/aya_dataset](https://huggingface.co/datasets/CohereForAI/aya_dataset)
|
75 |
- [HuggingFaceH4/ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
|
76 |
+
- License: [Apache License 2.0](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v4/resolve/main/license.txt)
|
77 |
|
78 |
## Recommended Prompt Format
|
79 |
|
|
|
91 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
|
92 |
import torch
|
93 |
|
94 |
+
model_path = "Felladrin/TinyMistral-248M-Chat-v4"
|
95 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
96 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
97 |
model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
|
|
|
178 |
--preprocessing_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
|
179 |
--dataloader_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
|
180 |
--finetuning_type full \
|
|
|
181 |
--flash_attn auto \
|
182 |
--enable_liger_kernel True \
|
183 |
--dataset_dir data \
|
|
|
187 |
--num_train_epochs 2.0 \
|
188 |
--per_device_train_batch_size 4 \
|
189 |
--gradient_accumulation_steps 4 \
|
190 |
+
--lr_scheduler_type linear \
|
191 |
--max_grad_norm 1.0 \
|
192 |
--logging_steps 10 \
|
193 |
--save_steps 50 \
|
194 |
--save_total_limit 1 \
|
195 |
--warmup_ratio 0.1 \
|
196 |
--packing False \
|
197 |
+
--report_to tensorboard \
|
198 |
+
--output_dir ~/TinyMistral-248M-Chat-v4 \
|
199 |
--pure_bf16 True \
|
200 |
--plot_loss True \
|
201 |
--trust_remote_code True \
|
config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "Felladrin/TinyMistral-248M-Chat-v3",
|
3 |
"architectures": ["MistralForCausalLM"],
|
4 |
"attention_dropout": 0.0,
|
5 |
"bos_token_id": 32000,
|
@@ -19,8 +18,8 @@
|
|
19 |
"sliding_window": null,
|
20 |
"tie_word_embeddings": false,
|
21 |
"torch_dtype": "bfloat16",
|
22 |
-
"transformers_version": "4.
|
23 |
-
"use_cache":
|
24 |
"use_sliding_window": false,
|
25 |
"vocab_size": 32005
|
26 |
}
|
|
|
1 |
{
|
|
|
2 |
"architectures": ["MistralForCausalLM"],
|
3 |
"attention_dropout": 0.0,
|
4 |
"bos_token_id": 32000,
|
|
|
18 |
"sliding_window": null,
|
19 |
"tie_word_embeddings": false,
|
20 |
"torch_dtype": "bfloat16",
|
21 |
+
"transformers_version": "4.50.0",
|
22 |
+
"use_cache": false,
|
23 |
"use_sliding_window": false,
|
24 |
"vocab_size": 32005
|
25 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 496060688
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da09172da13d6da1727beb0cef6c42e3fbc99bd3d9bdfedc0df8f5b2746c02a0
|
3 |
size 496060688
|
tokenizer.json
CHANGED
@@ -1,11 +1,6 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation": {
|
4 |
-
"direction": "Right",
|
5 |
-
"max_length": 1536,
|
6 |
-
"strategy": "LongestFirst",
|
7 |
-
"stride": 0
|
8 |
-
},
|
9 |
"padding": null,
|
10 |
"added_tokens": [
|
11 |
{
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": null,
|
|
|
|
|
|
|
|
|
|
|
4 |
"padding": null,
|
5 |
"added_tokens": [
|
6 |
{
|