Update model

Browse files

Files changed (6) hide show

README.md +116 -171
config.json +6 -3
generation_config.json +0 -6
model.safetensors +2 -2
tokenizer.json +0 -0
tokenizer_config.json +8 -0

README.md CHANGED Viewed

@@ -1,170 +1,72 @@
 ---
 language:
-- en
 license: apache-2.0
 base_model: Locutusque/TinyMistral-248M
 datasets:
-- HuggingFaceH4/ultrachat_200k
-- Felladrin/ChatML-ultrachat_200k
-- Open-Orca/OpenOrca
-- Felladrin/ChatML-OpenOrca
-- hkust-nlp/deita-10k-v0
-- Felladrin/ChatML-deita-10k-v0
-- LDJnr/Capybara
-- Felladrin/ChatML-Capybara
-- databricks/databricks-dolly-15k
-- Felladrin/ChatML-databricks-dolly-15k
-- euclaise/reddit-instruct-curated
-- Felladrin/ChatML-reddit-instruct-curated
-- CohereForAI/aya_dataset
-- Felladrin/ChatML-aya_dataset
 pipeline_tag: text-generation
 widget:
-- messages:
-  - role: system
-    content: You are a highly knowledgeable and friendly assistant. Your goal is to
-      understand and respond to user inquiries with clarity. Your interactions are
-      always respectful, helpful, and focused on delivering the most accurate information
-      to the user.
-  - role: user
-    content: Hey! Got a question for you!
-  - role: assistant
-    content: Sure! What's it?
-  - role: user
-    content: What are some potential applications for quantum computing?
-- messages:
-  - role: user
-    content: Heya!
-  - role: assistant
-    content: Hi! How may I help you?
-  - role: user
-    content: I'm interested in developing a career in software engineering. What would
-      you recommend me to do?
-- messages:
-  - role: user
-    content: Morning!
-  - role: assistant
-    content: Good morning! How can I help you today?
-  - role: user
-    content: Could you give me some tips for becoming a healthier person?
-- messages:
-  - role: system
-    content: You are a very creative assistant. User will give you a task, which you
-      should complete with all your knowledge.
-  - role: user
-    content: Hello! Can you please elaborate a background story of an RPG game about
-      wizards and dragons in a sci-fi world?
 inference:
   parameters:
     max_new_tokens: 250
     penalty_alpha: 0.5
     top_k: 5
-model-index:
-- name: TinyMistral-248M-Chat-v2
-  results:
-  - task:
-      type: text-generation
-      name: Text Generation
-    dataset:
-      name: AI2 Reasoning Challenge (25-Shot)
-      type: ai2_arc
-      config: ARC-Challenge
-      split: test
-      args:
-        num_few_shot: 25
-    metrics:
-    - type: acc_norm
-      value: 23.29
-      name: normalized accuracy
-    source:
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/TinyMistral-248M-Chat-v2
-      name: Open LLM Leaderboard
-  - task:
-      type: text-generation
-      name: Text Generation
-    dataset:
-      name: HellaSwag (10-Shot)
-      type: hellaswag
-      split: validation
-      args:
-        num_few_shot: 10
-    metrics:
-    - type: acc_norm
-      value: 27.39
-      name: normalized accuracy
-    source:
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/TinyMistral-248M-Chat-v2
-      name: Open LLM Leaderboard
-  - task:
-      type: text-generation
-      name: Text Generation
-    dataset:
-      name: MMLU (5-Shot)
-      type: cais/mmlu
-      config: all
-      split: test
-      args:
-        num_few_shot: 5
-    metrics:
-    - type: acc
-      value: 23.52
-      name: accuracy
-    source:
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/TinyMistral-248M-Chat-v2
-      name: Open LLM Leaderboard
-  - task:
-      type: text-generation
-      name: Text Generation
-    dataset:
-      name: TruthfulQA (0-shot)
-      type: truthful_qa
-      config: multiple_choice
-      split: validation
-      args:
-        num_few_shot: 0
-    metrics:
-    - type: mc2
-      value: 41.32
-    source:
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/TinyMistral-248M-Chat-v2
-      name: Open LLM Leaderboard
-  - task:
-      type: text-generation
-      name: Text Generation
-    dataset:
-      name: Winogrande (5-shot)
-      type: winogrande
-      config: winogrande_xl
-      split: validation
-      args:
-        num_few_shot: 5
-    metrics:
-    - type: acc
-      value: 49.01
-      name: accuracy
-    source:
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/TinyMistral-248M-Chat-v2
-      name: Open LLM Leaderboard
-  - task:
-      type: text-generation
-      name: Text Generation
-    dataset:
-      name: GSM8k (5-shot)
-      type: gsm8k
-      config: main
-      split: test
-      args:
-        num_few_shot: 5
-    metrics:
-    - type: acc
-      value: 0.0
-      name: accuracy
-    source:
-      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/TinyMistral-248M-Chat-v2
-      name: Open LLM Leaderboard
 ---
-# Locutusque's TinyMistral-248M trained on chat datasets
 - Base model: [Locutusque/TinyMistral-248M](https://huggingface.co/Locutusque/TinyMistral-248M) with two additional special tokens (`<|im_start|>` and `<|im_end|>`)
 - Datasets:
@@ -175,10 +77,8 @@ model-index:
   - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-databricks-dolly-15k)] [databricks/databricks-dolly-15k](https://huggingface.co/datasets/databricks/databricks-dolly-15k)
   - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-reddit-instruct-curated)] [euclaise/reddit-instruct-curated](https://huggingface.co/datasets/euclaise/reddit-instruct-curated)
   - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-aya_dataset)] [CohereForAI/aya_dataset](https://huggingface.co/datasets/CohereForAI/aya_dataset)
-- License: [Apache License 2.0](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v2/resolve/main/license.txt)
-- Availability in other ML formats:
-  - GGUF: [Felladrin/gguf-TinyMistral-248M-Chat-v2](https://huggingface.co/Felladrin/gguf-TinyMistral-248M-Chat-v2)
-  - ONNX: [Felladrin/onnx-TinyMistral-248M-Chat-v2](https://huggingface.co/Felladrin/onnx-TinyMistral-248M-Chat-v2)
 ## Recommended Prompt Format
@@ -202,7 +102,7 @@ top_k: 5
 ```python
 from transformers import pipeline
-generate = pipeline("text-generation", "Felladrin/TinyMistral-248M-Chat-v2")
 messages = [
     {
@@ -251,16 +151,61 @@ This model was trained with [SFTTrainer](https://huggingface.co/docs/trl/main/en
 | Scheduler              | cosine                                        |
 | Seed                   | 42                                            |
-## [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
-Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Felladrin__TinyMistral-248M-Chat-v2)
-|             Metric              |Value|
-|---------------------------------|----:|
-|Avg.                             |27.42|
-|AI2 Reasoning Challenge (25-Shot)|23.29|
-|HellaSwag (10-Shot)              |27.39|
-|MMLU (5-Shot)                    |23.52|
-|TruthfulQA (0-shot)              |41.32|
-|Winogrande (5-shot)              |49.01|
-|GSM8k (5-shot)                   | 0.00|

 ---
 language:
+  - en
 license: apache-2.0
 base_model: Locutusque/TinyMistral-248M
 datasets:
+  - HuggingFaceH4/ultrachat_200k
+  - Felladrin/ChatML-ultrachat_200k
+  - Open-Orca/OpenOrca
+  - Felladrin/ChatML-OpenOrca
+  - hkust-nlp/deita-10k-v0
+  - Felladrin/ChatML-deita-10k-v0
+  - LDJnr/Capybara
+  - Felladrin/ChatML-Capybara
+  - databricks/databricks-dolly-15k
+  - Felladrin/ChatML-databricks-dolly-15k
+  - euclaise/reddit-instruct-curated
+  - Felladrin/ChatML-reddit-instruct-curated
+  - CohereForAI/aya_dataset
+  - Felladrin/ChatML-aya_dataset
+  - HuggingFaceH4/ultrafeedback_binarized
 pipeline_tag: text-generation
 widget:
+  - messages:
+      - role: system
+        content:
+          You are a highly knowledgeable and friendly assistant. Your goal is to
+          understand and respond to user inquiries with clarity. Your interactions are
+          always respectful, helpful, and focused on delivering the most accurate information
+          to the user.
+      - role: user
+        content: Hey! Got a question for you!
+      - role: assistant
+        content: Sure! What is it?
+      - role: user
+        content: What are some potential applications for quantum computing?
+  - messages:
+      - role: user
+        content: Heya!
+      - role: assistant
+        content: Hi! How may I help you?
+      - role: user
+        content:
+          I'm interested in developing a career in software engineering. What would
+          you recommend me to do?
+  - messages:
+      - role: user
+        content: Morning!
+      - role: assistant
+        content: Good morning! How can I help you today?
+      - role: user
+        content: Could you give me some tips for becoming a healthier person?
+  - messages:
+      - role: system
+        content:
+          You are a very creative assistant. User will give you a task, which you
+          should complete with all your knowledge.
+      - role: user
+        content:
+          Hello! Can you please elaborate a background story of an RPG game about
+          wizards and dragons in a sci-fi world?
 inference:
   parameters:
     max_new_tokens: 250
     penalty_alpha: 0.5
     top_k: 5
 ---
+# TinyMistral-248M-Chat
 - Base model: [Locutusque/TinyMistral-248M](https://huggingface.co/Locutusque/TinyMistral-248M) with two additional special tokens (`<|im_start|>` and `<|im_end|>`)
 - Datasets:
   - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-databricks-dolly-15k)] [databricks/databricks-dolly-15k](https://huggingface.co/datasets/databricks/databricks-dolly-15k)
   - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-reddit-instruct-curated)] [euclaise/reddit-instruct-curated](https://huggingface.co/datasets/euclaise/reddit-instruct-curated)
   - [[ChatML](https://huggingface.co/datasets/Felladrin/ChatML-aya_dataset)] [CohereForAI/aya_dataset](https://huggingface.co/datasets/CohereForAI/aya_dataset)
+  - [HuggingFaceH4/ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized)
+- License: [Apache License 2.0](https://huggingface.co/Felladrin/TinyMistral-248M-Chat-v3/resolve/main/license.txt)
 ## Recommended Prompt Format
 ```python
 from transformers import pipeline
+generate = pipeline("text-generation", "Felladrin/TinyMistral-248M-Chat-v3")
 messages = [
     {
 | Scheduler              | cosine                                        |
 | Seed                   | 42                                            |
+Then, the model was fine-tuned with preference optimization through the DPO stage of [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory), using the SimPO loss (`--pref_loss simpo`), with the following hyperparameters and command:
+| Parameter                   | Value                                                                                                          |
+| :-------------------------- | :------------------------------------------------------------------------------------------------------------- |
+| Dataset                     | [HuggingFaceH4/ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) |
+| Learning rate               | 1e-06                                                                                                          |
+| Train batch size            | 4                                                                                                              |
+| Eval batch size             | 8                                                                                                              |
+| Seed                        | 42                                                                                                             |
+| Distributed type            | multi-GPU                                                                                                      |
+| Number of devices           | 8                                                                                                              |
+| Gradient accumulation steps | 4                                                                                                              |
+| Total train batch size      | 128                                                                                                            |
+| Total eval batch size       | 64                                                                                                             |
+| Optimizer                   | adamw_8bit with betas=(0.9,0.999) and epsilon=1e-08                                                            |
+| LR scheduler type           | cosine                                                                                                         |
+| LR scheduler warmup ratio   | 0.1                                                                                                            |
+| Number of epochs            | 2.0                                                                                                            |
+```sh
+llamafactory-cli train \
+    --stage dpo \
+    --do_train True \
+    --model_name_or_path ~/TinyMistral-248M-Chat \
+    --preprocessing_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
+    --dataloader_num_workers $(python -c "import os; print(max(1, os.cpu_count() - 2))") \
+    --finetuning_type full \
+    --template default \
+    --flash_attn auto \
+    --enable_liger_kernel True \
+    --dataset_dir data \
+    --dataset ultrafeedback \
+    --cutoff_len 1024 \
+    --learning_rate 1e-6 \
+    --num_train_epochs 2.0 \
+    --per_device_train_batch_size 4 \
+    --gradient_accumulation_steps 4 \
+    --lr_scheduler_type cosine \
+    --max_grad_norm 1.0 \
+    --logging_steps 10 \
+    --save_steps 50 \
+    --save_total_limit 1 \
+    --warmup_ratio 0.1 \
+    --packing False \
+    --report_to none \
+    --output_dir ~/TinyMistral-248M-Chat-v3 \
+    --pure_bf16 True \
+    --plot_loss True \
+    --trust_remote_code True \
+    --ddp_timeout 180000000 \
+    --include_tokens_per_second True \
+    --include_num_input_tokens_seen True \
+    --optim adamw_8bit \
+    --pref_beta 0.5 \
+    --pref_ftx 0 \
+    --pref_loss simpo \
+    --gradient_checkpointing True
+```

config.json CHANGED Viewed

@@ -1,8 +1,10 @@
 {
   "architectures": ["MistralForCausalLM"],
   "attention_dropout": 0.0,
   "bos_token_id": 32000,
   "eos_token_id": 32003,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
@@ -14,10 +16,11 @@
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-6,
   "rope_theta": 10000.0,
-  "sliding_window": 1024,
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.38.2",
   "use_cache": true,
   "vocab_size": 32005
 }

 {
+  "_name_or_path": "Felladrin/TinyMistral-248M-Chat-v3",
   "architectures": ["MistralForCausalLM"],
   "attention_dropout": 0.0,
   "bos_token_id": 32000,
   "eos_token_id": 32003,
+  "head_dim": 32,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-6,
   "rope_theta": 10000.0,
+  "sliding_window": null,
   "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.49.0",
   "use_cache": true,
+  "use_sliding_window": false,
   "vocab_size": 32005
 }

generation_config.json DELETED Viewed

@@ -1,6 +0,0 @@
-{
-  "_from_model_config": true,
-  "bos_token_id": 32000,
-  "eos_token_id": 32003,
-  "transformers_version": "4.38.2"
-}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52178bd78ce2e9eaff3fba98236b261d0c97c5423b6eb1dee8d6d3abe1a37850
-size 992108712

 version https://git-lfs.github.com/spec/v1
+oid sha256:ddf0ded71ab5a315f90bc932d018a2b20e81987f49f8c9c6efcaf612b2d5a4d6
+size 496060688

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,6 +1,7 @@
 {
   "add_bos_token": false,
   "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -72,12 +73,19 @@
   "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "legacy": true,
   "model_max_length": 2048,
   "pad_token": "[PAD]",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }

 {
   "add_bos_token": false,
   "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
   "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
+  "extra_special_tokens": {},
   "legacy": true,
+  "max_length": 2048,
   "model_max_length": 2048,
   "pad_token": "[PAD]",
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
+  "split_special_tokens": false,
+  "stride": 0,
   "tokenizer_class": "LlamaTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>",
   "use_default_system_prompt": false
 }