qgallouedec HF Staff committed on
Commit
1a05e49
·
verified ·
1 Parent(s): 9ff74e9

Model save

Browse files
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
- base_model: Qwen/Qwen2.5-Math-7B
3
- datasets: DigitalLearningGmbH/MATH-lighteval
4
  library_name: transformers
5
  model_name: Qwen-2.5-7B-Simple-RL
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - grpo
11
  licence: license
@@ -13,7 +11,7 @@ licence: license
13
 
14
  # Model Card for Qwen-2.5-7B-Simple-RL
15
 
16
- This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B) on the [DigitalLearningGmbH/MATH-lighteval](https://huggingface.co/datasets/DigitalLearningGmbH/MATH-lighteval) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/huggingface/runs/pq5sovxv)
33
 
34
 
35
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
@@ -37,7 +35,7 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
37
  ### Framework versions
38
 
39
  - TRL: 0.17.0.dev0
40
- - Transformers: 4.49.0
41
  - Pytorch: 2.6.0
42
  - Datasets: 3.4.1
43
  - Tokenizers: 0.21.1
 
1
  ---
2
+ base_model: Qwen/Qwen2.5-Math-7B-Instruct
 
3
  library_name: transformers
4
  model_name: Qwen-2.5-7B-Simple-RL
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - grpo
9
  licence: license
 
11
 
12
  # Model Card for Qwen-2.5-7B-Simple-RL
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Math-7B-Instruct).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/huggingface/runs/e8ipvp9s)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
35
  ### Framework versions
36
 
37
  - TRL: 0.17.0.dev0
38
+ - Transformers: 4.51.0
39
  - Pytorch: 2.6.0
40
  - Datasets: 3.4.1
41
  - Tokenizers: 0.21.1
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": -4.392043727967474,
4
- "train_runtime": 13254.7719,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.566,
7
- "train_steps_per_second": 0.035
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.12281362387614372,
4
+ "train_runtime": 10554.7907,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.711,
7
+ "train_steps_per_second": 0.011
8
  }
config.json CHANGED
@@ -1,11 +1,10 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2.5-Math-7B",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 151643,
8
- "eos_token_id": 151643,
9
  "hidden_act": "silu",
10
  "hidden_size": 3584,
11
  "initializer_range": 0.02,
@@ -18,13 +17,12 @@
18
  "num_key_value_heads": 4,
19
  "rms_norm_eps": 1e-06,
20
  "rope_scaling": null,
21
- "rope_theta": 10000,
22
  "sliding_window": 4096,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.49.0",
26
- "use_cache": true,
27
- "use_mrope": false,
28
  "use_sliding_window": false,
29
  "vocab_size": 152064
30
  }
 
1
  {
 
2
  "architectures": [
3
  "Qwen2ForCausalLM"
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
  "hidden_act": "silu",
9
  "hidden_size": 3584,
10
  "initializer_range": 0.02,
 
17
  "num_key_value_heads": 4,
18
  "rms_norm_eps": 1e-06,
19
  "rope_scaling": null,
20
+ "rope_theta": 10000.0,
21
  "sliding_window": 4096,
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.51.0",
25
+ "use_cache": false,
 
26
  "use_sliding_window": false,
27
  "vocab_size": 152064
28
  }
generation_config.json CHANGED
@@ -1,6 +1,9 @@
1
  {
2
  "bos_token_id": 151643,
3
- "eos_token_id": 151643,
4
- "max_new_tokens": 2048,
5
- "transformers_version": "4.49.0"
 
 
 
6
  }
 
1
  {
2
  "bos_token_id": 151643,
3
+ "eos_token_id": [
4
+ 151645,
5
+ 151643
6
+ ],
7
+ "pad_token_id": 151643,
8
+ "transformers_version": "4.51.0"
9
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a1eb6e5679e4d58dd28e87c12fa85b41e6f8649886af1b00e5e560fe9d132ea
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229710b12313b19d22035cd22c92aa7ad6a4ed36452951819b53266fa32c70a4
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95cd0d8bf08fa8ef084bd405054d1ec4dfa7a3e1b856c6f80bcbdb25a01118a8
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9522a64b8db4d386c053355132b2cd00602614f0e8979e394d34f7ce40c331b7
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00f846aafa78bf493ca9d4cb53df3c719240a90c7046ca60255a7bf169ba1165
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e55a974b789e82ee4755da9b66d4eee5e1e0ec997ebd41e7886505b948d14c2
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:621840d94a3e10c3fd2b1d1227690bb2b13530236ad68b97eabacb16ccb7cd2c
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc9e59c746ada8810f79f0985b9941a57f71cc14dc500b93fdbde4c64a8d935d
3
  size 1089994880
special_tokens_map.json CHANGED
@@ -15,7 +15,7 @@
15
  "<|video_pad|>"
16
  ],
17
  "eos_token": {
18
- "content": "<|endoftext|>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
 
15
  "<|video_pad|>"
16
  ],
17
  "eos_token": {
18
+ "content": "<|im_end|>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -197,7 +197,7 @@
197
  "bos_token": null,
198
  "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- 
'<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
  "clean_up_tokenization_spaces": false,
200
- "eos_token": "<|endoftext|>",
201
  "errors": "replace",
202
  "extra_special_tokens": {},
203
  "model_max_length": 131072,
 
197
  "bos_token": null,
198
  "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- 
'<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
  "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
  "extra_special_tokens": {},
203
  "model_max_length": 131072,
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": -4.392043727967474,
4
- "train_runtime": 13254.7719,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.566,
7
- "train_steps_per_second": 0.035
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.12281362387614372,
4
+ "train_runtime": 10554.7907,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.711,
7
+ "train_steps_per_second": 0.011
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd5677e339fd6306f1bd6b6bc1ee482248770155ab82c82caf21fa6542781d24
3
  size 8568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d6ac3cfc11a0c97d69bb715ed0de66399a36fc026b721684ac5f8c83cc60b9b
3
  size 8568