moyixiao committed on
Commit
d1b3a71
·
verified ·
1 Parent(s): 886d8a6

Training in progress, step 80

Browse files
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "Qwen/Qwen2.5-1.5B",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "up_proj",
27
- "gate_proj",
28
- "o_proj",
29
  "q_proj",
30
- "down_proj",
31
  "v_proj",
32
- "k_proj"
 
 
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen3-0.6B-Base",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
26
  "q_proj",
 
27
  "v_proj",
28
+ "gate_proj",
29
+ "down_proj",
30
+ "o_proj",
31
+ "k_proj",
32
+ "up_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94c99203693cf3ddbddb10d5fe81a43097fd257d9d62cc27d120ef8a8b3c8e81
3
- size 147770496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:704aa895f2f07026040c80aabf199a59a516b3812a5f621b843a6d90d51efef1
3
+ size 80792456
added_tokens.json CHANGED
@@ -1,6 +1,10 @@
1
  {
 
2
  "</tool_call>": 151658,
 
 
3
  "<tool_call>": 151657,
 
4
  "<|box_end|>": 151649,
5
  "<|box_start|>": 151648,
6
  "<|endoftext|>": 151643,
 
1
  {
2
+ "</think>": 151668,
3
  "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
  "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
  "<|box_end|>": 151649,
9
  "<|box_start|>": 151648,
10
  "<|endoftext|>": 151643,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
- size 11421896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json CHANGED
@@ -177,6 +177,38 @@
177
  "rstrip": false,
178
  "single_word": false,
179
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  }
181
  },
182
  "additional_special_tokens": [
 
177
  "rstrip": false,
178
  "single_word": false,
179
  "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
  }
213
  },
214
  "additional_special_tokens": [
trainer_log.jsonl CHANGED
@@ -1,24 +1,8 @@
1
- {"current_steps": 3610, "total_steps": 5106, "loss": 0.4665, "lr": 5.954404425637193e-05, "epoch": 4.2420681551116335, "percentage": 70.7, "elapsed_time": "0:28:37", "remaining_time": "0:11:51"}
2
- {"current_steps": 3620, "total_steps": 5106, "loss": 0.4649, "lr": 5.880810585548123e-05, "epoch": 4.253819036427732, "percentage": 70.9, "elapsed_time": "0:57:02", "remaining_time": "0:23:24"}
3
- {"current_steps": 3630, "total_steps": 5106, "loss": 0.4691, "lr": 5.80756331929067e-05, "epoch": 4.26556991774383, "percentage": 71.09, "elapsed_time": "1:25:27", "remaining_time": "0:34:44"}
4
- {"current_steps": 3640, "total_steps": 5106, "loss": 0.4714, "lr": 5.734665410619469e-05, "epoch": 4.27732079905993, "percentage": 71.29, "elapsed_time": "1:53:51", "remaining_time": "0:45:51"}
5
- {"current_steps": 3650, "total_steps": 5106, "loss": 0.4705, "lr": 5.6621196300118405e-05, "epoch": 4.289071680376028, "percentage": 71.48, "elapsed_time": "2:22:16", "remaining_time": "0:56:45"}
6
- {"current_steps": 3660, "total_steps": 5106, "loss": 0.4649, "lr": 5.589928734562529e-05, "epoch": 4.300822561692127, "percentage": 71.68, "elapsed_time": "2:50:41", "remaining_time": "1:07:26"}
7
- {"current_steps": 3670, "total_steps": 5106, "loss": 0.4674, "lr": 5.5180954678788934e-05, "epoch": 4.312573443008225, "percentage": 71.88, "elapsed_time": "3:19:04", "remaining_time": "1:17:53"}
8
- {"current_steps": 3680, "total_steps": 5106, "loss": 0.4682, "lr": 5.446622559976665e-05, "epoch": 4.324324324324325, "percentage": 72.07, "elapsed_time": "3:47:26", "remaining_time": "1:28:07"}
9
- {"current_steps": 3690, "total_steps": 5106, "loss": 0.4581, "lr": 5.3755127271761567e-05, "epoch": 4.336075205640423, "percentage": 72.27, "elapsed_time": "4:15:51", "remaining_time": "1:38:10"}
10
- {"current_steps": 3700, "total_steps": 5106, "loss": 0.4834, "lr": 5.3047686719990736e-05, "epoch": 4.3478260869565215, "percentage": 72.46, "elapsed_time": "4:44:15", "remaining_time": "1:48:01"}
11
- {"current_steps": 3710, "total_steps": 5106, "loss": 0.4693, "lr": 5.2343930830657565e-05, "epoch": 4.35957696827262, "percentage": 72.66, "elapsed_time": "5:12:39", "remaining_time": "1:57:38"}
12
- {"current_steps": 3720, "total_steps": 5106, "loss": 0.4623, "lr": 5.1643886349930477e-05, "epoch": 4.371327849588719, "percentage": 72.86, "elapsed_time": "5:41:02", "remaining_time": "2:07:03"}
13
- {"current_steps": 3730, "total_steps": 5106, "loss": 0.4786, "lr": 5.094757988292612e-05, "epoch": 4.383078730904818, "percentage": 73.05, "elapsed_time": "6:09:27", "remaining_time": "2:16:17"}
14
- {"current_steps": 3740, "total_steps": 5106, "loss": 0.4736, "lr": 5.0255037892698235e-05, "epoch": 4.394829612220916, "percentage": 73.25, "elapsed_time": "6:37:52", "remaining_time": "2:25:19"}
15
- {"current_steps": 3750, "total_steps": 5106, "loss": 0.4723, "lr": 4.956628669923218e-05, "epoch": 4.406580493537016, "percentage": 73.44, "elapsed_time": "7:06:16", "remaining_time": "2:34:08"}
16
- {"current_steps": 3760, "total_steps": 5106, "loss": 0.4794, "lr": 4.8881352478444295e-05, "epoch": 4.418331374853114, "percentage": 73.64, "elapsed_time": "7:34:39", "remaining_time": "2:42:45"}
17
- {"current_steps": 3770, "total_steps": 5106, "loss": 0.4671, "lr": 4.820026126118745e-05, "epoch": 4.430082256169213, "percentage": 73.83, "elapsed_time": "8:03:05", "remaining_time": "2:51:11"}
18
- {"current_steps": 3780, "total_steps": 5106, "loss": 0.4523, "lr": 4.752303893226154e-05, "epoch": 4.441833137485311, "percentage": 74.03, "elapsed_time": "8:31:29", "remaining_time": "2:59:25"}
19
- {"current_steps": 3790, "total_steps": 5106, "loss": 0.4687, "lr": 4.6849711229429644e-05, "epoch": 4.45358401880141, "percentage": 74.23, "elapsed_time": "8:59:54", "remaining_time": "3:07:28"}
20
- {"current_steps": 3800, "total_steps": 5106, "loss": 0.4528, "lr": 4.61803037424401e-05, "epoch": 4.465334900117509, "percentage": 74.42, "elapsed_time": "9:28:16", "remaining_time": "3:15:18"}
21
- {"current_steps": 3810, "total_steps": 5106, "loss": 0.4643, "lr": 4.551484191205389e-05, "epoch": 4.477085781433607, "percentage": 74.62, "elapsed_time": "9:56:41", "remaining_time": "3:22:58"}
22
- {"current_steps": 3820, "total_steps": 5106, "loss": 0.4634, "lr": 4.4853351029077576e-05, "epoch": 4.488836662749706, "percentage": 74.81, "elapsed_time": "10:25:05", "remaining_time": "3:30:26"}
23
- {"current_steps": 3830, "total_steps": 5106, "loss": 0.471, "lr": 4.4195856233402423e-05, "epoch": 4.500587544065805, "percentage": 75.01, "elapsed_time": "10:53:27", "remaining_time": "3:37:42"}
24
- {"current_steps": 3840, "total_steps": 5106, "loss": 0.4707, "lr": 4.3542382513048814e-05, "epoch": 4.512338425381904, "percentage": 75.21, "elapsed_time": "11:21:50", "remaining_time": "3:44:47"}
 
1
+ {"current_steps": 10, "total_steps": 2550, "loss": 3.3727, "lr": 6.75e-05, "epoch": 0.023515579071134628, "percentage": 0.39, "elapsed_time": "0:51:39", "remaining_time": "9 days, 2:43:18"}
2
+ {"current_steps": 20, "total_steps": 2550, "loss": 2.4747, "lr": 0.0001425, "epoch": 0.047031158142269255, "percentage": 0.78, "elapsed_time": "1:43:33", "remaining_time": "9 days, 2:21:05"}
3
+ {"current_steps": 30, "total_steps": 2550, "loss": 1.6981, "lr": 0.00021749999999999997, "epoch": 0.07054673721340388, "percentage": 1.18, "elapsed_time": "2:35:29", "remaining_time": "9 days, 1:40:48"}
4
+ {"current_steps": 40, "total_steps": 2550, "loss": 1.1544, "lr": 0.00029249999999999995, "epoch": 0.09406231628453851, "percentage": 1.57, "elapsed_time": "3:27:30", "remaining_time": "9 days, 1:00:44"}
5
+ {"current_steps": 50, "total_steps": 2550, "loss": 0.889, "lr": 0.0002999904831331692, "epoch": 0.11757789535567313, "percentage": 1.96, "elapsed_time": "4:19:27", "remaining_time": "9 days, 0:13:15"}
6
+ {"current_steps": 60, "total_steps": 2550, "loss": 0.765, "lr": 0.00029995758687228834, "epoch": 0.14109347442680775, "percentage": 2.35, "elapsed_time": "5:11:26", "remaining_time": "8 days, 23:25:05"}
7
+ {"current_steps": 70, "total_steps": 2550, "loss": 0.7138, "lr": 0.000299901198877339, "epoch": 0.1646090534979424, "percentage": 2.75, "elapsed_time": "6:03:24", "remaining_time": "8 days, 22:35:03"}
8
+ {"current_steps": 80, "total_steps": 2550, "loss": 0.6962, "lr": 0.0002998213279818309, "epoch": 0.18812463256907702, "percentage": 3.14, "elapsed_time": "6:55:23", "remaining_time": "8 days, 21:45:24"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d989995d6d190678db5cabbe56f81f8fe71417ab2b772b948d310b176b0211f6
3
- size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db4ff7073a4da3309636bbb72b82f1473dd3f796afe5e7d3e7687cadcf17ca0f
3
+ size 5688