Training in progress, step 80
Browse files- adapter_config.json +6 -6
- adapter_model.safetensors +2 -2
- added_tokens.json +4 -0
- tokenizer.json +2 -2
- tokenizer_config.json +32 -0
- trainer_log.jsonl +8 -24
- training_args.bin +2 -2
adapter_config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"alpha_pattern": {},
|
3 |
"auto_mapping": null,
|
4 |
-
"base_model_name_or_path": "Qwen/
|
5 |
"bias": "none",
|
6 |
"eva_config": null,
|
7 |
"exclude_modules": null,
|
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"up_proj",
|
27 |
-
"gate_proj",
|
28 |
-
"o_proj",
|
29 |
"q_proj",
|
30 |
-
"down_proj",
|
31 |
"v_proj",
|
32 |
-
"
|
|
|
|
|
|
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
1 |
{
|
2 |
"alpha_pattern": {},
|
3 |
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "Qwen/Qwen3-0.6B-Base",
|
5 |
"bias": "none",
|
6 |
"eva_config": null,
|
7 |
"exclude_modules": null,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
|
|
|
|
|
|
26 |
"q_proj",
|
|
|
27 |
"v_proj",
|
28 |
+
"gate_proj",
|
29 |
+
"down_proj",
|
30 |
+
"o_proj",
|
31 |
+
"k_proj",
|
32 |
+
"up_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:704aa895f2f07026040c80aabf199a59a516b3812a5f621b843a6d90d51efef1
|
3 |
+
size 80792456
|
added_tokens.json
CHANGED
@@ -1,6 +1,10 @@
|
|
1 |
{
|
|
|
2 |
"</tool_call>": 151658,
|
|
|
|
|
3 |
"<tool_call>": 151657,
|
|
|
4 |
"<|box_end|>": 151649,
|
5 |
"<|box_start|>": 151648,
|
6 |
"<|endoftext|>": 151643,
|
|
|
1 |
{
|
2 |
+
"</think>": 151668,
|
3 |
"</tool_call>": 151658,
|
4 |
+
"</tool_response>": 151666,
|
5 |
+
"<think>": 151667,
|
6 |
"<tool_call>": 151657,
|
7 |
+
"<tool_response>": 151665,
|
8 |
"<|box_end|>": 151649,
|
9 |
"<|box_start|>": 151648,
|
10 |
"<|endoftext|>": 151643,
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
3 |
+
size 11422654
|
tokenizer_config.json
CHANGED
@@ -177,6 +177,38 @@
|
|
177 |
"rstrip": false,
|
178 |
"single_word": false,
|
179 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
}
|
181 |
},
|
182 |
"additional_special_tokens": [
|
|
|
177 |
"rstrip": false,
|
178 |
"single_word": false,
|
179 |
"special": false
|
180 |
+
},
|
181 |
+
"151665": {
|
182 |
+
"content": "<tool_response>",
|
183 |
+
"lstrip": false,
|
184 |
+
"normalized": false,
|
185 |
+
"rstrip": false,
|
186 |
+
"single_word": false,
|
187 |
+
"special": false
|
188 |
+
},
|
189 |
+
"151666": {
|
190 |
+
"content": "</tool_response>",
|
191 |
+
"lstrip": false,
|
192 |
+
"normalized": false,
|
193 |
+
"rstrip": false,
|
194 |
+
"single_word": false,
|
195 |
+
"special": false
|
196 |
+
},
|
197 |
+
"151667": {
|
198 |
+
"content": "<think>",
|
199 |
+
"lstrip": false,
|
200 |
+
"normalized": false,
|
201 |
+
"rstrip": false,
|
202 |
+
"single_word": false,
|
203 |
+
"special": false
|
204 |
+
},
|
205 |
+
"151668": {
|
206 |
+
"content": "</think>",
|
207 |
+
"lstrip": false,
|
208 |
+
"normalized": false,
|
209 |
+
"rstrip": false,
|
210 |
+
"single_word": false,
|
211 |
+
"special": false
|
212 |
}
|
213 |
},
|
214 |
"additional_special_tokens": [
|
trainer_log.jsonl
CHANGED
@@ -1,24 +1,8 @@
|
|
1 |
-
{"current_steps":
|
2 |
-
{"current_steps":
|
3 |
-
{"current_steps":
|
4 |
-
{"current_steps":
|
5 |
-
{"current_steps":
|
6 |
-
{"current_steps":
|
7 |
-
{"current_steps":
|
8 |
-
{"current_steps":
|
9 |
-
{"current_steps": 3690, "total_steps": 5106, "loss": 0.4581, "lr": 5.3755127271761567e-05, "epoch": 4.336075205640423, "percentage": 72.27, "elapsed_time": "4:15:51", "remaining_time": "1:38:10"}
|
10 |
-
{"current_steps": 3700, "total_steps": 5106, "loss": 0.4834, "lr": 5.3047686719990736e-05, "epoch": 4.3478260869565215, "percentage": 72.46, "elapsed_time": "4:44:15", "remaining_time": "1:48:01"}
|
11 |
-
{"current_steps": 3710, "total_steps": 5106, "loss": 0.4693, "lr": 5.2343930830657565e-05, "epoch": 4.35957696827262, "percentage": 72.66, "elapsed_time": "5:12:39", "remaining_time": "1:57:38"}
|
12 |
-
{"current_steps": 3720, "total_steps": 5106, "loss": 0.4623, "lr": 5.1643886349930477e-05, "epoch": 4.371327849588719, "percentage": 72.86, "elapsed_time": "5:41:02", "remaining_time": "2:07:03"}
|
13 |
-
{"current_steps": 3730, "total_steps": 5106, "loss": 0.4786, "lr": 5.094757988292612e-05, "epoch": 4.383078730904818, "percentage": 73.05, "elapsed_time": "6:09:27", "remaining_time": "2:16:17"}
|
14 |
-
{"current_steps": 3740, "total_steps": 5106, "loss": 0.4736, "lr": 5.0255037892698235e-05, "epoch": 4.394829612220916, "percentage": 73.25, "elapsed_time": "6:37:52", "remaining_time": "2:25:19"}
|
15 |
-
{"current_steps": 3750, "total_steps": 5106, "loss": 0.4723, "lr": 4.956628669923218e-05, "epoch": 4.406580493537016, "percentage": 73.44, "elapsed_time": "7:06:16", "remaining_time": "2:34:08"}
|
16 |
-
{"current_steps": 3760, "total_steps": 5106, "loss": 0.4794, "lr": 4.8881352478444295e-05, "epoch": 4.418331374853114, "percentage": 73.64, "elapsed_time": "7:34:39", "remaining_time": "2:42:45"}
|
17 |
-
{"current_steps": 3770, "total_steps": 5106, "loss": 0.4671, "lr": 4.820026126118745e-05, "epoch": 4.430082256169213, "percentage": 73.83, "elapsed_time": "8:03:05", "remaining_time": "2:51:11"}
|
18 |
-
{"current_steps": 3780, "total_steps": 5106, "loss": 0.4523, "lr": 4.752303893226154e-05, "epoch": 4.441833137485311, "percentage": 74.03, "elapsed_time": "8:31:29", "remaining_time": "2:59:25"}
|
19 |
-
{"current_steps": 3790, "total_steps": 5106, "loss": 0.4687, "lr": 4.6849711229429644e-05, "epoch": 4.45358401880141, "percentage": 74.23, "elapsed_time": "8:59:54", "remaining_time": "3:07:28"}
|
20 |
-
{"current_steps": 3800, "total_steps": 5106, "loss": 0.4528, "lr": 4.61803037424401e-05, "epoch": 4.465334900117509, "percentage": 74.42, "elapsed_time": "9:28:16", "remaining_time": "3:15:18"}
|
21 |
-
{"current_steps": 3810, "total_steps": 5106, "loss": 0.4643, "lr": 4.551484191205389e-05, "epoch": 4.477085781433607, "percentage": 74.62, "elapsed_time": "9:56:41", "remaining_time": "3:22:58"}
|
22 |
-
{"current_steps": 3820, "total_steps": 5106, "loss": 0.4634, "lr": 4.4853351029077576e-05, "epoch": 4.488836662749706, "percentage": 74.81, "elapsed_time": "10:25:05", "remaining_time": "3:30:26"}
|
23 |
-
{"current_steps": 3830, "total_steps": 5106, "loss": 0.471, "lr": 4.4195856233402423e-05, "epoch": 4.500587544065805, "percentage": 75.01, "elapsed_time": "10:53:27", "remaining_time": "3:37:42"}
|
24 |
-
{"current_steps": 3840, "total_steps": 5106, "loss": 0.4707, "lr": 4.3542382513048814e-05, "epoch": 4.512338425381904, "percentage": 75.21, "elapsed_time": "11:21:50", "remaining_time": "3:44:47"}
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 2550, "loss": 3.3727, "lr": 6.75e-05, "epoch": 0.023515579071134628, "percentage": 0.39, "elapsed_time": "0:51:39", "remaining_time": "9 days, 2:43:18"}
|
2 |
+
{"current_steps": 20, "total_steps": 2550, "loss": 2.4747, "lr": 0.0001425, "epoch": 0.047031158142269255, "percentage": 0.78, "elapsed_time": "1:43:33", "remaining_time": "9 days, 2:21:05"}
|
3 |
+
{"current_steps": 30, "total_steps": 2550, "loss": 1.6981, "lr": 0.00021749999999999997, "epoch": 0.07054673721340388, "percentage": 1.18, "elapsed_time": "2:35:29", "remaining_time": "9 days, 1:40:48"}
|
4 |
+
{"current_steps": 40, "total_steps": 2550, "loss": 1.1544, "lr": 0.00029249999999999995, "epoch": 0.09406231628453851, "percentage": 1.57, "elapsed_time": "3:27:30", "remaining_time": "9 days, 1:00:44"}
|
5 |
+
{"current_steps": 50, "total_steps": 2550, "loss": 0.889, "lr": 0.0002999904831331692, "epoch": 0.11757789535567313, "percentage": 1.96, "elapsed_time": "4:19:27", "remaining_time": "9 days, 0:13:15"}
|
6 |
+
{"current_steps": 60, "total_steps": 2550, "loss": 0.765, "lr": 0.00029995758687228834, "epoch": 0.14109347442680775, "percentage": 2.35, "elapsed_time": "5:11:26", "remaining_time": "8 days, 23:25:05"}
|
7 |
+
{"current_steps": 70, "total_steps": 2550, "loss": 0.7138, "lr": 0.000299901198877339, "epoch": 0.1646090534979424, "percentage": 2.75, "elapsed_time": "6:03:24", "remaining_time": "8 days, 22:35:03"}
|
8 |
+
{"current_steps": 80, "total_steps": 2550, "loss": 0.6962, "lr": 0.0002998213279818309, "epoch": 0.18812463256907702, "percentage": 3.14, "elapsed_time": "6:55:23", "remaining_time": "8 days, 21:45:24"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db4ff7073a4da3309636bbb72b82f1473dd3f796afe5e7d3e7687cadcf17ca0f
|
3 |
+
size 5688
|