winnieyangwannan commited on
Commit
b89705b
·
verified ·
1 Parent(s): 085bc15

Training in progress, step 100, checkpoint

Browse files
checkpoint-100/adapter_config.json CHANGED
@@ -24,12 +24,12 @@
24
  "revision": null,
25
  "target_modules": [
26
  "gate_proj",
27
- "q_proj",
28
  "k_proj",
29
- "up_proj",
30
  "o_proj",
31
- "down_proj",
32
- "v_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
24
  "revision": null,
25
  "target_modules": [
26
  "gate_proj",
27
+ "v_proj",
28
  "k_proj",
29
+ "q_proj",
30
  "o_proj",
31
+ "up_proj",
32
+ "down_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7a8bc21fa2e8a47b90a2816d7be2e2bf2dbb623a1119ece2f43a082c62d469a
3
  size 216151256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c81f7e0e47d5acc29d6efedf7aabffa931304887d58bb084174adf8e936b6b43
3
  size 216151256
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10ea843b4f8a0fac716dca25b8e9c16d7bb8ff022d4bb6695c5aacaa7518c72e
3
  size 432640054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c818751d33036cf8f4d360814fe89ffbc54a01f0c8f4f12f93ad68ae9353099
3
  size 432640054
checkpoint-100/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc0f4b4def19d80238543e9204366e45f9cfc570f06744d235af389f5feb7e13
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40a12c8bf4a99e5b8cf8700f3f0607f02765e51239fdbfb9883f71b9b2047d61
3
  size 14244
checkpoint-100/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 0.12658227848101267,
5
- "eval_steps": 50,
6
  "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -10,88 +10,152 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.012658227848101266,
13
- "grad_norm": 16.027029037475586,
14
  "learning_rate": 4.9789029535864986e-05,
15
- "loss": 2.6925,
 
 
 
 
 
 
 
 
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.02531645569620253,
20
- "grad_norm": 1.0948777198791504,
21
  "learning_rate": 4.957805907172996e-05,
22
- "loss": 1.386,
 
 
 
 
 
 
 
 
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.0379746835443038,
27
- "grad_norm": 1.1341983079910278,
28
  "learning_rate": 4.936708860759494e-05,
29
- "loss": 1.108,
 
 
 
 
 
 
 
 
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.05063291139240506,
34
- "grad_norm": 1.152009129524231,
35
  "learning_rate": 4.9156118143459915e-05,
36
- "loss": 0.9436,
 
 
 
 
 
 
 
 
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.06329113924050633,
41
- "grad_norm": 1.0990614891052246,
42
  "learning_rate": 4.89451476793249e-05,
43
- "loss": 0.7499,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.06329113924050633,
48
- "eval_loss": 0.7157873511314392,
49
- "eval_runtime": 12.1594,
50
- "eval_samples_per_second": 39.476,
51
- "eval_steps_per_second": 2.467,
52
  "step": 50
53
  },
54
  {
55
  "epoch": 0.0759493670886076,
56
- "grad_norm": 1.0284477472305298,
57
  "learning_rate": 4.8734177215189874e-05,
58
- "loss": 0.6095,
 
 
 
 
 
 
 
 
59
  "step": 60
60
  },
61
  {
62
  "epoch": 0.08860759493670886,
63
- "grad_norm": 0.8609589338302612,
64
  "learning_rate": 4.852320675105486e-05,
65
- "loss": 0.5355,
 
 
 
 
 
 
 
 
66
  "step": 70
67
  },
68
  {
69
  "epoch": 0.10126582278481013,
70
- "grad_norm": 0.9162376523017883,
71
  "learning_rate": 4.8312236286919834e-05,
72
- "loss": 0.5705,
 
 
 
 
 
 
 
 
73
  "step": 80
74
  },
75
  {
76
  "epoch": 0.11392405063291139,
77
- "grad_norm": 0.9415847659111023,
78
  "learning_rate": 4.810126582278481e-05,
79
- "loss": 0.5449,
 
 
 
 
 
 
 
 
80
  "step": 90
81
  },
82
  {
83
  "epoch": 0.12658227848101267,
84
- "grad_norm": 0.8756884336471558,
85
  "learning_rate": 4.789029535864979e-05,
86
- "loss": 0.5157,
87
  "step": 100
88
  },
89
  {
90
  "epoch": 0.12658227848101267,
91
- "eval_loss": 0.563517153263092,
92
- "eval_runtime": 12.1236,
93
- "eval_samples_per_second": 39.592,
94
- "eval_steps_per_second": 2.475,
95
  "step": 100
96
  }
97
  ],
@@ -99,7 +163,7 @@
99
  "max_steps": 2370,
100
  "num_input_tokens_seen": 0,
101
  "num_train_epochs": 3,
102
- "save_steps": 100,
103
  "stateful_callbacks": {
104
  "TrainerControl": {
105
  "args": {
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 0.12658227848101267,
5
+ "eval_steps": 10,
6
  "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.012658227848101266,
13
+ "grad_norm": 12.934767723083496,
14
  "learning_rate": 4.9789029535864986e-05,
15
+ "loss": 2.6869,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.012658227848101266,
20
+ "eval_loss": 1.651185393333435,
21
+ "eval_runtime": 11.831,
22
+ "eval_samples_per_second": 40.571,
23
+ "eval_steps_per_second": 2.536,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.02531645569620253,
28
+ "grad_norm": 1.104798674583435,
29
  "learning_rate": 4.957805907172996e-05,
30
+ "loss": 1.3694,
31
+ "step": 20
32
+ },
33
+ {
34
+ "epoch": 0.02531645569620253,
35
+ "eval_loss": 1.2200205326080322,
36
+ "eval_runtime": 11.8928,
37
+ "eval_samples_per_second": 40.361,
38
+ "eval_steps_per_second": 2.523,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.0379746835443038,
43
+ "grad_norm": 1.1069142818450928,
44
  "learning_rate": 4.936708860759494e-05,
45
+ "loss": 1.1029,
46
+ "step": 30
47
+ },
48
+ {
49
+ "epoch": 0.0379746835443038,
50
+ "eval_loss": 1.0691736936569214,
51
+ "eval_runtime": 11.9127,
52
+ "eval_samples_per_second": 40.293,
53
+ "eval_steps_per_second": 2.518,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 0.05063291139240506,
58
+ "grad_norm": 1.1594161987304688,
59
  "learning_rate": 4.9156118143459915e-05,
60
+ "loss": 0.9395,
61
+ "step": 40
62
+ },
63
+ {
64
+ "epoch": 0.05063291139240506,
65
+ "eval_loss": 0.9162012934684753,
66
+ "eval_runtime": 11.9373,
67
+ "eval_samples_per_second": 40.21,
68
+ "eval_steps_per_second": 2.513,
69
  "step": 40
70
  },
71
  {
72
  "epoch": 0.06329113924050633,
73
+ "grad_norm": 1.1133538484573364,
74
  "learning_rate": 4.89451476793249e-05,
75
+ "loss": 0.7489,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 0.06329113924050633,
80
+ "eval_loss": 0.713701605796814,
81
+ "eval_runtime": 11.9661,
82
+ "eval_samples_per_second": 40.113,
83
+ "eval_steps_per_second": 2.507,
84
  "step": 50
85
  },
86
  {
87
  "epoch": 0.0759493670886076,
88
+ "grad_norm": 1.0406183004379272,
89
  "learning_rate": 4.8734177215189874e-05,
90
+ "loss": 0.6096,
91
+ "step": 60
92
+ },
93
+ {
94
+ "epoch": 0.0759493670886076,
95
+ "eval_loss": 0.6309535503387451,
96
+ "eval_runtime": 11.9895,
97
+ "eval_samples_per_second": 40.035,
98
+ "eval_steps_per_second": 2.502,
99
  "step": 60
100
  },
101
  {
102
  "epoch": 0.08860759493670886,
103
+ "grad_norm": 0.8599340915679932,
104
  "learning_rate": 4.852320675105486e-05,
105
+ "loss": 0.5357,
106
+ "step": 70
107
+ },
108
+ {
109
+ "epoch": 0.08860759493670886,
110
+ "eval_loss": 0.6159886717796326,
111
+ "eval_runtime": 12.0107,
112
+ "eval_samples_per_second": 39.965,
113
+ "eval_steps_per_second": 2.498,
114
  "step": 70
115
  },
116
  {
117
  "epoch": 0.10126582278481013,
118
+ "grad_norm": 0.9128267168998718,
119
  "learning_rate": 4.8312236286919834e-05,
120
+ "loss": 0.5703,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.10126582278481013,
125
+ "eval_loss": 0.5933937430381775,
126
+ "eval_runtime": 11.9716,
127
+ "eval_samples_per_second": 40.095,
128
+ "eval_steps_per_second": 2.506,
129
  "step": 80
130
  },
131
  {
132
  "epoch": 0.11392405063291139,
133
+ "grad_norm": 0.9396541118621826,
134
  "learning_rate": 4.810126582278481e-05,
135
+ "loss": 0.5445,
136
+ "step": 90
137
+ },
138
+ {
139
+ "epoch": 0.11392405063291139,
140
+ "eval_loss": 0.5727818608283997,
141
+ "eval_runtime": 11.9685,
142
+ "eval_samples_per_second": 40.105,
143
+ "eval_steps_per_second": 2.507,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 0.12658227848101267,
148
+ "grad_norm": 0.8805290460586548,
149
  "learning_rate": 4.789029535864979e-05,
150
+ "loss": 0.5151,
151
  "step": 100
152
  },
153
  {
154
  "epoch": 0.12658227848101267,
155
+ "eval_loss": 0.5640087127685547,
156
+ "eval_runtime": 11.9824,
157
+ "eval_samples_per_second": 40.059,
158
+ "eval_steps_per_second": 2.504,
159
  "step": 100
160
  }
161
  ],
 
163
  "max_steps": 2370,
164
  "num_input_tokens_seen": 0,
165
  "num_train_epochs": 3,
166
+ "save_steps": 10,
167
  "stateful_callbacks": {
168
  "TrainerControl": {
169
  "args": {
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8ef5fa4aad3a350c14df025074931ad8a003d4b851f4886f3b2f66ae6653e4b
3
  size 5880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e7aae8b855413d55586dd498c7d7d805796f0c02067ce9d8ccb1ef37f72d29
3
  size 5880