DeepDream2045 commited on
Commit
c486624
·
verified ·
1 Parent(s): dae705e

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "up_proj",
25
  "q_proj",
 
26
  "gate_proj",
27
  "down_proj",
28
  "v_proj",
29
- "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "up_proj",
24
  "q_proj",
25
+ "k_proj",
26
  "gate_proj",
27
  "down_proj",
28
  "v_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aa329c5af8be49237ec6cbf7cd7b41a9d168f1bd675bb8dd6c40d1f9e518fa0
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20dba402be068580d83c349df7bfa19ffa24da5405bb81aac9b521966ba1281b
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5e523954bfced769866218d35dee78603de618ab1436e409282cb882182c7c4
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d0373a5bb2e653c160150cd9460848f2f9dd55c2249f0621f848263f819ddf
3
  size 671466706
last-checkpoint/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.0359615087509155,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
  "epoch": 0.6182380216383307,
5
  "eval_steps": 25,
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.012364760432766615,
13
- "grad_norm": 65.4753189086914,
14
  "learning_rate": 5e-05,
15
  "loss": 1.7005,
16
  "step": 1
@@ -18,368 +18,368 @@
18
  {
19
  "epoch": 0.012364760432766615,
20
  "eval_loss": 1.5441035032272339,
21
- "eval_runtime": 26.9469,
22
- "eval_samples_per_second": 20.225,
23
- "eval_steps_per_second": 2.561,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.02472952086553323,
28
- "grad_norm": 63.677093505859375,
29
  "learning_rate": 0.0001,
30
  "loss": 1.5408,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.03709428129829984,
35
- "grad_norm": 38.44874954223633,
36
  "learning_rate": 9.989294616193017e-05,
37
- "loss": 1.4339,
38
  "step": 3
39
  },
40
  {
41
  "epoch": 0.04945904173106646,
42
- "grad_norm": 36.82088088989258,
43
  "learning_rate": 9.957224306869053e-05,
44
- "loss": 1.2845,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.061823802163833076,
49
- "grad_norm": 18.930330276489258,
50
  "learning_rate": 9.903926402016153e-05,
51
- "loss": 1.2066,
52
  "step": 5
53
  },
54
  {
55
  "epoch": 0.07418856259659969,
56
- "grad_norm": 19.951047897338867,
57
  "learning_rate": 9.829629131445342e-05,
58
- "loss": 1.1864,
59
  "step": 6
60
  },
61
  {
62
  "epoch": 0.0865533230293663,
63
- "grad_norm": 15.55380630493164,
64
  "learning_rate": 9.73465064747553e-05,
65
- "loss": 1.115,
66
  "step": 7
67
  },
68
  {
69
  "epoch": 0.09891808346213292,
70
- "grad_norm": 51.38351821899414,
71
  "learning_rate": 9.619397662556435e-05,
72
- "loss": 1.1459,
73
  "step": 8
74
  },
75
  {
76
  "epoch": 0.11128284389489954,
77
- "grad_norm": 20.464824676513672,
78
  "learning_rate": 9.484363707663442e-05,
79
- "loss": 1.1179,
80
  "step": 9
81
  },
82
  {
83
  "epoch": 0.12364760432766615,
84
- "grad_norm": 14.560519218444824,
85
  "learning_rate": 9.330127018922194e-05,
86
- "loss": 1.0324,
87
  "step": 10
88
  },
89
  {
90
  "epoch": 0.13601236476043277,
91
- "grad_norm": 16.44891357421875,
92
  "learning_rate": 9.157348061512727e-05,
93
- "loss": 1.0985,
94
  "step": 11
95
  },
96
  {
97
  "epoch": 0.14837712519319937,
98
- "grad_norm": 13.887516021728516,
99
  "learning_rate": 8.966766701456177e-05,
100
- "loss": 1.0447,
101
  "step": 12
102
  },
103
  {
104
  "epoch": 0.160741885625966,
105
- "grad_norm": 17.173248291015625,
106
  "learning_rate": 8.759199037394887e-05,
107
- "loss": 1.1134,
108
  "step": 13
109
  },
110
  {
111
  "epoch": 0.1731066460587326,
112
- "grad_norm": 18.054431915283203,
113
  "learning_rate": 8.535533905932738e-05,
114
- "loss": 1.1994,
115
  "step": 14
116
  },
117
  {
118
  "epoch": 0.18547140649149924,
119
- "grad_norm": 13.658798217773438,
120
  "learning_rate": 8.296729075500344e-05,
121
- "loss": 1.138,
122
  "step": 15
123
  },
124
  {
125
  "epoch": 0.19783616692426584,
126
- "grad_norm": 15.112719535827637,
127
  "learning_rate": 8.043807145043604e-05,
128
- "loss": 1.1268,
129
  "step": 16
130
  },
131
  {
132
  "epoch": 0.21020092735703247,
133
- "grad_norm": 14.573080062866211,
134
  "learning_rate": 7.777851165098012e-05,
135
- "loss": 1.0827,
136
  "step": 17
137
  },
138
  {
139
  "epoch": 0.22256568778979907,
140
- "grad_norm": 12.513110160827637,
141
  "learning_rate": 7.500000000000001e-05,
142
- "loss": 1.0823,
143
  "step": 18
144
  },
145
  {
146
  "epoch": 0.23493044822256567,
147
- "grad_norm": 14.179524421691895,
148
  "learning_rate": 7.211443451095007e-05,
149
- "loss": 1.0901,
150
  "step": 19
151
  },
152
  {
153
  "epoch": 0.2472952086553323,
154
- "grad_norm": 12.364250183105469,
155
  "learning_rate": 6.91341716182545e-05,
156
- "loss": 1.0374,
157
  "step": 20
158
  },
159
  {
160
  "epoch": 0.2596599690880989,
161
- "grad_norm": 12.568696022033691,
162
  "learning_rate": 6.607197326515808e-05,
163
- "loss": 1.0597,
164
  "step": 21
165
  },
166
  {
167
  "epoch": 0.27202472952086554,
168
- "grad_norm": 12.904902458190918,
169
  "learning_rate": 6.294095225512603e-05,
170
- "loss": 0.9841,
171
  "step": 22
172
  },
173
  {
174
  "epoch": 0.28438948995363217,
175
- "grad_norm": 13.623453140258789,
176
  "learning_rate": 5.9754516100806423e-05,
177
- "loss": 1.0478,
178
  "step": 23
179
  },
180
  {
181
  "epoch": 0.29675425038639874,
182
- "grad_norm": 15.266836166381836,
183
  "learning_rate": 5.6526309611002594e-05,
184
- "loss": 0.9646,
185
  "step": 24
186
  },
187
  {
188
  "epoch": 0.3091190108191654,
189
- "grad_norm": 13.770989418029785,
190
  "learning_rate": 5.327015646150716e-05,
191
- "loss": 0.9647,
192
  "step": 25
193
  },
194
  {
195
  "epoch": 0.3091190108191654,
196
- "eval_loss": 1.0564574003219604,
197
- "eval_runtime": 26.9441,
198
- "eval_samples_per_second": 20.227,
199
- "eval_steps_per_second": 2.561,
200
  "step": 25
201
  },
202
  {
203
  "epoch": 0.321483771251932,
204
- "grad_norm": 15.856243133544922,
205
  "learning_rate": 5e-05,
206
- "loss": 1.2542,
207
  "step": 26
208
  },
209
  {
210
  "epoch": 0.33384853168469864,
211
- "grad_norm": 16.397891998291016,
212
  "learning_rate": 4.6729843538492847e-05,
213
- "loss": 1.1305,
214
  "step": 27
215
  },
216
  {
217
  "epoch": 0.3462132921174652,
218
- "grad_norm": 13.795293807983398,
219
  "learning_rate": 4.347369038899744e-05,
220
- "loss": 1.0607,
221
  "step": 28
222
  },
223
  {
224
  "epoch": 0.35857805255023184,
225
- "grad_norm": 12.077628135681152,
226
  "learning_rate": 4.0245483899193595e-05,
227
- "loss": 1.1596,
228
  "step": 29
229
  },
230
  {
231
  "epoch": 0.37094281298299847,
232
- "grad_norm": 11.004748344421387,
233
  "learning_rate": 3.705904774487396e-05,
234
- "loss": 1.0742,
235
  "step": 30
236
  },
237
  {
238
  "epoch": 0.38330757341576505,
239
- "grad_norm": 11.478070259094238,
240
  "learning_rate": 3.392802673484193e-05,
241
- "loss": 1.0366,
242
  "step": 31
243
  },
244
  {
245
  "epoch": 0.3956723338485317,
246
- "grad_norm": 13.015902519226074,
247
  "learning_rate": 3.086582838174551e-05,
248
- "loss": 1.0506,
249
  "step": 32
250
  },
251
  {
252
  "epoch": 0.4080370942812983,
253
- "grad_norm": 11.043506622314453,
254
  "learning_rate": 2.7885565489049946e-05,
255
- "loss": 0.9651,
256
  "step": 33
257
  },
258
  {
259
  "epoch": 0.42040185471406494,
260
- "grad_norm": 11.857297897338867,
261
  "learning_rate": 2.500000000000001e-05,
262
- "loss": 1.0363,
263
  "step": 34
264
  },
265
  {
266
  "epoch": 0.4327666151468315,
267
- "grad_norm": 10.669569969177246,
268
  "learning_rate": 2.2221488349019903e-05,
269
- "loss": 0.9911,
270
  "step": 35
271
  },
272
  {
273
  "epoch": 0.44513137557959814,
274
- "grad_norm": 12.18918514251709,
275
  "learning_rate": 1.9561928549563968e-05,
276
- "loss": 1.0056,
277
  "step": 36
278
  },
279
  {
280
  "epoch": 0.4574961360123648,
281
- "grad_norm": 11.107763290405273,
282
  "learning_rate": 1.703270924499656e-05,
283
- "loss": 0.9801,
284
  "step": 37
285
  },
286
  {
287
  "epoch": 0.46986089644513135,
288
- "grad_norm": 12.230752944946289,
289
  "learning_rate": 1.4644660940672627e-05,
290
- "loss": 1.1017,
291
  "step": 38
292
  },
293
  {
294
  "epoch": 0.482225656877898,
295
- "grad_norm": 11.509124755859375,
296
  "learning_rate": 1.2408009626051137e-05,
297
- "loss": 1.1986,
298
  "step": 39
299
  },
300
  {
301
  "epoch": 0.4945904173106646,
302
- "grad_norm": 11.408638000488281,
303
  "learning_rate": 1.0332332985438248e-05,
304
- "loss": 1.0935,
305
  "step": 40
306
  },
307
  {
308
  "epoch": 0.5069551777434312,
309
- "grad_norm": 11.174295425415039,
310
  "learning_rate": 8.426519384872733e-06,
311
- "loss": 1.1101,
312
  "step": 41
313
  },
314
  {
315
  "epoch": 0.5193199381761978,
316
- "grad_norm": 12.814458847045898,
317
  "learning_rate": 6.698729810778065e-06,
318
- "loss": 1.0953,
319
  "step": 42
320
  },
321
  {
322
  "epoch": 0.5316846986089645,
323
- "grad_norm": 11.73488712310791,
324
  "learning_rate": 5.156362923365588e-06,
325
- "loss": 1.1307,
326
  "step": 43
327
  },
328
  {
329
  "epoch": 0.5440494590417311,
330
- "grad_norm": 10.956543922424316,
331
  "learning_rate": 3.8060233744356633e-06,
332
- "loss": 1.0286,
333
  "step": 44
334
  },
335
  {
336
  "epoch": 0.5564142194744977,
337
- "grad_norm": 10.427705764770508,
338
  "learning_rate": 2.653493525244721e-06,
339
- "loss": 0.991,
340
  "step": 45
341
  },
342
  {
343
  "epoch": 0.5687789799072643,
344
- "grad_norm": 11.056525230407715,
345
  "learning_rate": 1.70370868554659e-06,
346
- "loss": 1.0113,
347
  "step": 46
348
  },
349
  {
350
  "epoch": 0.5811437403400309,
351
- "grad_norm": 10.139036178588867,
352
  "learning_rate": 9.607359798384785e-07,
353
- "loss": 0.9341,
354
  "step": 47
355
  },
356
  {
357
  "epoch": 0.5935085007727975,
358
- "grad_norm": 10.551136016845703,
359
  "learning_rate": 4.277569313094809e-07,
360
- "loss": 1.0015,
361
  "step": 48
362
  },
363
  {
364
  "epoch": 0.6058732612055642,
365
- "grad_norm": 10.819656372070312,
366
  "learning_rate": 1.0705383806982606e-07,
367
- "loss": 0.9729,
368
  "step": 49
369
  },
370
  {
371
  "epoch": 0.6182380216383307,
372
- "grad_norm": 11.754311561584473,
373
  "learning_rate": 0.0,
374
- "loss": 0.9203,
375
  "step": 50
376
  },
377
  {
378
  "epoch": 0.6182380216383307,
379
- "eval_loss": 1.0359615087509155,
380
- "eval_runtime": 26.9396,
381
- "eval_samples_per_second": 20.23,
382
- "eval_steps_per_second": 2.561,
383
  "step": 50
384
  }
385
  ],
 
1
  {
2
+ "best_metric": 1.035404920578003,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
  "epoch": 0.6182380216383307,
5
  "eval_steps": 25,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.012364760432766615,
13
+ "grad_norm": 64.29004669189453,
14
  "learning_rate": 5e-05,
15
  "loss": 1.7005,
16
  "step": 1
 
18
  {
19
  "epoch": 0.012364760432766615,
20
  "eval_loss": 1.5441035032272339,
21
+ "eval_runtime": 27.0566,
22
+ "eval_samples_per_second": 20.143,
23
+ "eval_steps_per_second": 2.55,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 0.02472952086553323,
28
+ "grad_norm": 62.47732162475586,
29
  "learning_rate": 0.0001,
30
  "loss": 1.5408,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 0.03709428129829984,
35
+ "grad_norm": 37.50821304321289,
36
  "learning_rate": 9.989294616193017e-05,
37
+ "loss": 1.4364,
38
  "step": 3
39
  },
40
  {
41
  "epoch": 0.04945904173106646,
42
+ "grad_norm": 35.94296646118164,
43
  "learning_rate": 9.957224306869053e-05,
44
+ "loss": 1.285,
45
  "step": 4
46
  },
47
  {
48
  "epoch": 0.061823802163833076,
49
+ "grad_norm": 18.204742431640625,
50
  "learning_rate": 9.903926402016153e-05,
51
+ "loss": 1.2052,
52
  "step": 5
53
  },
54
  {
55
  "epoch": 0.07418856259659969,
56
+ "grad_norm": 19.261959075927734,
57
  "learning_rate": 9.829629131445342e-05,
58
+ "loss": 1.1855,
59
  "step": 6
60
  },
61
  {
62
  "epoch": 0.0865533230293663,
63
+ "grad_norm": 14.603866577148438,
64
  "learning_rate": 9.73465064747553e-05,
65
+ "loss": 1.1152,
66
  "step": 7
67
  },
68
  {
69
  "epoch": 0.09891808346213292,
70
+ "grad_norm": 16.302173614501953,
71
  "learning_rate": 9.619397662556435e-05,
72
+ "loss": 1.1424,
73
  "step": 8
74
  },
75
  {
76
  "epoch": 0.11128284389489954,
77
+ "grad_norm": 15.016532897949219,
78
  "learning_rate": 9.484363707663442e-05,
79
+ "loss": 1.1089,
80
  "step": 9
81
  },
82
  {
83
  "epoch": 0.12364760432766615,
84
+ "grad_norm": 15.044660568237305,
85
  "learning_rate": 9.330127018922194e-05,
86
+ "loss": 1.0282,
87
  "step": 10
88
  },
89
  {
90
  "epoch": 0.13601236476043277,
91
+ "grad_norm": 14.534330368041992,
92
  "learning_rate": 9.157348061512727e-05,
93
+ "loss": 1.0921,
94
  "step": 11
95
  },
96
  {
97
  "epoch": 0.14837712519319937,
98
+ "grad_norm": 14.35930347442627,
99
  "learning_rate": 8.966766701456177e-05,
100
+ "loss": 1.0389,
101
  "step": 12
102
  },
103
  {
104
  "epoch": 0.160741885625966,
105
+ "grad_norm": 16.542173385620117,
106
  "learning_rate": 8.759199037394887e-05,
107
+ "loss": 1.1104,
108
  "step": 13
109
  },
110
  {
111
  "epoch": 0.1731066460587326,
112
+ "grad_norm": 15.786033630371094,
113
  "learning_rate": 8.535533905932738e-05,
114
+ "loss": 1.1943,
115
  "step": 14
116
  },
117
  {
118
  "epoch": 0.18547140649149924,
119
+ "grad_norm": 13.105801582336426,
120
  "learning_rate": 8.296729075500344e-05,
121
+ "loss": 1.1353,
122
  "step": 15
123
  },
124
  {
125
  "epoch": 0.19783616692426584,
126
+ "grad_norm": 14.734046936035156,
127
  "learning_rate": 8.043807145043604e-05,
128
+ "loss": 1.1239,
129
  "step": 16
130
  },
131
  {
132
  "epoch": 0.21020092735703247,
133
+ "grad_norm": 13.833138465881348,
134
  "learning_rate": 7.777851165098012e-05,
135
+ "loss": 1.0785,
136
  "step": 17
137
  },
138
  {
139
  "epoch": 0.22256568778979907,
140
+ "grad_norm": 12.960760116577148,
141
  "learning_rate": 7.500000000000001e-05,
142
+ "loss": 1.0789,
143
  "step": 18
144
  },
145
  {
146
  "epoch": 0.23493044822256567,
147
+ "grad_norm": 14.09332275390625,
148
  "learning_rate": 7.211443451095007e-05,
149
+ "loss": 1.0874,
150
  "step": 19
151
  },
152
  {
153
  "epoch": 0.2472952086553323,
154
+ "grad_norm": 11.763240814208984,
155
  "learning_rate": 6.91341716182545e-05,
156
+ "loss": 1.0358,
157
  "step": 20
158
  },
159
  {
160
  "epoch": 0.2596599690880989,
161
+ "grad_norm": 12.183867454528809,
162
  "learning_rate": 6.607197326515808e-05,
163
+ "loss": 1.0567,
164
  "step": 21
165
  },
166
  {
167
  "epoch": 0.27202472952086554,
168
+ "grad_norm": 12.834959983825684,
169
  "learning_rate": 6.294095225512603e-05,
170
+ "loss": 0.9824,
171
  "step": 22
172
  },
173
  {
174
  "epoch": 0.28438948995363217,
175
+ "grad_norm": 12.709415435791016,
176
  "learning_rate": 5.9754516100806423e-05,
177
+ "loss": 1.0453,
178
  "step": 23
179
  },
180
  {
181
  "epoch": 0.29675425038639874,
182
+ "grad_norm": 12.786401748657227,
183
  "learning_rate": 5.6526309611002594e-05,
184
+ "loss": 0.9603,
185
  "step": 24
186
  },
187
  {
188
  "epoch": 0.3091190108191654,
189
+ "grad_norm": 14.301573753356934,
190
  "learning_rate": 5.327015646150716e-05,
191
+ "loss": 0.9649,
192
  "step": 25
193
  },
194
  {
195
  "epoch": 0.3091190108191654,
196
+ "eval_loss": 1.0556217432022095,
197
+ "eval_runtime": 27.0621,
198
+ "eval_samples_per_second": 20.139,
199
+ "eval_steps_per_second": 2.55,
200
  "step": 25
201
  },
202
  {
203
  "epoch": 0.321483771251932,
204
+ "grad_norm": 16.074369430541992,
205
  "learning_rate": 5e-05,
206
+ "loss": 1.2545,
207
  "step": 26
208
  },
209
  {
210
  "epoch": 0.33384853168469864,
211
+ "grad_norm": 15.460994720458984,
212
  "learning_rate": 4.6729843538492847e-05,
213
+ "loss": 1.1281,
214
  "step": 27
215
  },
216
  {
217
  "epoch": 0.3462132921174652,
218
+ "grad_norm": 13.048852920532227,
219
  "learning_rate": 4.347369038899744e-05,
220
+ "loss": 1.0592,
221
  "step": 28
222
  },
223
  {
224
  "epoch": 0.35857805255023184,
225
+ "grad_norm": 11.718851089477539,
226
  "learning_rate": 4.0245483899193595e-05,
227
+ "loss": 1.1608,
228
  "step": 29
229
  },
230
  {
231
  "epoch": 0.37094281298299847,
232
+ "grad_norm": 11.466394424438477,
233
  "learning_rate": 3.705904774487396e-05,
234
+ "loss": 1.0716,
235
  "step": 30
236
  },
237
  {
238
  "epoch": 0.38330757341576505,
239
+ "grad_norm": 11.176027297973633,
240
  "learning_rate": 3.392802673484193e-05,
241
+ "loss": 1.0359,
242
  "step": 31
243
  },
244
  {
245
  "epoch": 0.3956723338485317,
246
+ "grad_norm": 10.946759223937988,
247
  "learning_rate": 3.086582838174551e-05,
248
+ "loss": 1.0483,
249
  "step": 32
250
  },
251
  {
252
  "epoch": 0.4080370942812983,
253
+ "grad_norm": 10.408958435058594,
254
  "learning_rate": 2.7885565489049946e-05,
255
+ "loss": 0.9645,
256
  "step": 33
257
  },
258
  {
259
  "epoch": 0.42040185471406494,
260
+ "grad_norm": 10.959866523742676,
261
  "learning_rate": 2.500000000000001e-05,
262
+ "loss": 1.0344,
263
  "step": 34
264
  },
265
  {
266
  "epoch": 0.4327666151468315,
267
+ "grad_norm": 10.49497127532959,
268
  "learning_rate": 2.2221488349019903e-05,
269
+ "loss": 0.9897,
270
  "step": 35
271
  },
272
  {
273
  "epoch": 0.44513137557959814,
274
+ "grad_norm": 11.895526885986328,
275
  "learning_rate": 1.9561928549563968e-05,
276
+ "loss": 1.0052,
277
  "step": 36
278
  },
279
  {
280
  "epoch": 0.4574961360123648,
281
+ "grad_norm": 10.539115905761719,
282
  "learning_rate": 1.703270924499656e-05,
283
+ "loss": 0.9767,
284
  "step": 37
285
  },
286
  {
287
  "epoch": 0.46986089644513135,
288
+ "grad_norm": 11.975604057312012,
289
  "learning_rate": 1.4644660940672627e-05,
290
+ "loss": 1.102,
291
  "step": 38
292
  },
293
  {
294
  "epoch": 0.482225656877898,
295
+ "grad_norm": 11.794081687927246,
296
  "learning_rate": 1.2408009626051137e-05,
297
+ "loss": 1.197,
298
  "step": 39
299
  },
300
  {
301
  "epoch": 0.4945904173106646,
302
+ "grad_norm": 11.514941215515137,
303
  "learning_rate": 1.0332332985438248e-05,
304
+ "loss": 1.0931,
305
  "step": 40
306
  },
307
  {
308
  "epoch": 0.5069551777434312,
309
+ "grad_norm": 11.728755950927734,
310
  "learning_rate": 8.426519384872733e-06,
311
+ "loss": 1.1109,
312
  "step": 41
313
  },
314
  {
315
  "epoch": 0.5193199381761978,
316
+ "grad_norm": 12.327763557434082,
317
  "learning_rate": 6.698729810778065e-06,
318
+ "loss": 1.0959,
319
  "step": 42
320
  },
321
  {
322
  "epoch": 0.5316846986089645,
323
+ "grad_norm": 11.505321502685547,
324
  "learning_rate": 5.156362923365588e-06,
325
+ "loss": 1.1281,
326
  "step": 43
327
  },
328
  {
329
  "epoch": 0.5440494590417311,
330
+ "grad_norm": 10.898706436157227,
331
  "learning_rate": 3.8060233744356633e-06,
332
+ "loss": 1.0272,
333
  "step": 44
334
  },
335
  {
336
  "epoch": 0.5564142194744977,
337
+ "grad_norm": 10.793313980102539,
338
  "learning_rate": 2.653493525244721e-06,
339
+ "loss": 0.9923,
340
  "step": 45
341
  },
342
  {
343
  "epoch": 0.5687789799072643,
344
+ "grad_norm": 10.757425308227539,
345
  "learning_rate": 1.70370868554659e-06,
346
+ "loss": 1.0094,
347
  "step": 46
348
  },
349
  {
350
  "epoch": 0.5811437403400309,
351
+ "grad_norm": 10.546533584594727,
352
  "learning_rate": 9.607359798384785e-07,
353
+ "loss": 0.934,
354
  "step": 47
355
  },
356
  {
357
  "epoch": 0.5935085007727975,
358
+ "grad_norm": 11.380433082580566,
359
  "learning_rate": 4.277569313094809e-07,
360
+ "loss": 1.0027,
361
  "step": 48
362
  },
363
  {
364
  "epoch": 0.6058732612055642,
365
+ "grad_norm": 11.34750747680664,
366
  "learning_rate": 1.0705383806982606e-07,
367
+ "loss": 0.9724,
368
  "step": 49
369
  },
370
  {
371
  "epoch": 0.6182380216383307,
372
+ "grad_norm": 13.081766128540039,
373
  "learning_rate": 0.0,
374
+ "loss": 0.9192,
375
  "step": 50
376
  },
377
  {
378
  "epoch": 0.6182380216383307,
379
+ "eval_loss": 1.035404920578003,
380
+ "eval_runtime": 27.0703,
381
+ "eval_samples_per_second": 20.133,
382
+ "eval_steps_per_second": 2.549,
383
  "step": 50
384
  }
385
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ec7a24b66787d2564811f1dc1209b71bc9e9e6e058875b5a7e97f8a60519fc7
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5088dc23c6397af8783b01b33eec8575b490cbf9f4f4e1a8c92e7c4b1b56b82
3
  size 6776