BTX24 committed on
Commit
3a6af06
·
verified ·
1 Parent(s): a923af2

End of training

Browse files
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [nvidia/mit-b4](https://huggingface.co/nvidia/mit-b4) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.1130
25
- - Accuracy: 0.9683
26
- - F1: 0.9683
27
- - Precision: 0.9684
28
- - Recall: 0.9683
29
 
30
  ## Model description
31
 
 
21
 
22
  This model is a fine-tuned version of [nvidia/mit-b4](https://huggingface.co/nvidia/mit-b4) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.1162
25
+ - Accuracy: 0.9701
26
+ - F1: 0.9701
27
+ - Precision: 0.9701
28
+ - Recall: 0.9701
29
 
30
  ## Model description
31
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 11.930232558139535,
3
+ "eval_accuracy": 0.9701492537313433,
4
+ "eval_f1": 0.9701426609443169,
5
+ "eval_loss": 0.11616706103086472,
6
+ "eval_precision": 0.9701377402873191,
7
+ "eval_recall": 0.9701492537313433,
8
+ "eval_runtime": 57.5155,
9
+ "eval_samples_per_second": 38.442,
10
+ "eval_steps_per_second": 4.816,
11
+ "total_flos": 1.7667354394198278e+19,
12
+ "train_loss": 0.17970547851321614,
13
+ "train_runtime": 5653.5088,
14
+ "train_samples_per_second": 10.948,
15
+ "train_steps_per_second": 0.342
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 11.930232558139535,
3
+ "eval_accuracy": 0.9701492537313433,
4
+ "eval_f1": 0.9701426609443169,
5
+ "eval_loss": 0.11616706103086472,
6
+ "eval_precision": 0.9701377402873191,
7
+ "eval_recall": 0.9701492537313433,
8
+ "eval_runtime": 57.5155,
9
+ "eval_samples_per_second": 38.442,
10
+ "eval_steps_per_second": 4.816
11
+ }
runs/Mar16_21-02-20_774ccf98b3fe/events.out.tfevents.1742164700.774ccf98b3fe.5066.32 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96206c640804f39e8f0a908bdba8a5eacbc1e61fea77c9a2a62f625aba56daa9
3
+ size 560
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 11.930232558139535,
3
+ "total_flos": 1.7667354394198278e+19,
4
+ "train_loss": 0.17970547851321614,
5
+ "train_runtime": 5653.5088,
6
+ "train_samples_per_second": 10.948,
7
+ "train_steps_per_second": 0.342
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9701426609443169,
3
+ "best_model_checkpoint": "mit-b4-finetuned-stroke-binary/checkpoint-1700",
4
+ "epoch": 11.930232558139535,
5
+ "eval_steps": 100,
6
+ "global_step": 1932,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.31007751937984496,
13
+ "grad_norm": 2.3132071495056152,
14
+ "learning_rate": 5.154639175257732e-06,
15
+ "loss": 0.6722,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.6201550387596899,
20
+ "grad_norm": 6.47659969329834,
21
+ "learning_rate": 1.0309278350515464e-05,
22
+ "loss": 0.5714,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.6201550387596899,
27
+ "eval_accuracy": 0.7878787878787878,
28
+ "eval_f1": 0.7800219254126745,
29
+ "eval_loss": 0.477566123008728,
30
+ "eval_precision": 0.7900169125975578,
31
+ "eval_recall": 0.7878787878787878,
32
+ "eval_runtime": 56.589,
33
+ "eval_samples_per_second": 39.071,
34
+ "eval_steps_per_second": 4.895,
35
+ "step": 100
36
+ },
37
+ {
38
+ "epoch": 0.9302325581395349,
39
+ "grad_norm": 4.08937406539917,
40
+ "learning_rate": 1.5360824742268042e-05,
41
+ "loss": 0.4471,
42
+ "step": 150
43
+ },
44
+ {
45
+ "epoch": 1.235658914728682,
46
+ "grad_norm": 8.325615882873535,
47
+ "learning_rate": 1.9999738610404825e-05,
48
+ "loss": 0.3897,
49
+ "step": 200
50
+ },
51
+ {
52
+ "epoch": 1.235658914728682,
53
+ "eval_accuracy": 0.8715513342379014,
54
+ "eval_f1": 0.8704450429260716,
55
+ "eval_loss": 0.3238992393016815,
56
+ "eval_precision": 0.8710997928702509,
57
+ "eval_recall": 0.8715513342379014,
58
+ "eval_runtime": 56.7955,
59
+ "eval_samples_per_second": 38.929,
60
+ "eval_steps_per_second": 4.877,
61
+ "step": 200
62
+ },
63
+ {
64
+ "epoch": 1.5457364341085271,
65
+ "grad_norm": 8.041431427001953,
66
+ "learning_rate": 1.9952399350448247e-05,
67
+ "loss": 0.3177,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 1.8558139534883722,
72
+ "grad_norm": 7.284913063049316,
73
+ "learning_rate": 1.9823819633544185e-05,
74
+ "loss": 0.2951,
75
+ "step": 300
76
+ },
77
+ {
78
+ "epoch": 1.8558139534883722,
79
+ "eval_accuracy": 0.8765264586160109,
80
+ "eval_f1": 0.8723990420158877,
81
+ "eval_loss": 0.31197357177734375,
82
+ "eval_precision": 0.885806076698763,
83
+ "eval_recall": 0.8765264586160109,
84
+ "eval_runtime": 56.5628,
85
+ "eval_samples_per_second": 39.089,
86
+ "eval_steps_per_second": 4.897,
87
+ "step": 300
88
+ },
89
+ {
90
+ "epoch": 2.1612403100775195,
91
+ "grad_norm": 3.927116870880127,
92
+ "learning_rate": 1.9615049043274207e-05,
93
+ "loss": 0.2786,
94
+ "step": 350
95
+ },
96
+ {
97
+ "epoch": 2.471317829457364,
98
+ "grad_norm": 13.424771308898926,
99
+ "learning_rate": 1.932779175343134e-05,
100
+ "loss": 0.23,
101
+ "step": 400
102
+ },
103
+ {
104
+ "epoch": 2.471317829457364,
105
+ "eval_accuracy": 0.9280868385345997,
106
+ "eval_f1": 0.9271058471970156,
107
+ "eval_loss": 0.1993584781885147,
108
+ "eval_precision": 0.9303851930689407,
109
+ "eval_recall": 0.9280868385345997,
110
+ "eval_runtime": 56.4712,
111
+ "eval_samples_per_second": 39.153,
112
+ "eval_steps_per_second": 4.905,
113
+ "step": 400
114
+ },
115
+ {
116
+ "epoch": 2.781395348837209,
117
+ "grad_norm": 11.889705657958984,
118
+ "learning_rate": 1.8964392617017013e-05,
119
+ "loss": 0.2424,
120
+ "step": 450
121
+ },
122
+ {
123
+ "epoch": 3.0868217054263565,
124
+ "grad_norm": 14.436951637268066,
125
+ "learning_rate": 1.8527818025436662e-05,
126
+ "loss": 0.2135,
127
+ "step": 500
128
+ },
129
+ {
130
+ "epoch": 3.0868217054263565,
131
+ "eval_accuracy": 0.9280868385345997,
132
+ "eval_f1": 0.9266634060111499,
133
+ "eval_loss": 0.2157154679298401,
134
+ "eval_precision": 0.9332581537403823,
135
+ "eval_recall": 0.9280868385345997,
136
+ "eval_runtime": 56.599,
137
+ "eval_samples_per_second": 39.064,
138
+ "eval_steps_per_second": 4.894,
139
+ "step": 500
140
+ },
141
+ {
142
+ "epoch": 3.3968992248062015,
143
+ "grad_norm": 16.79993438720703,
144
+ "learning_rate": 1.802163169413846e-05,
145
+ "loss": 0.1746,
146
+ "step": 550
147
+ },
148
+ {
149
+ "epoch": 3.7069767441860466,
150
+ "grad_norm": 14.388134002685547,
151
+ "learning_rate": 1.7449965572354675e-05,
152
+ "loss": 0.2106,
153
+ "step": 600
154
+ },
155
+ {
156
+ "epoch": 3.7069767441860466,
157
+ "eval_accuracy": 0.9380370872908186,
158
+ "eval_f1": 0.9382102726921231,
159
+ "eval_loss": 0.18085584044456482,
160
+ "eval_precision": 0.9386737275368563,
161
+ "eval_recall": 0.9380370872908186,
162
+ "eval_runtime": 56.6285,
163
+ "eval_samples_per_second": 39.044,
164
+ "eval_steps_per_second": 4.892,
165
+ "step": 600
166
+ },
167
+ {
168
+ "epoch": 4.0124031007751935,
169
+ "grad_norm": 14.440871238708496,
170
+ "learning_rate": 1.68174861144065e-05,
171
+ "loss": 0.1802,
172
+ "step": 650
173
+ },
174
+ {
175
+ "epoch": 4.322480620155039,
176
+ "grad_norm": 6.46437931060791,
177
+ "learning_rate": 1.612935618789643e-05,
178
+ "loss": 0.1576,
179
+ "step": 700
180
+ },
181
+ {
182
+ "epoch": 4.322480620155039,
183
+ "eval_accuracy": 0.9402985074626866,
184
+ "eval_f1": 0.9403501047780813,
185
+ "eval_loss": 0.1628771722316742,
186
+ "eval_precision": 0.9404274341623059,
187
+ "eval_recall": 0.9402985074626866,
188
+ "eval_runtime": 56.5976,
189
+ "eval_samples_per_second": 39.065,
190
+ "eval_steps_per_second": 4.894,
191
+ "step": 700
192
+ },
193
+ {
194
+ "epoch": 4.632558139534884,
195
+ "grad_norm": 12.626237869262695,
196
+ "learning_rate": 1.5391192929727884e-05,
197
+ "loss": 0.1752,
198
+ "step": 750
199
+ },
200
+ {
201
+ "epoch": 4.942635658914728,
202
+ "grad_norm": 20.061859130859375,
203
+ "learning_rate": 1.460902189396916e-05,
204
+ "loss": 0.1434,
205
+ "step": 800
206
+ },
207
+ {
208
+ "epoch": 4.942635658914728,
209
+ "eval_accuracy": 0.9543193125282677,
210
+ "eval_f1": 0.9541868361381168,
211
+ "eval_loss": 0.1526043862104416,
212
+ "eval_precision": 0.9543134613523963,
213
+ "eval_recall": 0.9543193125282677,
214
+ "eval_runtime": 56.4759,
215
+ "eval_samples_per_second": 39.149,
216
+ "eval_steps_per_second": 4.905,
217
+ "step": 800
218
+ },
219
+ {
220
+ "epoch": 5.248062015503876,
221
+ "grad_norm": 11.535951614379883,
222
+ "learning_rate": 1.3789227865848282e-05,
223
+ "loss": 0.1601,
224
+ "step": 850
225
+ },
226
+ {
227
+ "epoch": 5.558139534883721,
228
+ "grad_norm": 9.65259838104248,
229
+ "learning_rate": 1.2938502743379212e-05,
230
+ "loss": 0.1391,
231
+ "step": 900
232
+ },
233
+ {
234
+ "epoch": 5.558139534883721,
235
+ "eval_accuracy": 0.9574853007688828,
236
+ "eval_f1": 0.9574664394664735,
237
+ "eval_loss": 0.1268271952867508,
238
+ "eval_precision": 0.9574541746977356,
239
+ "eval_recall": 0.9574853007688828,
240
+ "eval_runtime": 56.6517,
241
+ "eval_samples_per_second": 39.028,
242
+ "eval_steps_per_second": 4.89,
243
+ "step": 900
244
+ },
245
+ {
246
+ "epoch": 5.868217054263566,
247
+ "grad_norm": 7.883506774902344,
248
+ "learning_rate": 1.2063790912056577e-05,
249
+ "loss": 0.1605,
250
+ "step": 950
251
+ },
252
+ {
253
+ "epoch": 6.173643410852713,
254
+ "grad_norm": 9.489595413208008,
255
+ "learning_rate": 1.1172232558519983e-05,
256
+ "loss": 0.1048,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 6.173643410852713,
261
+ "eval_accuracy": 0.9556761646313885,
262
+ "eval_f1": 0.9555092018481103,
263
+ "eval_loss": 0.14889651536941528,
264
+ "eval_precision": 0.9557688265871488,
265
+ "eval_recall": 0.9556761646313885,
266
+ "eval_runtime": 56.5428,
267
+ "eval_samples_per_second": 39.103,
268
+ "eval_steps_per_second": 4.899,
269
+ "step": 1000
270
+ },
271
+ {
272
+ "epoch": 6.4837209302325585,
273
+ "grad_norm": 5.59963846206665,
274
+ "learning_rate": 1.0271105385912779e-05,
275
+ "loss": 0.116,
276
+ "step": 1050
277
+ },
278
+ {
279
+ "epoch": 6.793798449612403,
280
+ "grad_norm": 4.873887538909912,
281
+ "learning_rate": 9.367765206707174e-06,
282
+ "loss": 0.1271,
283
+ "step": 1100
284
+ },
285
+ {
286
+ "epoch": 6.793798449612403,
287
+ "eval_accuracy": 0.9570330167345092,
288
+ "eval_f1": 0.9566191699282054,
289
+ "eval_loss": 0.14482761919498444,
290
+ "eval_precision": 0.9586051623091093,
291
+ "eval_recall": 0.9570330167345092,
292
+ "eval_runtime": 56.8397,
293
+ "eval_samples_per_second": 38.899,
294
+ "eval_steps_per_second": 4.873,
295
+ "step": 1100
296
+ },
297
+ {
298
+ "epoch": 7.09922480620155,
299
+ "grad_norm": 4.528378963470459,
300
+ "learning_rate": 8.469585897930557e-06,
301
+ "loss": 0.1333,
302
+ "step": 1150
303
+ },
304
+ {
305
+ "epoch": 7.409302325581395,
306
+ "grad_norm": 1.5636117458343506,
307
+ "learning_rate": 7.583899208932648e-06,
308
+ "loss": 0.091,
309
+ "step": 1200
310
+ },
311
+ {
312
+ "epoch": 7.409302325581395,
313
+ "eval_accuracy": 0.9570330167345092,
314
+ "eval_f1": 0.9567056534394107,
315
+ "eval_loss": 0.14507929980754852,
316
+ "eval_precision": 0.9579537259191305,
317
+ "eval_recall": 0.9570330167345092,
318
+ "eval_runtime": 56.7,
319
+ "eval_samples_per_second": 38.995,
320
+ "eval_steps_per_second": 4.885,
321
+ "step": 1200
322
+ },
323
+ {
324
+ "epoch": 7.7193798449612405,
325
+ "grad_norm": 5.480973720550537,
326
+ "learning_rate": 6.7179349130367235e-06,
327
+ "loss": 0.1089,
328
+ "step": 1250
329
+ },
330
+ {
331
+ "epoch": 8.024806201550387,
332
+ "grad_norm": 6.6158223152160645,
333
+ "learning_rate": 5.878761791611129e-06,
334
+ "loss": 0.1159,
335
+ "step": 1300
336
+ },
337
+ {
338
+ "epoch": 8.024806201550387,
339
+ "eval_accuracy": 0.9629127091813658,
340
+ "eval_f1": 0.9626761639787419,
341
+ "eval_loss": 0.1205127015709877,
342
+ "eval_precision": 0.9635871559570208,
343
+ "eval_recall": 0.9629127091813658,
344
+ "eval_runtime": 56.6004,
345
+ "eval_samples_per_second": 39.063,
346
+ "eval_steps_per_second": 4.894,
347
+ "step": 1300
348
+ },
349
+ {
350
+ "epoch": 8.334883720930232,
351
+ "grad_norm": 5.2470316886901855,
352
+ "learning_rate": 5.073229932302277e-06,
353
+ "loss": 0.0886,
354
+ "step": 1350
355
+ },
356
+ {
357
+ "epoch": 8.644961240310078,
358
+ "grad_norm": 6.15119743347168,
359
+ "learning_rate": 4.307914812442993e-06,
360
+ "loss": 0.1151,
361
+ "step": 1400
362
+ },
363
+ {
364
+ "epoch": 8.644961240310078,
365
+ "eval_accuracy": 0.9665309814563546,
366
+ "eval_f1": 0.9664379980687814,
367
+ "eval_loss": 0.11242391169071198,
368
+ "eval_precision": 0.9665871538113867,
369
+ "eval_recall": 0.9665309814563546,
370
+ "eval_runtime": 56.6713,
371
+ "eval_samples_per_second": 39.014,
372
+ "eval_steps_per_second": 4.888,
373
+ "step": 1400
374
+ },
375
+ {
376
+ "epoch": 8.955038759689922,
377
+ "grad_norm": 12.04592227935791,
378
+ "learning_rate": 3.589063624077802e-06,
379
+ "loss": 0.0798,
380
+ "step": 1450
381
+ },
382
+ {
383
+ "epoch": 9.26046511627907,
384
+ "grad_norm": 13.314713478088379,
385
+ "learning_rate": 2.922544278748801e-06,
386
+ "loss": 0.0735,
387
+ "step": 1500
388
+ },
389
+ {
390
+ "epoch": 9.26046511627907,
391
+ "eval_accuracy": 0.9642695612844867,
392
+ "eval_f1": 0.9641123145223969,
393
+ "eval_loss": 0.11749936640262604,
394
+ "eval_precision": 0.9645326112328965,
395
+ "eval_recall": 0.9642695612844867,
396
+ "eval_runtime": 56.8212,
397
+ "eval_samples_per_second": 38.912,
398
+ "eval_steps_per_second": 4.875,
399
+ "step": 1500
400
+ },
401
+ {
402
+ "epoch": 9.570542635658915,
403
+ "grad_norm": 1.456084132194519,
404
+ "learning_rate": 2.3137975083109153e-06,
405
+ "loss": 0.0746,
406
+ "step": 1550
407
+ },
408
+ {
409
+ "epoch": 9.88062015503876,
410
+ "grad_norm": 2.1265344619750977,
411
+ "learning_rate": 1.7677924527729228e-06,
412
+ "loss": 0.0537,
413
+ "step": 1600
414
+ },
415
+ {
416
+ "epoch": 9.88062015503876,
417
+ "eval_accuracy": 0.9678878335594754,
418
+ "eval_f1": 0.9678102645900477,
419
+ "eval_loss": 0.11535227298736572,
420
+ "eval_precision": 0.9679181198554704,
421
+ "eval_recall": 0.9678878335594754,
422
+ "eval_runtime": 56.5576,
423
+ "eval_samples_per_second": 39.093,
424
+ "eval_steps_per_second": 4.898,
425
+ "step": 1600
426
+ },
427
+ {
428
+ "epoch": 10.186046511627907,
429
+ "grad_norm": 12.95783805847168,
430
+ "learning_rate": 1.2889860976963542e-06,
431
+ "loss": 0.0857,
432
+ "step": 1650
433
+ },
434
+ {
435
+ "epoch": 10.496124031007753,
436
+ "grad_norm": 3.627340078353882,
437
+ "learning_rate": 8.812868922607565e-07,
438
+ "loss": 0.0666,
439
+ "step": 1700
440
+ },
441
+ {
442
+ "epoch": 10.496124031007753,
443
+ "eval_accuracy": 0.9701492537313433,
444
+ "eval_f1": 0.9701426609443169,
445
+ "eval_loss": 0.11616706103086472,
446
+ "eval_precision": 0.9701377402873191,
447
+ "eval_recall": 0.9701492537313433,
448
+ "eval_runtime": 56.4987,
449
+ "eval_samples_per_second": 39.134,
450
+ "eval_steps_per_second": 4.903,
451
+ "step": 1700
452
+ },
453
+ {
454
+ "epoch": 10.806201550387597,
455
+ "grad_norm": 9.41781997680664,
456
+ "learning_rate": 5.480228449774882e-07,
457
+ "loss": 0.0722,
458
+ "step": 1750
459
+ },
460
+ {
461
+ "epoch": 11.111627906976745,
462
+ "grad_norm": 12.687678337097168,
463
+ "learning_rate": 2.9626582353969756e-07,
464
+ "loss": 0.0732,
465
+ "step": 1800
466
+ },
467
+ {
468
+ "epoch": 11.111627906976745,
469
+ "eval_accuracy": 0.9678878335594754,
470
+ "eval_f1": 0.9678179490403084,
471
+ "eval_loss": 0.11334193497896194,
472
+ "eval_precision": 0.9678997125749722,
473
+ "eval_recall": 0.9678878335594754,
474
+ "eval_runtime": 56.7681,
475
+ "eval_samples_per_second": 38.948,
476
+ "eval_steps_per_second": 4.88,
477
+ "step": 1800
478
+ },
479
+ {
480
+ "epoch": 11.421705426356588,
481
+ "grad_norm": 13.630194664001465,
482
+ "learning_rate": 1.1780223451346994e-07,
483
+ "loss": 0.0868,
484
+ "step": 1850
485
+ },
486
+ {
487
+ "epoch": 11.731782945736434,
488
+ "grad_norm": 17.37832260131836,
489
+ "learning_rate": 2.0006053801937543e-08,
490
+ "loss": 0.0775,
491
+ "step": 1900
492
+ },
493
+ {
494
+ "epoch": 11.731782945736434,
495
+ "eval_accuracy": 0.968340117593849,
496
+ "eval_f1": 0.9682674370094175,
497
+ "eval_loss": 0.112978994846344,
498
+ "eval_precision": 0.9683629530510336,
499
+ "eval_recall": 0.968340117593849,
500
+ "eval_runtime": 56.6602,
501
+ "eval_samples_per_second": 39.022,
502
+ "eval_steps_per_second": 4.889,
503
+ "step": 1900
504
+ },
505
+ {
506
+ "epoch": 11.930232558139535,
507
+ "step": 1932,
508
+ "total_flos": 1.7667354394198278e+19,
509
+ "train_loss": 0.17970547851321614,
510
+ "train_runtime": 5653.5088,
511
+ "train_samples_per_second": 10.948,
512
+ "train_steps_per_second": 0.342
513
+ }
514
+ ],
515
+ "logging_steps": 50,
516
+ "max_steps": 1932,
517
+ "num_input_tokens_seen": 0,
518
+ "num_train_epochs": 12,
519
+ "save_steps": 100,
520
+ "stateful_callbacks": {
521
+ "TrainerControl": {
522
+ "args": {
523
+ "should_epoch_stop": false,
524
+ "should_evaluate": false,
525
+ "should_log": false,
526
+ "should_save": true,
527
+ "should_training_stop": true
528
+ },
529
+ "attributes": {}
530
+ }
531
+ },
532
+ "total_flos": 1.7667354394198278e+19,
533
+ "train_batch_size": 8,
534
+ "trial_name": null,
535
+ "trial_params": null
536
+ }