BTX24 commited on
Commit
780cc87
·
verified ·
1 Parent(s): 12cd9bb

End of training

Browse files
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [facebook/convnextv2-base-22k-224](https://huggingface.co/facebook/convnextv2-base-22k-224) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.9135
25
- - Accuracy: 0.5954
26
- - F1: 0.5879
27
- - Precision: 0.5865
28
- - Recall: 0.5954
29
 
30
  ## Model description
31
 
 
21
 
22
  This model is a fine-tuned version of [facebook/convnextv2-base-22k-224](https://huggingface.co/facebook/convnextv2-base-22k-224) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.9120
25
+ - Accuracy: 0.6138
26
+ - F1: 0.5996
27
+ - Precision: 0.5969
28
+ - Recall: 0.6138
29
 
30
  ## Model description
31
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 11.956043956043956,
3
- "eval_accuracy": 0.5932047750229569,
4
- "eval_f1": 0.5674291939014156,
5
- "eval_loss": 0.9468401074409485,
6
- "eval_precision": 0.5709384462011384,
7
- "eval_recall": 0.5932047750229569,
8
- "eval_runtime": 10.108,
9
- "eval_samples_per_second": 107.737,
10
- "eval_steps_per_second": 6.826,
11
- "total_flos": 4.123334853511373e+18,
12
- "train_loss": 1.090085435147379,
13
- "train_runtime": 2115.1144,
14
- "train_samples_per_second": 24.697,
15
- "train_steps_per_second": 0.386
16
  }
 
1
  {
2
+ "epoch": 11.961661341853034,
3
+ "eval_accuracy": 0.6137931034482759,
4
+ "eval_f1": 0.5996477650763593,
5
+ "eval_loss": 0.9119637608528137,
6
+ "eval_precision": 0.596855024118962,
7
+ "eval_recall": 0.6137931034482759,
8
+ "eval_runtime": 4.1197,
9
+ "eval_samples_per_second": 105.59,
10
+ "eval_steps_per_second": 6.797,
11
+ "total_flos": 4.743827088137626e+18,
12
+ "train_loss": 1.079931161342523,
13
+ "train_runtime": 2308.5403,
14
+ "train_samples_per_second": 26.027,
15
+ "train_steps_per_second": 0.405
16
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 11.956043956043956,
3
- "eval_accuracy": 0.5932047750229569,
4
- "eval_f1": 0.5674291939014156,
5
- "eval_loss": 0.9468401074409485,
6
- "eval_precision": 0.5709384462011384,
7
- "eval_recall": 0.5932047750229569,
8
- "eval_runtime": 10.108,
9
- "eval_samples_per_second": 107.737,
10
- "eval_steps_per_second": 6.826
11
  }
 
1
  {
2
+ "epoch": 11.961661341853034,
3
+ "eval_accuracy": 0.6137931034482759,
4
+ "eval_f1": 0.5996477650763593,
5
+ "eval_loss": 0.9119637608528137,
6
+ "eval_precision": 0.596855024118962,
7
+ "eval_recall": 0.6137931034482759,
8
+ "eval_runtime": 4.1197,
9
+ "eval_samples_per_second": 105.59,
10
+ "eval_steps_per_second": 6.797
11
  }
runs/Sep05_08-10-30_43d6627a09b0/events.out.tfevents.1725526395.43d6627a09b0.309.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2221cf45441b9f89a927e8c90687a0e0bf1fabcc59dccee723f36556679457e
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 11.956043956043956,
3
- "total_flos": 4.123334853511373e+18,
4
- "train_loss": 1.090085435147379,
5
- "train_runtime": 2115.1144,
6
- "train_samples_per_second": 24.697,
7
- "train_steps_per_second": 0.386
8
  }
 
1
  {
2
+ "epoch": 11.961661341853034,
3
+ "total_flos": 4.743827088137626e+18,
4
+ "train_loss": 1.079931161342523,
5
+ "train_runtime": 2308.5403,
6
+ "train_samples_per_second": 26.027,
7
+ "train_steps_per_second": 0.405
8
  }
trainer_state.json CHANGED
@@ -1,736 +1,820 @@
1
  {
2
- "best_metric": 0.5932047750229569,
3
- "best_model_checkpoint": "convnextv2-base-22k-224-finetuned-tekno24/checkpoint-614",
4
- "epoch": 11.956043956043956,
5
  "eval_steps": 500,
6
- "global_step": 816,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.14652014652014653,
13
- "grad_norm": 16.747648239135742,
14
- "learning_rate": 6.0975609756097564e-06,
15
- "loss": 1.4185,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.29304029304029305,
20
- "grad_norm": 14.544577598571777,
21
- "learning_rate": 1.2195121951219513e-05,
22
- "loss": 1.4153,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.43956043956043955,
27
- "grad_norm": 12.55716323852539,
28
- "learning_rate": 1.8292682926829268e-05,
29
- "loss": 1.3581,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.5860805860805861,
34
- "grad_norm": 16.329694747924805,
35
- "learning_rate": 2.378048780487805e-05,
36
- "loss": 1.3421,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.7326007326007326,
41
- "grad_norm": 9.924437522888184,
42
- "learning_rate": 2.9878048780487805e-05,
43
- "loss": 1.3174,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.8791208791208791,
48
- "grad_norm": 8.243997573852539,
49
- "learning_rate": 3.597560975609756e-05,
50
- "loss": 1.2755,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.9963369963369964,
55
- "eval_accuracy": 0.4903581267217631,
56
- "eval_f1": 0.39101576597192117,
57
- "eval_loss": 1.2008219957351685,
58
- "eval_precision": 0.45771669331368264,
59
- "eval_recall": 0.4903581267217631,
60
- "eval_runtime": 10.5546,
61
- "eval_samples_per_second": 103.178,
62
- "eval_steps_per_second": 6.537,
63
- "step": 68
64
  },
65
  {
66
- "epoch": 1.0256410256410255,
67
- "grad_norm": 13.558063507080078,
68
- "learning_rate": 4.207317073170732e-05,
69
- "loss": 1.2711,
70
- "step": 70
 
 
 
 
 
71
  },
72
  {
73
- "epoch": 1.1721611721611722,
74
- "grad_norm": 27.544034957885742,
75
- "learning_rate": 4.817073170731707e-05,
76
- "loss": 1.2731,
77
  "step": 80
78
  },
79
  {
80
- "epoch": 1.3186813186813187,
81
- "grad_norm": 12.061452865600586,
82
- "learning_rate": 4.952316076294278e-05,
83
- "loss": 1.2582,
84
  "step": 90
85
  },
86
  {
87
- "epoch": 1.4652014652014653,
88
- "grad_norm": 13.313647270202637,
89
- "learning_rate": 4.884196185286104e-05,
90
- "loss": 1.2729,
91
  "step": 100
92
  },
93
  {
94
- "epoch": 1.6117216117216118,
95
- "grad_norm": 8.85893440246582,
96
- "learning_rate": 4.816076294277929e-05,
97
- "loss": 1.2093,
98
  "step": 110
99
  },
100
  {
101
- "epoch": 1.7582417582417582,
102
- "grad_norm": 7.619632244110107,
103
- "learning_rate": 4.747956403269755e-05,
104
- "loss": 1.2445,
105
  "step": 120
106
  },
107
  {
108
- "epoch": 1.9047619047619047,
109
- "grad_norm": 9.313189506530762,
110
- "learning_rate": 4.6798365122615805e-05,
111
- "loss": 1.1711,
112
  "step": 130
113
  },
114
  {
115
- "epoch": 1.9926739926739927,
116
- "eval_accuracy": 0.5353535353535354,
117
- "eval_f1": 0.47656758341858724,
118
- "eval_loss": 1.0650511980056763,
119
- "eval_precision": 0.48653241546321574,
120
- "eval_recall": 0.5353535353535354,
121
- "eval_runtime": 10.5232,
122
- "eval_samples_per_second": 103.486,
123
- "eval_steps_per_second": 6.557,
124
- "step": 136
125
- },
126
- {
127
- "epoch": 2.051282051282051,
128
- "grad_norm": 9.400361061096191,
129
- "learning_rate": 4.6117166212534065e-05,
130
- "loss": 1.1145,
131
  "step": 140
132
  },
133
  {
134
- "epoch": 2.197802197802198,
135
- "grad_norm": 5.284915924072266,
136
- "learning_rate": 4.543596730245232e-05,
137
- "loss": 1.1809,
138
  "step": 150
139
  },
140
  {
141
- "epoch": 2.3443223443223444,
142
- "grad_norm": 10.18017292022705,
143
- "learning_rate": 4.475476839237057e-05,
144
- "loss": 1.1668,
 
 
 
 
 
 
 
 
 
 
 
 
145
  "step": 160
146
  },
147
  {
148
- "epoch": 2.490842490842491,
149
- "grad_norm": 7.79976224899292,
150
- "learning_rate": 4.407356948228883e-05,
151
- "loss": 1.1582,
152
  "step": 170
153
  },
154
  {
155
- "epoch": 2.6373626373626373,
156
- "grad_norm": 8.457806587219238,
157
- "learning_rate": 4.339237057220708e-05,
158
- "loss": 1.1628,
159
  "step": 180
160
  },
161
  {
162
- "epoch": 2.7838827838827838,
163
- "grad_norm": 6.220970630645752,
164
- "learning_rate": 4.271117166212534e-05,
165
- "loss": 1.2035,
166
  "step": 190
167
  },
168
  {
169
- "epoch": 2.9304029304029307,
170
- "grad_norm": 7.159059524536133,
171
- "learning_rate": 4.20299727520436e-05,
172
- "loss": 1.1599,
173
  "step": 200
174
  },
175
  {
176
- "epoch": 2.989010989010989,
177
- "eval_accuracy": 0.54178145087236,
178
- "eval_f1": 0.5077316365087595,
179
- "eval_loss": 1.0533095598220825,
180
- "eval_precision": 0.5274645952658832,
181
- "eval_recall": 0.54178145087236,
182
- "eval_runtime": 10.5408,
183
- "eval_samples_per_second": 103.313,
184
- "eval_steps_per_second": 6.546,
185
- "step": 204
186
- },
187
- {
188
- "epoch": 3.076923076923077,
189
- "grad_norm": 5.542409896850586,
190
- "learning_rate": 4.1348773841961855e-05,
191
- "loss": 1.1692,
192
  "step": 210
193
  },
194
  {
195
- "epoch": 3.2234432234432235,
196
- "grad_norm": 7.356290340423584,
197
- "learning_rate": 4.066757493188011e-05,
198
- "loss": 1.1176,
199
  "step": 220
200
  },
201
  {
202
- "epoch": 3.36996336996337,
203
- "grad_norm": 6.849288463592529,
204
- "learning_rate": 3.998637602179837e-05,
205
- "loss": 1.1335,
206
  "step": 230
207
  },
208
  {
209
- "epoch": 3.5164835164835164,
210
- "grad_norm": 11.968345642089844,
211
- "learning_rate": 3.930517711171662e-05,
212
- "loss": 1.14,
 
 
 
 
 
 
 
 
 
 
 
 
213
  "step": 240
214
  },
215
  {
216
- "epoch": 3.663003663003663,
217
- "grad_norm": 8.681733131408691,
218
- "learning_rate": 3.862397820163488e-05,
219
- "loss": 1.1195,
220
  "step": 250
221
  },
222
  {
223
- "epoch": 3.8095238095238093,
224
- "grad_norm": 8.258611679077148,
225
- "learning_rate": 3.794277929155314e-05,
226
- "loss": 1.1578,
227
  "step": 260
228
  },
229
  {
230
- "epoch": 3.956043956043956,
231
- "grad_norm": 9.197736740112305,
232
- "learning_rate": 3.726158038147139e-05,
233
- "loss": 1.1595,
234
  "step": 270
235
  },
236
  {
237
- "epoch": 4.0,
238
- "eval_accuracy": 0.5454545454545454,
239
- "eval_f1": 0.5413509466694156,
240
- "eval_loss": 1.0423349142074585,
241
- "eval_precision": 0.5690587889084956,
242
- "eval_recall": 0.5454545454545454,
243
- "eval_runtime": 10.5435,
244
- "eval_samples_per_second": 103.286,
245
- "eval_steps_per_second": 6.544,
246
- "step": 273
247
- },
248
- {
249
- "epoch": 4.102564102564102,
250
- "grad_norm": 7.987867832183838,
251
- "learning_rate": 3.6580381471389645e-05,
252
- "loss": 1.0988,
253
  "step": 280
254
  },
255
  {
256
- "epoch": 4.249084249084249,
257
- "grad_norm": 6.383251667022705,
258
- "learning_rate": 3.5899182561307905e-05,
259
- "loss": 1.1338,
260
  "step": 290
261
  },
262
  {
263
- "epoch": 4.395604395604396,
264
- "grad_norm": 7.749887943267822,
265
- "learning_rate": 3.521798365122616e-05,
266
- "loss": 1.1635,
267
  "step": 300
268
  },
269
  {
270
- "epoch": 4.542124542124542,
271
- "grad_norm": 9.830282211303711,
272
- "learning_rate": 3.453678474114442e-05,
273
- "loss": 1.0912,
274
  "step": 310
275
  },
276
  {
277
- "epoch": 4.688644688644689,
278
- "grad_norm": 9.785733222961426,
279
- "learning_rate": 3.385558583106267e-05,
280
- "loss": 1.1293,
 
 
 
 
 
 
 
 
 
 
 
 
281
  "step": 320
282
  },
283
  {
284
- "epoch": 4.835164835164835,
285
- "grad_norm": 9.794586181640625,
286
- "learning_rate": 3.317438692098093e-05,
287
- "loss": 1.0867,
288
  "step": 330
289
  },
290
  {
291
- "epoch": 4.981684981684982,
292
- "grad_norm": 5.60059928894043,
293
- "learning_rate": 3.249318801089918e-05,
294
  "loss": 1.096,
295
  "step": 340
296
  },
297
  {
298
- "epoch": 4.996336996336996,
299
- "eval_accuracy": 0.5610651974288338,
300
- "eval_f1": 0.5462709659715219,
301
- "eval_loss": 1.0159707069396973,
302
- "eval_precision": 0.5419035604614381,
303
- "eval_recall": 0.5610651974288338,
304
- "eval_runtime": 10.5161,
305
- "eval_samples_per_second": 103.556,
306
- "eval_steps_per_second": 6.561,
307
- "step": 341
308
- },
309
- {
310
- "epoch": 5.128205128205128,
311
- "grad_norm": 6.809742450714111,
312
- "learning_rate": 3.181198910081744e-05,
313
- "loss": 1.071,
314
  "step": 350
315
  },
316
  {
317
- "epoch": 5.274725274725275,
318
- "grad_norm": 7.61679744720459,
319
- "learning_rate": 3.1130790190735695e-05,
320
- "loss": 1.1031,
321
  "step": 360
322
  },
323
  {
324
- "epoch": 5.4212454212454215,
325
- "grad_norm": 7.127725601196289,
326
- "learning_rate": 3.0449591280653955e-05,
327
- "loss": 1.0971,
328
  "step": 370
329
  },
330
  {
331
- "epoch": 5.5677655677655675,
332
- "grad_norm": 6.362829685211182,
333
- "learning_rate": 2.9768392370572208e-05,
334
- "loss": 1.1078,
335
  "step": 380
336
  },
337
  {
338
- "epoch": 5.714285714285714,
339
- "grad_norm": 8.149834632873535,
340
- "learning_rate": 2.9087193460490464e-05,
341
- "loss": 1.0871,
342
  "step": 390
343
  },
344
  {
345
- "epoch": 5.860805860805861,
346
- "grad_norm": 8.003039360046387,
347
- "learning_rate": 2.840599455040872e-05,
348
- "loss": 1.0592,
349
- "step": 400
 
 
 
 
 
350
  },
351
  {
352
- "epoch": 5.992673992673993,
353
- "eval_accuracy": 0.5766758494031221,
354
- "eval_f1": 0.5414852771503386,
355
- "eval_loss": 0.9847236275672913,
356
- "eval_precision": 0.5484837093262834,
357
- "eval_recall": 0.5766758494031221,
358
- "eval_runtime": 10.5345,
359
- "eval_samples_per_second": 103.374,
360
- "eval_steps_per_second": 6.55,
361
- "step": 409
362
  },
363
  {
364
- "epoch": 6.007326007326007,
365
- "grad_norm": 9.939899444580078,
366
- "learning_rate": 2.772479564032698e-05,
367
- "loss": 1.0573,
368
  "step": 410
369
  },
370
  {
371
- "epoch": 6.153846153846154,
372
- "grad_norm": 8.810173988342285,
373
- "learning_rate": 2.7043596730245236e-05,
374
- "loss": 1.038,
375
  "step": 420
376
  },
377
  {
378
- "epoch": 6.3003663003663,
379
- "grad_norm": 9.228567123413086,
380
- "learning_rate": 2.6362397820163485e-05,
381
- "loss": 1.0441,
382
  "step": 430
383
  },
384
  {
385
- "epoch": 6.446886446886447,
386
- "grad_norm": 10.357806205749512,
387
- "learning_rate": 2.5681198910081745e-05,
388
- "loss": 1.0313,
389
  "step": 440
390
  },
391
  {
392
- "epoch": 6.593406593406593,
393
- "grad_norm": 6.874061584472656,
394
- "learning_rate": 2.5e-05,
395
- "loss": 1.0667,
396
  "step": 450
397
  },
398
  {
399
- "epoch": 6.73992673992674,
400
- "grad_norm": 10.741703987121582,
401
- "learning_rate": 2.4318801089918257e-05,
402
- "loss": 1.0353,
403
  "step": 460
404
  },
405
  {
406
- "epoch": 6.886446886446887,
407
- "grad_norm": 7.215301513671875,
408
- "learning_rate": 2.3637602179836514e-05,
409
- "loss": 1.0706,
410
- "step": 470
 
 
 
 
 
411
  },
412
  {
413
- "epoch": 6.989010989010989,
414
- "eval_accuracy": 0.588613406795225,
415
- "eval_f1": 0.5835655659113839,
416
- "eval_loss": 0.9867706894874573,
417
- "eval_precision": 0.5861531495568983,
418
- "eval_recall": 0.588613406795225,
419
- "eval_runtime": 10.5429,
420
- "eval_samples_per_second": 103.292,
421
- "eval_steps_per_second": 6.545,
422
- "step": 477
423
  },
424
  {
425
- "epoch": 7.032967032967033,
426
- "grad_norm": 9.452975273132324,
427
- "learning_rate": 2.295640326975477e-05,
428
- "loss": 1.1093,
429
  "step": 480
430
  },
431
  {
432
- "epoch": 7.17948717948718,
433
- "grad_norm": 5.909883499145508,
434
- "learning_rate": 2.2275204359673023e-05,
435
- "loss": 1.0628,
436
  "step": 490
437
  },
438
  {
439
- "epoch": 7.326007326007326,
440
- "grad_norm": 8.430510520935059,
441
- "learning_rate": 2.1594005449591282e-05,
442
- "loss": 1.0175,
443
  "step": 500
444
  },
445
  {
446
- "epoch": 7.472527472527473,
447
- "grad_norm": 11.566703796386719,
448
- "learning_rate": 2.091280653950954e-05,
449
- "loss": 0.9975,
450
  "step": 510
451
  },
452
  {
453
- "epoch": 7.619047619047619,
454
- "grad_norm": 8.561046600341797,
455
- "learning_rate": 2.023160762942779e-05,
456
- "loss": 1.057,
457
  "step": 520
458
  },
459
  {
460
- "epoch": 7.7655677655677655,
461
- "grad_norm": 9.214874267578125,
462
- "learning_rate": 1.955040871934605e-05,
463
- "loss": 1.0085,
464
  "step": 530
465
  },
466
  {
467
- "epoch": 7.912087912087912,
468
- "grad_norm": 7.050257682800293,
469
- "learning_rate": 1.8869209809264307e-05,
470
- "loss": 1.0404,
471
  "step": 540
472
  },
473
  {
474
- "epoch": 8.0,
475
- "eval_accuracy": 0.5867768595041323,
476
- "eval_f1": 0.5736657074074295,
477
- "eval_loss": 0.9758484363555908,
478
- "eval_precision": 0.5694509571644558,
479
- "eval_recall": 0.5867768595041323,
480
- "eval_runtime": 10.5054,
481
- "eval_samples_per_second": 103.661,
482
- "eval_steps_per_second": 6.568,
483
- "step": 546
484
- },
485
- {
486
- "epoch": 8.058608058608058,
487
- "grad_norm": 9.406164169311523,
488
- "learning_rate": 1.818801089918256e-05,
489
- "loss": 1.0564,
490
  "step": 550
491
  },
492
  {
493
- "epoch": 8.205128205128204,
494
- "grad_norm": 8.063167572021484,
495
- "learning_rate": 1.750681198910082e-05,
496
- "loss": 1.0381,
497
  "step": 560
498
  },
499
  {
500
- "epoch": 8.351648351648352,
501
- "grad_norm": 7.877150535583496,
502
- "learning_rate": 1.6825613079019073e-05,
503
- "loss": 0.9996,
504
  "step": 570
505
  },
506
  {
507
- "epoch": 8.498168498168498,
508
- "grad_norm": 10.49506664276123,
509
- "learning_rate": 1.614441416893733e-05,
510
- "loss": 0.9723,
511
  "step": 580
512
  },
513
  {
514
- "epoch": 8.644688644688644,
515
- "grad_norm": 8.74528694152832,
516
- "learning_rate": 1.546321525885559e-05,
517
- "loss": 1.0267,
518
  "step": 590
519
  },
520
  {
521
- "epoch": 8.791208791208792,
522
- "grad_norm": 7.0554962158203125,
523
- "learning_rate": 1.4782016348773841e-05,
524
- "loss": 1.0251,
525
  "step": 600
526
  },
527
  {
528
- "epoch": 8.937728937728938,
529
- "grad_norm": 7.974668502807617,
530
- "learning_rate": 1.41008174386921e-05,
531
- "loss": 1.0059,
532
  "step": 610
533
  },
534
  {
535
- "epoch": 8.996336996336996,
536
- "eval_accuracy": 0.5932047750229569,
537
- "eval_f1": 0.5674291939014156,
538
- "eval_loss": 0.9468401074409485,
539
- "eval_precision": 0.5709384462011384,
540
- "eval_recall": 0.5932047750229569,
541
- "eval_runtime": 10.4878,
542
- "eval_samples_per_second": 103.835,
543
- "eval_steps_per_second": 6.579,
544
- "step": 614
545
- },
546
- {
547
- "epoch": 9.084249084249084,
548
- "grad_norm": 8.571954727172852,
549
- "learning_rate": 1.3419618528610356e-05,
550
- "loss": 0.9878,
551
  "step": 620
552
  },
553
  {
554
- "epoch": 9.23076923076923,
555
- "grad_norm": 8.298359870910645,
556
- "learning_rate": 1.273841961852861e-05,
557
- "loss": 1.0056,
 
 
 
 
 
 
 
 
 
 
 
 
558
  "step": 630
559
  },
560
  {
561
- "epoch": 9.377289377289378,
562
- "grad_norm": 7.087368965148926,
563
- "learning_rate": 1.2057220708446868e-05,
564
- "loss": 1.037,
565
  "step": 640
566
  },
567
  {
568
- "epoch": 9.523809523809524,
569
- "grad_norm": 7.409104347229004,
570
- "learning_rate": 1.1376021798365123e-05,
571
- "loss": 0.9326,
572
  "step": 650
573
  },
574
  {
575
- "epoch": 9.67032967032967,
576
- "grad_norm": 7.1452531814575195,
577
- "learning_rate": 1.0694822888283379e-05,
578
- "loss": 0.9723,
579
  "step": 660
580
  },
581
  {
582
- "epoch": 9.816849816849818,
583
- "grad_norm": 8.340729713439941,
584
- "learning_rate": 1.0013623978201635e-05,
585
- "loss": 0.9914,
586
  "step": 670
587
  },
588
  {
589
- "epoch": 9.963369963369964,
590
- "grad_norm": 10.698880195617676,
591
- "learning_rate": 9.332425068119891e-06,
592
- "loss": 0.965,
593
  "step": 680
594
  },
595
  {
596
- "epoch": 9.992673992673993,
597
- "eval_accuracy": 0.5932047750229569,
598
- "eval_f1": 0.5804462637419235,
599
- "eval_loss": 0.9565483331680298,
600
- "eval_precision": 0.5857538351608745,
601
- "eval_recall": 0.5932047750229569,
602
- "eval_runtime": 10.4791,
603
- "eval_samples_per_second": 103.921,
604
- "eval_steps_per_second": 6.585,
605
- "step": 682
606
- },
607
- {
608
- "epoch": 10.10989010989011,
609
- "grad_norm": 10.67618465423584,
610
- "learning_rate": 8.651226158038147e-06,
611
- "loss": 0.9732,
612
  "step": 690
613
  },
614
  {
615
- "epoch": 10.256410256410255,
616
- "grad_norm": 9.102426528930664,
617
- "learning_rate": 7.970027247956404e-06,
618
- "loss": 0.9804,
619
  "step": 700
620
  },
621
  {
622
- "epoch": 10.402930402930403,
623
- "grad_norm": 11.115556716918945,
624
- "learning_rate": 7.288828337874659e-06,
625
- "loss": 0.9855,
 
 
 
 
 
 
 
 
 
 
 
 
626
  "step": 710
627
  },
628
  {
629
- "epoch": 10.54945054945055,
630
- "grad_norm": 11.00426197052002,
631
- "learning_rate": 6.607629427792916e-06,
632
- "loss": 0.9691,
633
  "step": 720
634
  },
635
  {
636
- "epoch": 10.695970695970695,
637
- "grad_norm": 10.043339729309082,
638
- "learning_rate": 5.9264305177111724e-06,
639
- "loss": 0.9365,
640
  "step": 730
641
  },
642
  {
643
- "epoch": 10.842490842490843,
644
- "grad_norm": 11.518232345581055,
645
- "learning_rate": 5.245231607629428e-06,
646
- "loss": 0.9659,
647
  "step": 740
648
  },
649
  {
650
- "epoch": 10.989010989010989,
651
- "grad_norm": 9.314995765686035,
652
- "learning_rate": 4.564032697547684e-06,
653
- "loss": 0.9362,
654
- "step": 750
655
- },
656
- {
657
- "epoch": 10.989010989010989,
658
- "eval_accuracy": 0.588613406795225,
659
- "eval_f1": 0.5778239696801104,
660
- "eval_loss": 0.9466218948364258,
661
- "eval_precision": 0.5767647827506791,
662
- "eval_recall": 0.588613406795225,
663
- "eval_runtime": 10.5258,
664
- "eval_samples_per_second": 103.46,
665
- "eval_steps_per_second": 6.555,
666
  "step": 750
667
  },
668
  {
669
- "epoch": 11.135531135531135,
670
- "grad_norm": 10.667020797729492,
671
- "learning_rate": 3.88283378746594e-06,
672
- "loss": 0.9564,
673
  "step": 760
674
  },
675
  {
676
- "epoch": 11.282051282051283,
677
- "grad_norm": 10.923125267028809,
678
- "learning_rate": 3.2016348773841965e-06,
679
- "loss": 0.9177,
680
  "step": 770
681
  },
682
  {
683
- "epoch": 11.428571428571429,
684
- "grad_norm": 9.893692016601562,
685
- "learning_rate": 2.5204359673024523e-06,
686
- "loss": 0.9732,
687
  "step": 780
688
  },
689
  {
690
- "epoch": 11.575091575091575,
691
- "grad_norm": 8.784825325012207,
692
- "learning_rate": 1.8392370572207086e-06,
693
- "loss": 0.9558,
 
 
 
 
 
 
 
 
 
 
 
 
694
  "step": 790
695
  },
696
  {
697
- "epoch": 11.72161172161172,
698
- "grad_norm": 8.990778923034668,
699
- "learning_rate": 1.1580381471389646e-06,
700
- "loss": 0.9656,
701
  "step": 800
702
  },
703
  {
704
- "epoch": 11.868131868131869,
705
- "grad_norm": 8.444221496582031,
706
- "learning_rate": 4.768392370572207e-07,
707
- "loss": 0.9334,
708
  "step": 810
709
  },
710
  {
711
- "epoch": 11.956043956043956,
712
- "eval_accuracy": 0.5858585858585859,
713
- "eval_f1": 0.5699572730591373,
714
- "eval_loss": 0.9441593885421753,
715
- "eval_precision": 0.5692451226403554,
716
- "eval_recall": 0.5858585858585859,
717
- "eval_runtime": 10.5926,
718
- "eval_samples_per_second": 102.808,
719
- "eval_steps_per_second": 6.514,
720
- "step": 816
721
- },
722
- {
723
- "epoch": 11.956043956043956,
724
- "step": 816,
725
- "total_flos": 4.123334853511373e+18,
726
- "train_loss": 1.090085435147379,
727
- "train_runtime": 2115.1144,
728
- "train_samples_per_second": 24.697,
729
- "train_steps_per_second": 0.386
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  }
731
  ],
732
  "logging_steps": 10,
733
- "max_steps": 816,
734
  "num_input_tokens_seen": 0,
735
  "num_train_epochs": 12,
736
  "save_steps": 500,
@@ -746,7 +830,7 @@
746
  "attributes": {}
747
  }
748
  },
749
- "total_flos": 4.123334853511373e+18,
750
  "train_batch_size": 16,
751
  "trial_name": null,
752
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6137931034482759,
3
+ "best_model_checkpoint": "convnextv2-base-22k-224-finetuned-tekno24/checkpoint-860",
4
+ "epoch": 11.961661341853034,
5
  "eval_steps": 500,
6
+ "global_step": 936,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.12779552715654952,
13
+ "grad_norm": 11.641097068786621,
14
+ "learning_rate": 5.319148936170213e-06,
15
+ "loss": 1.4077,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.25559105431309903,
20
+ "grad_norm": 10.328614234924316,
21
+ "learning_rate": 1.0638297872340426e-05,
22
+ "loss": 1.3907,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.38338658146964855,
27
+ "grad_norm": 11.496760368347168,
28
+ "learning_rate": 1.595744680851064e-05,
29
+ "loss": 1.3597,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.5111821086261981,
34
+ "grad_norm": 9.975175857543945,
35
+ "learning_rate": 2.1276595744680852e-05,
36
+ "loss": 1.354,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.6389776357827476,
41
+ "grad_norm": 11.381126403808594,
42
+ "learning_rate": 2.6595744680851064e-05,
43
+ "loss": 1.3266,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.7667731629392971,
48
+ "grad_norm": 9.771955490112305,
49
+ "learning_rate": 3.191489361702128e-05,
50
+ "loss": 1.291,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 0.8945686900958466,
55
+ "grad_norm": 21.0851993560791,
56
+ "learning_rate": 3.617021276595745e-05,
57
+ "loss": 1.3179,
58
+ "step": 70
 
 
 
 
 
59
  },
60
  {
61
+ "epoch": 0.9968051118210862,
62
+ "eval_accuracy": 0.4206896551724138,
63
+ "eval_f1": 0.39792953648489043,
64
+ "eval_loss": 1.2415151596069336,
65
+ "eval_precision": 0.4642161976696202,
66
+ "eval_recall": 0.4206896551724138,
67
+ "eval_runtime": 4.2118,
68
+ "eval_samples_per_second": 103.281,
69
+ "eval_steps_per_second": 6.648,
70
+ "step": 78
71
  },
72
  {
73
+ "epoch": 1.0223642172523961,
74
+ "grad_norm": 19.948223114013672,
75
+ "learning_rate": 4.148936170212766e-05,
76
+ "loss": 1.2668,
77
  "step": 80
78
  },
79
  {
80
+ "epoch": 1.1501597444089458,
81
+ "grad_norm": 11.563603401184082,
82
+ "learning_rate": 4.680851063829788e-05,
83
+ "loss": 1.2202,
84
  "step": 90
85
  },
86
  {
87
+ "epoch": 1.2779552715654952,
88
+ "grad_norm": 9.605425834655762,
89
+ "learning_rate": 4.97624703087886e-05,
90
+ "loss": 1.2626,
91
  "step": 100
92
  },
93
  {
94
+ "epoch": 1.4057507987220448,
95
+ "grad_norm": 10.846478462219238,
96
+ "learning_rate": 4.9168646080760093e-05,
97
+ "loss": 1.251,
98
  "step": 110
99
  },
100
  {
101
+ "epoch": 1.5335463258785942,
102
+ "grad_norm": 10.546998023986816,
103
+ "learning_rate": 4.8574821852731594e-05,
104
+ "loss": 1.2136,
105
  "step": 120
106
  },
107
  {
108
+ "epoch": 1.6613418530351438,
109
+ "grad_norm": 10.678705215454102,
110
+ "learning_rate": 4.798099762470309e-05,
111
+ "loss": 1.1764,
112
  "step": 130
113
  },
114
  {
115
+ "epoch": 1.7891373801916934,
116
+ "grad_norm": 10.283668518066406,
117
+ "learning_rate": 4.738717339667459e-05,
118
+ "loss": 1.2624,
 
 
 
 
 
 
 
 
 
 
 
 
119
  "step": 140
120
  },
121
  {
122
+ "epoch": 1.9169329073482428,
123
+ "grad_norm": 10.07646656036377,
124
+ "learning_rate": 4.679334916864608e-05,
125
+ "loss": 1.1998,
126
  "step": 150
127
  },
128
  {
129
+ "epoch": 1.9936102236421727,
130
+ "eval_accuracy": 0.5103448275862069,
131
+ "eval_f1": 0.4525112700085999,
132
+ "eval_loss": 1.0768730640411377,
133
+ "eval_precision": 0.5309344450319118,
134
+ "eval_recall": 0.5103448275862069,
135
+ "eval_runtime": 4.2293,
136
+ "eval_samples_per_second": 102.853,
137
+ "eval_steps_per_second": 6.62,
138
+ "step": 156
139
+ },
140
+ {
141
+ "epoch": 2.0447284345047922,
142
+ "grad_norm": 10.981221199035645,
143
+ "learning_rate": 4.6199524940617575e-05,
144
+ "loss": 1.1287,
145
  "step": 160
146
  },
147
  {
148
+ "epoch": 2.1725239616613417,
149
+ "grad_norm": 6.460533142089844,
150
+ "learning_rate": 4.5605700712589075e-05,
151
+ "loss": 1.1806,
152
  "step": 170
153
  },
154
  {
155
+ "epoch": 2.3003194888178915,
156
+ "grad_norm": 7.450798511505127,
157
+ "learning_rate": 4.501187648456057e-05,
158
+ "loss": 1.1834,
159
  "step": 180
160
  },
161
  {
162
+ "epoch": 2.428115015974441,
163
+ "grad_norm": 5.972067832946777,
164
+ "learning_rate": 4.441805225653207e-05,
165
+ "loss": 1.1639,
166
  "step": 190
167
  },
168
  {
169
+ "epoch": 2.5559105431309903,
170
+ "grad_norm": 10.565897941589355,
171
+ "learning_rate": 4.382422802850357e-05,
172
+ "loss": 1.143,
173
  "step": 200
174
  },
175
  {
176
+ "epoch": 2.68370607028754,
177
+ "grad_norm": 11.751137733459473,
178
+ "learning_rate": 4.323040380047506e-05,
179
+ "loss": 1.1911,
 
 
 
 
 
 
 
 
 
 
 
 
180
  "step": 210
181
  },
182
  {
183
+ "epoch": 2.8115015974440896,
184
+ "grad_norm": 8.518805503845215,
185
+ "learning_rate": 4.263657957244656e-05,
186
+ "loss": 1.169,
187
  "step": 220
188
  },
189
  {
190
+ "epoch": 2.939297124600639,
191
+ "grad_norm": 7.631802558898926,
192
+ "learning_rate": 4.204275534441806e-05,
193
+ "loss": 1.168,
194
  "step": 230
195
  },
196
  {
197
+ "epoch": 2.9904153354632586,
198
+ "eval_accuracy": 0.5494252873563218,
199
+ "eval_f1": 0.5033023418313557,
200
+ "eval_loss": 1.0573328733444214,
201
+ "eval_precision": 0.5604674717576669,
202
+ "eval_recall": 0.5494252873563218,
203
+ "eval_runtime": 4.2015,
204
+ "eval_samples_per_second": 103.535,
205
+ "eval_steps_per_second": 6.664,
206
+ "step": 234
207
+ },
208
+ {
209
+ "epoch": 3.0670926517571884,
210
+ "grad_norm": 7.9839911460876465,
211
+ "learning_rate": 4.144893111638955e-05,
212
+ "loss": 1.1172,
213
  "step": 240
214
  },
215
  {
216
+ "epoch": 3.194888178913738,
217
+ "grad_norm": 8.082262992858887,
218
+ "learning_rate": 4.0855106888361044e-05,
219
+ "loss": 1.1659,
220
  "step": 250
221
  },
222
  {
223
+ "epoch": 3.3226837060702876,
224
+ "grad_norm": 5.319189548492432,
225
+ "learning_rate": 4.0261282660332545e-05,
226
+ "loss": 1.1493,
227
  "step": 260
228
  },
229
  {
230
+ "epoch": 3.450479233226837,
231
+ "grad_norm": 8.54591178894043,
232
+ "learning_rate": 3.966745843230404e-05,
233
+ "loss": 1.1158,
234
  "step": 270
235
  },
236
  {
237
+ "epoch": 3.5782747603833864,
238
+ "grad_norm": 9.336274147033691,
239
+ "learning_rate": 3.907363420427554e-05,
240
+ "loss": 1.1638,
 
 
 
 
 
 
 
 
 
 
 
 
241
  "step": 280
242
  },
243
  {
244
+ "epoch": 3.7060702875399363,
245
+ "grad_norm": 8.625086784362793,
246
+ "learning_rate": 3.847980997624703e-05,
247
+ "loss": 1.0932,
248
  "step": 290
249
  },
250
  {
251
+ "epoch": 3.8338658146964857,
252
+ "grad_norm": 9.026288032531738,
253
+ "learning_rate": 3.7885985748218526e-05,
254
+ "loss": 1.1056,
255
  "step": 300
256
  },
257
  {
258
+ "epoch": 3.961661341853035,
259
+ "grad_norm": 10.40346622467041,
260
+ "learning_rate": 3.7292161520190026e-05,
261
+ "loss": 1.1107,
262
  "step": 310
263
  },
264
  {
265
+ "epoch": 4.0,
266
+ "eval_accuracy": 0.5540229885057472,
267
+ "eval_f1": 0.5163004360032211,
268
+ "eval_loss": 0.9923611879348755,
269
+ "eval_precision": 0.525717492734745,
270
+ "eval_recall": 0.5540229885057472,
271
+ "eval_runtime": 4.1795,
272
+ "eval_samples_per_second": 104.079,
273
+ "eval_steps_per_second": 6.699,
274
+ "step": 313
275
+ },
276
+ {
277
+ "epoch": 4.0894568690095845,
278
+ "grad_norm": 6.684942722320557,
279
+ "learning_rate": 3.669833729216152e-05,
280
+ "loss": 1.1252,
281
  "step": 320
282
  },
283
  {
284
+ "epoch": 4.217252396166134,
285
+ "grad_norm": 10.904282569885254,
286
+ "learning_rate": 3.6104513064133013e-05,
287
+ "loss": 1.1098,
288
  "step": 330
289
  },
290
  {
291
+ "epoch": 4.345047923322683,
292
+ "grad_norm": 9.764264106750488,
293
+ "learning_rate": 3.5510688836104514e-05,
294
  "loss": 1.096,
295
  "step": 340
296
  },
297
  {
298
+ "epoch": 4.472843450479234,
299
+ "grad_norm": 8.857853889465332,
300
+ "learning_rate": 3.4916864608076014e-05,
301
+ "loss": 1.0528,
 
 
 
 
 
 
 
 
 
 
 
 
302
  "step": 350
303
  },
304
  {
305
+ "epoch": 4.600638977635783,
306
+ "grad_norm": 7.500421524047852,
307
+ "learning_rate": 3.432304038004751e-05,
308
+ "loss": 1.1455,
309
  "step": 360
310
  },
311
  {
312
+ "epoch": 4.728434504792332,
313
+ "grad_norm": 9.362208366394043,
314
+ "learning_rate": 3.372921615201901e-05,
315
+ "loss": 1.0993,
316
  "step": 370
317
  },
318
  {
319
+ "epoch": 4.856230031948882,
320
+ "grad_norm": 9.38844108581543,
321
+ "learning_rate": 3.31353919239905e-05,
322
+ "loss": 1.0855,
323
  "step": 380
324
  },
325
  {
326
+ "epoch": 4.984025559105431,
327
+ "grad_norm": 6.451258659362793,
328
+ "learning_rate": 3.2541567695961995e-05,
329
+ "loss": 1.1062,
330
  "step": 390
331
  },
332
  {
333
+ "epoch": 4.996805111821086,
334
+ "eval_accuracy": 0.5747126436781609,
335
+ "eval_f1": 0.5507051314754062,
336
+ "eval_loss": 1.0018237829208374,
337
+ "eval_precision": 0.5659922189344513,
338
+ "eval_recall": 0.5747126436781609,
339
+ "eval_runtime": 4.1822,
340
+ "eval_samples_per_second": 104.012,
341
+ "eval_steps_per_second": 6.695,
342
+ "step": 391
343
  },
344
  {
345
+ "epoch": 5.111821086261981,
346
+ "grad_norm": 8.044933319091797,
347
+ "learning_rate": 3.1947743467933496e-05,
348
+ "loss": 1.0556,
349
+ "step": 400
 
 
 
 
 
350
  },
351
  {
352
+ "epoch": 5.23961661341853,
353
+ "grad_norm": 9.200007438659668,
354
+ "learning_rate": 3.135391923990499e-05,
355
+ "loss": 1.056,
356
  "step": 410
357
  },
358
  {
359
+ "epoch": 5.36741214057508,
360
+ "grad_norm": 9.929868698120117,
361
+ "learning_rate": 3.076009501187649e-05,
362
+ "loss": 1.0098,
363
  "step": 420
364
  },
365
  {
366
+ "epoch": 5.49520766773163,
367
+ "grad_norm": 7.43991231918335,
368
+ "learning_rate": 3.0166270783847983e-05,
369
+ "loss": 1.1396,
370
  "step": 430
371
  },
372
  {
373
+ "epoch": 5.623003194888179,
374
+ "grad_norm": 7.019424915313721,
375
+ "learning_rate": 2.9572446555819477e-05,
376
+ "loss": 1.0659,
377
  "step": 440
378
  },
379
  {
380
+ "epoch": 5.7507987220447285,
381
+ "grad_norm": 7.227243900299072,
382
+ "learning_rate": 2.8978622327790977e-05,
383
+ "loss": 1.0453,
384
  "step": 450
385
  },
386
  {
387
+ "epoch": 5.878594249201278,
388
+ "grad_norm": 9.71042537689209,
389
+ "learning_rate": 2.838479809976247e-05,
390
+ "loss": 1.0331,
391
  "step": 460
392
  },
393
  {
394
+ "epoch": 5.993610223642173,
395
+ "eval_accuracy": 0.593103448275862,
396
+ "eval_f1": 0.5768496494171164,
397
+ "eval_loss": 0.9901189208030701,
398
+ "eval_precision": 0.6202225107325855,
399
+ "eval_recall": 0.593103448275862,
400
+ "eval_runtime": 4.1789,
401
+ "eval_samples_per_second": 104.095,
402
+ "eval_steps_per_second": 6.7,
403
+ "step": 469
404
  },
405
  {
406
+ "epoch": 6.006389776357827,
407
+ "grad_norm": 6.03436279296875,
408
+ "learning_rate": 2.7790973871733968e-05,
409
+ "loss": 1.135,
410
+ "step": 470
 
 
 
 
 
411
  },
412
  {
413
+ "epoch": 6.134185303514377,
414
+ "grad_norm": 7.317747592926025,
415
+ "learning_rate": 2.7197149643705465e-05,
416
+ "loss": 1.0316,
417
  "step": 480
418
  },
419
  {
420
+ "epoch": 6.261980830670926,
421
+ "grad_norm": 7.093214511871338,
422
+ "learning_rate": 2.6603325415676962e-05,
423
+ "loss": 1.0445,
424
  "step": 490
425
  },
426
  {
427
+ "epoch": 6.389776357827476,
428
+ "grad_norm": 8.44990348815918,
429
+ "learning_rate": 2.6009501187648455e-05,
430
+ "loss": 1.064,
431
  "step": 500
432
  },
433
  {
434
+ "epoch": 6.517571884984026,
435
+ "grad_norm": 8.586387634277344,
436
+ "learning_rate": 2.5415676959619956e-05,
437
+ "loss": 1.0428,
438
  "step": 510
439
  },
440
  {
441
+ "epoch": 6.645367412140575,
442
+ "grad_norm": 8.286543846130371,
443
+ "learning_rate": 2.482185273159145e-05,
444
+ "loss": 1.0424,
445
  "step": 520
446
  },
447
  {
448
+ "epoch": 6.773162939297125,
449
+ "grad_norm": 11.05722427368164,
450
+ "learning_rate": 2.4228028503562946e-05,
451
+ "loss": 1.0486,
452
  "step": 530
453
  },
454
  {
455
+ "epoch": 6.900958466453674,
456
+ "grad_norm": 11.715845108032227,
457
+ "learning_rate": 2.3634204275534443e-05,
458
+ "loss": 1.0409,
459
  "step": 540
460
  },
461
  {
462
+ "epoch": 6.9904153354632586,
463
+ "eval_accuracy": 0.5747126436781609,
464
+ "eval_f1": 0.5723268617274374,
465
+ "eval_loss": 0.9633908867835999,
466
+ "eval_precision": 0.5722167544506434,
467
+ "eval_recall": 0.5747126436781609,
468
+ "eval_runtime": 4.2036,
469
+ "eval_samples_per_second": 103.482,
470
+ "eval_steps_per_second": 6.661,
471
+ "step": 547
472
+ },
473
+ {
474
+ "epoch": 7.0287539936102235,
475
+ "grad_norm": 9.900256156921387,
476
+ "learning_rate": 2.3040380047505937e-05,
477
+ "loss": 1.021,
478
  "step": 550
479
  },
480
  {
481
+ "epoch": 7.156549520766773,
482
+ "grad_norm": 11.22358226776123,
483
+ "learning_rate": 2.2446555819477437e-05,
484
+ "loss": 1.0387,
485
  "step": 560
486
  },
487
  {
488
+ "epoch": 7.284345047923322,
489
+ "grad_norm": 6.466274738311768,
490
+ "learning_rate": 2.1852731591448934e-05,
491
+ "loss": 1.0166,
492
  "step": 570
493
  },
494
  {
495
+ "epoch": 7.412140575079873,
496
+ "grad_norm": 9.685991287231445,
497
+ "learning_rate": 2.1258907363420428e-05,
498
+ "loss": 1.0459,
499
  "step": 580
500
  },
501
  {
502
+ "epoch": 7.539936102236422,
503
+ "grad_norm": 9.211965560913086,
504
+ "learning_rate": 2.0665083135391925e-05,
505
+ "loss": 0.9852,
506
  "step": 590
507
  },
508
  {
509
+ "epoch": 7.667731629392971,
510
+ "grad_norm": 9.930057525634766,
511
+ "learning_rate": 2.0071258907363422e-05,
512
+ "loss": 1.0176,
513
  "step": 600
514
  },
515
  {
516
+ "epoch": 7.795527156549521,
517
+ "grad_norm": 12.529800415039062,
518
+ "learning_rate": 1.947743467933492e-05,
519
+ "loss": 1.0465,
520
  "step": 610
521
  },
522
  {
523
+ "epoch": 7.92332268370607,
524
+ "grad_norm": 9.018549919128418,
525
+ "learning_rate": 1.8883610451306412e-05,
526
+ "loss": 1.0176,
 
 
 
 
 
 
 
 
 
 
 
 
527
  "step": 620
528
  },
529
  {
530
+ "epoch": 8.0,
531
+ "eval_accuracy": 0.593103448275862,
532
+ "eval_f1": 0.5833555266825051,
533
+ "eval_loss": 0.9504066705703735,
534
+ "eval_precision": 0.5813528993323179,
535
+ "eval_recall": 0.593103448275862,
536
+ "eval_runtime": 4.1774,
537
+ "eval_samples_per_second": 104.131,
538
+ "eval_steps_per_second": 6.703,
539
+ "step": 626
540
+ },
541
+ {
542
+ "epoch": 8.05111821086262,
543
+ "grad_norm": 8.328828811645508,
544
+ "learning_rate": 1.828978622327791e-05,
545
+ "loss": 0.9516,
546
  "step": 630
547
  },
548
  {
549
+ "epoch": 8.178913738019169,
550
+ "grad_norm": 7.750000476837158,
551
+ "learning_rate": 1.7695961995249406e-05,
552
+ "loss": 0.972,
553
  "step": 640
554
  },
555
  {
556
+ "epoch": 8.30670926517572,
557
+ "grad_norm": 6.95557165145874,
558
+ "learning_rate": 1.7102137767220903e-05,
559
+ "loss": 0.9842,
560
  "step": 650
561
  },
562
  {
563
+ "epoch": 8.434504792332268,
564
+ "grad_norm": 8.893524169921875,
565
+ "learning_rate": 1.65083135391924e-05,
566
+ "loss": 1.02,
567
  "step": 660
568
  },
569
  {
570
+ "epoch": 8.562300319488818,
571
+ "grad_norm": 9.984440803527832,
572
+ "learning_rate": 1.5914489311163897e-05,
573
+ "loss": 0.97,
574
  "step": 670
575
  },
576
  {
577
+ "epoch": 8.690095846645367,
578
+ "grad_norm": 8.314949989318848,
579
+ "learning_rate": 1.5320665083135394e-05,
580
+ "loss": 1.0366,
581
  "step": 680
582
  },
583
  {
584
+ "epoch": 8.817891373801917,
585
+ "grad_norm": 9.685540199279785,
586
+ "learning_rate": 1.4726840855106888e-05,
587
+ "loss": 1.0284,
 
 
 
 
 
 
 
 
 
 
 
 
588
  "step": 690
589
  },
590
  {
591
+ "epoch": 8.945686900958467,
592
+ "grad_norm": 10.43076229095459,
593
+ "learning_rate": 1.4133016627078385e-05,
594
+ "loss": 0.995,
595
  "step": 700
596
  },
597
  {
598
+ "epoch": 8.996805111821086,
599
+ "eval_accuracy": 0.5908045977011495,
600
+ "eval_f1": 0.5853582114263199,
601
+ "eval_loss": 0.9584209322929382,
602
+ "eval_precision": 0.5853113777126823,
603
+ "eval_recall": 0.5908045977011495,
604
+ "eval_runtime": 4.2105,
605
+ "eval_samples_per_second": 103.314,
606
+ "eval_steps_per_second": 6.65,
607
+ "step": 704
608
+ },
609
+ {
610
+ "epoch": 9.073482428115016,
611
+ "grad_norm": 8.847938537597656,
612
+ "learning_rate": 1.3539192399049882e-05,
613
+ "loss": 0.9524,
614
  "step": 710
615
  },
616
  {
617
+ "epoch": 9.201277955271566,
618
+ "grad_norm": 8.750248908996582,
619
+ "learning_rate": 1.2945368171021377e-05,
620
+ "loss": 0.9846,
621
  "step": 720
622
  },
623
  {
624
+ "epoch": 9.329073482428115,
625
+ "grad_norm": 9.161256790161133,
626
+ "learning_rate": 1.2351543942992874e-05,
627
+ "loss": 0.9751,
628
  "step": 730
629
  },
630
  {
631
+ "epoch": 9.456869009584665,
632
+ "grad_norm": 9.544412612915039,
633
+ "learning_rate": 1.1757719714964371e-05,
634
+ "loss": 0.9417,
635
  "step": 740
636
  },
637
  {
638
+ "epoch": 9.584664536741213,
639
+ "grad_norm": 9.703606605529785,
640
+ "learning_rate": 1.1163895486935868e-05,
641
+ "loss": 0.9707,
 
 
 
 
 
 
 
 
 
 
 
 
642
  "step": 750
643
  },
644
  {
645
+ "epoch": 9.712460063897764,
646
+ "grad_norm": 9.677326202392578,
647
+ "learning_rate": 1.0570071258907365e-05,
648
+ "loss": 1.0009,
649
  "step": 760
650
  },
651
  {
652
+ "epoch": 9.840255591054314,
653
+ "grad_norm": 8.860432624816895,
654
+ "learning_rate": 9.97624703087886e-06,
655
+ "loss": 0.9817,
656
  "step": 770
657
  },
658
  {
659
+ "epoch": 9.968051118210862,
660
+ "grad_norm": 8.743229866027832,
661
+ "learning_rate": 9.382422802850356e-06,
662
+ "loss": 0.9937,
663
  "step": 780
664
  },
665
  {
666
+ "epoch": 9.993610223642172,
667
+ "eval_accuracy": 0.6022988505747127,
668
+ "eval_f1": 0.5934331301305344,
669
+ "eval_loss": 0.9338871240615845,
670
+ "eval_precision": 0.5893665058889589,
671
+ "eval_recall": 0.6022988505747127,
672
+ "eval_runtime": 4.2268,
673
+ "eval_samples_per_second": 102.915,
674
+ "eval_steps_per_second": 6.624,
675
+ "step": 782
676
+ },
677
+ {
678
+ "epoch": 10.095846645367413,
679
+ "grad_norm": 7.1755900382995605,
680
+ "learning_rate": 8.788598574821852e-06,
681
+ "loss": 0.9908,
682
  "step": 790
683
  },
684
  {
685
+ "epoch": 10.223642172523961,
686
+ "grad_norm": 8.917673110961914,
687
+ "learning_rate": 8.19477434679335e-06,
688
+ "loss": 0.9361,
689
  "step": 800
690
  },
691
  {
692
+ "epoch": 10.351437699680512,
693
+ "grad_norm": 15.518793106079102,
694
+ "learning_rate": 7.6009501187648464e-06,
695
+ "loss": 0.9495,
696
  "step": 810
697
  },
698
  {
699
+ "epoch": 10.47923322683706,
700
+ "grad_norm": 12.074665069580078,
701
+ "learning_rate": 7.007125890736342e-06,
702
+ "loss": 0.9689,
703
+ "step": 820
704
+ },
705
+ {
706
+ "epoch": 10.60702875399361,
707
+ "grad_norm": 9.314558982849121,
708
+ "learning_rate": 6.4133016627078396e-06,
709
+ "loss": 0.983,
710
+ "step": 830
711
+ },
712
+ {
713
+ "epoch": 10.73482428115016,
714
+ "grad_norm": 9.714004516601562,
715
+ "learning_rate": 5.819477434679335e-06,
716
+ "loss": 1.0,
717
+ "step": 840
718
+ },
719
+ {
720
+ "epoch": 10.86261980830671,
721
+ "grad_norm": 9.66527271270752,
722
+ "learning_rate": 5.225653206650832e-06,
723
+ "loss": 0.9036,
724
+ "step": 850
725
+ },
726
+ {
727
+ "epoch": 10.99041533546326,
728
+ "grad_norm": 11.544416427612305,
729
+ "learning_rate": 4.631828978622328e-06,
730
+ "loss": 0.9387,
731
+ "step": 860
732
+ },
733
+ {
734
+ "epoch": 10.99041533546326,
735
+ "eval_accuracy": 0.6137931034482759,
736
+ "eval_f1": 0.5996477650763593,
737
+ "eval_loss": 0.9119637608528137,
738
+ "eval_precision": 0.596855024118962,
739
+ "eval_recall": 0.6137931034482759,
740
+ "eval_runtime": 4.2091,
741
+ "eval_samples_per_second": 103.347,
742
+ "eval_steps_per_second": 6.652,
743
+ "step": 860
744
+ },
745
+ {
746
+ "epoch": 11.118210862619808,
747
+ "grad_norm": 9.786779403686523,
748
+ "learning_rate": 4.038004750593825e-06,
749
+ "loss": 0.911,
750
+ "step": 870
751
+ },
752
+ {
753
+ "epoch": 11.246006389776358,
754
+ "grad_norm": 10.40623664855957,
755
+ "learning_rate": 3.4441805225653207e-06,
756
+ "loss": 0.9683,
757
+ "step": 880
758
+ },
759
+ {
760
+ "epoch": 11.373801916932907,
761
+ "grad_norm": 10.116272926330566,
762
+ "learning_rate": 2.850356294536817e-06,
763
+ "loss": 0.9074,
764
+ "step": 890
765
+ },
766
+ {
767
+ "epoch": 11.501597444089457,
768
+ "grad_norm": 9.423429489135742,
769
+ "learning_rate": 2.2565320665083133e-06,
770
+ "loss": 0.908,
771
+ "step": 900
772
+ },
773
+ {
774
+ "epoch": 11.629392971246006,
775
+ "grad_norm": 11.146402359008789,
776
+ "learning_rate": 1.6627078384798101e-06,
777
+ "loss": 0.9744,
778
+ "step": 910
779
+ },
780
+ {
781
+ "epoch": 11.757188498402556,
782
+ "grad_norm": 11.125927925109863,
783
+ "learning_rate": 1.0688836104513065e-06,
784
+ "loss": 0.9541,
785
+ "step": 920
786
+ },
787
+ {
788
+ "epoch": 11.884984025559106,
789
+ "grad_norm": 10.135693550109863,
790
+ "learning_rate": 4.750593824228029e-07,
791
+ "loss": 0.9324,
792
+ "step": 930
793
+ },
794
+ {
795
+ "epoch": 11.961661341853034,
796
+ "eval_accuracy": 0.5954022988505747,
797
+ "eval_f1": 0.5878627034099811,
798
+ "eval_loss": 0.9134895205497742,
799
+ "eval_precision": 0.5865253155328708,
800
+ "eval_recall": 0.5954022988505747,
801
+ "eval_runtime": 4.3073,
802
+ "eval_samples_per_second": 100.992,
803
+ "eval_steps_per_second": 6.501,
804
+ "step": 936
805
+ },
806
+ {
807
+ "epoch": 11.961661341853034,
808
+ "step": 936,
809
+ "total_flos": 4.743827088137626e+18,
810
+ "train_loss": 1.079931161342523,
811
+ "train_runtime": 2308.5403,
812
+ "train_samples_per_second": 26.027,
813
+ "train_steps_per_second": 0.405
814
  }
815
  ],
816
  "logging_steps": 10,
817
+ "max_steps": 936,
818
  "num_input_tokens_seen": 0,
819
  "num_train_epochs": 12,
820
  "save_steps": 500,
 
830
  "attributes": {}
831
  }
832
  },
833
+ "total_flos": 4.743827088137626e+18,
834
  "train_batch_size": 16,
835
  "trial_name": null,
836
  "trial_params": null