BTX24 committed on
Commit
9654e48
·
verified ·
1 Parent(s): 42c42ff

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.496124031007753,
3
+ "eval_accuracy": 0.9488919041157847,
4
+ "eval_f1": 0.9483693001512903,
5
+ "eval_loss": 0.15268893539905548,
6
+ "eval_precision": 0.9504887785210339,
7
+ "eval_recall": 0.9488919041157847,
8
+ "eval_runtime": 8.6365,
9
+ "eval_samples_per_second": 256.007,
10
+ "eval_steps_per_second": 32.073,
11
+ "total_flos": 5.998049534242161e+18,
12
+ "train_loss": 0.14669023766237146,
13
+ "train_runtime": 774.0157,
14
+ "train_samples_per_second": 319.869,
15
+ "train_steps_per_second": 9.984
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.496124031007753,
3
+ "eval_accuracy": 0.9488919041157847,
4
+ "eval_f1": 0.9483693001512903,
5
+ "eval_loss": 0.15268893539905548,
6
+ "eval_precision": 0.9504887785210339,
7
+ "eval_recall": 0.9488919041157847,
8
+ "eval_runtime": 8.6365,
9
+ "eval_samples_per_second": 256.007,
10
+ "eval_steps_per_second": 32.073
11
+ }
runs/Mar16_19-32-23_774ccf98b3fe/events.out.tfevents.1742158382.774ccf98b3fe.5066.29 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89e7f51520db145abdf9c1a6d5fc28a1eda7f709213a10bf83aaac42a08112e8
3
+ size 560
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.496124031007753,
3
+ "total_flos": 5.998049534242161e+18,
4
+ "train_loss": 0.14669023766237146,
5
+ "train_runtime": 774.0157,
6
+ "train_samples_per_second": 319.869,
7
+ "train_steps_per_second": 9.984
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,484 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9483693001512903,
3
+ "best_model_checkpoint": "deit-base-patch16-224-finetuned-stroke-binary/checkpoint-1700",
4
+ "epoch": 10.496124031007753,
5
+ "eval_steps": 100,
6
+ "global_step": 1700,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.31007751937984496,
13
+ "grad_norm": 1.1434299945831299,
14
+ "learning_rate": 1.29366106080207e-06,
15
+ "loss": 0.1628,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.6201550387596899,
20
+ "grad_norm": 0.9698119163513184,
21
+ "learning_rate": 2.58732212160414e-06,
22
+ "loss": 0.1646,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.6201550387596899,
27
+ "eval_accuracy": 0.9430122116689281,
28
+ "eval_f1": 0.9424717047287801,
29
+ "eval_loss": 0.15877728164196014,
30
+ "eval_precision": 0.9442262324920166,
31
+ "eval_recall": 0.9430122116689281,
32
+ "eval_runtime": 9.3645,
33
+ "eval_samples_per_second": 236.103,
34
+ "eval_steps_per_second": 29.58,
35
+ "step": 100
36
+ },
37
+ {
38
+ "epoch": 0.9302325581395349,
39
+ "grad_norm": 1.4945017099380493,
40
+ "learning_rate": 3.88098318240621e-06,
41
+ "loss": 0.1355,
42
+ "step": 150
43
+ },
44
+ {
45
+ "epoch": 1.235658914728682,
46
+ "grad_norm": 1.1095311641693115,
47
+ "learning_rate": 5.17464424320828e-06,
48
+ "loss": 0.1417,
49
+ "step": 200
50
+ },
51
+ {
52
+ "epoch": 1.235658914728682,
53
+ "eval_accuracy": 0.9439167797376753,
54
+ "eval_f1": 0.9432839027033096,
55
+ "eval_loss": 0.16397197544574738,
56
+ "eval_precision": 0.9457751540323792,
57
+ "eval_recall": 0.9439167797376753,
58
+ "eval_runtime": 9.5146,
59
+ "eval_samples_per_second": 232.38,
60
+ "eval_steps_per_second": 29.113,
61
+ "step": 200
62
+ },
63
+ {
64
+ "epoch": 1.5457364341085271,
65
+ "grad_norm": 2.9033849239349365,
66
+ "learning_rate": 6.468305304010349e-06,
67
+ "loss": 0.1576,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 1.8558139534883722,
72
+ "grad_norm": 0.8375345468521118,
73
+ "learning_rate": 7.76196636481242e-06,
74
+ "loss": 0.1681,
75
+ "step": 300
76
+ },
77
+ {
78
+ "epoch": 1.8558139534883722,
79
+ "eval_accuracy": 0.945273631840796,
80
+ "eval_f1": 0.9446810798244,
81
+ "eval_loss": 0.16215302050113678,
82
+ "eval_precision": 0.9469984434420068,
83
+ "eval_recall": 0.945273631840796,
84
+ "eval_runtime": 9.4184,
85
+ "eval_samples_per_second": 234.752,
86
+ "eval_steps_per_second": 29.41,
87
+ "step": 300
88
+ },
89
+ {
90
+ "epoch": 2.1612403100775195,
91
+ "grad_norm": 1.2844864130020142,
92
+ "learning_rate": 9.055627425614489e-06,
93
+ "loss": 0.1487,
94
+ "step": 350
95
+ },
96
+ {
97
+ "epoch": 2.471317829457364,
98
+ "grad_norm": 2.3872156143188477,
99
+ "learning_rate": 1.034928848641656e-05,
100
+ "loss": 0.1512,
101
+ "step": 400
102
+ },
103
+ {
104
+ "epoch": 2.471317829457364,
105
+ "eval_accuracy": 0.9434644957033017,
106
+ "eval_f1": 0.9430495459530136,
107
+ "eval_loss": 0.15099050104618073,
108
+ "eval_precision": 0.9440827208056073,
109
+ "eval_recall": 0.9434644957033017,
110
+ "eval_runtime": 9.4764,
111
+ "eval_samples_per_second": 233.316,
112
+ "eval_steps_per_second": 29.23,
113
+ "step": 400
114
+ },
115
+ {
116
+ "epoch": 2.781395348837209,
117
+ "grad_norm": 1.7295209169387817,
118
+ "learning_rate": 1.164294954721863e-05,
119
+ "loss": 0.161,
120
+ "step": 450
121
+ },
122
+ {
123
+ "epoch": 3.0868217054263565,
124
+ "grad_norm": 3.4684560298919678,
125
+ "learning_rate": 1.2936610608020698e-05,
126
+ "loss": 0.1506,
127
+ "step": 500
128
+ },
129
+ {
130
+ "epoch": 3.0868217054263565,
131
+ "eval_accuracy": 0.9339665309814563,
132
+ "eval_f1": 0.9326951378602786,
133
+ "eval_loss": 0.19129638373851776,
134
+ "eval_precision": 0.9391386500820929,
135
+ "eval_recall": 0.9339665309814563,
136
+ "eval_runtime": 9.3995,
137
+ "eval_samples_per_second": 235.226,
138
+ "eval_steps_per_second": 29.47,
139
+ "step": 500
140
+ },
141
+ {
142
+ "epoch": 3.3968992248062015,
143
+ "grad_norm": 2.042165517807007,
144
+ "learning_rate": 1.423027166882277e-05,
145
+ "loss": 0.1263,
146
+ "step": 550
147
+ },
148
+ {
149
+ "epoch": 3.7069767441860466,
150
+ "grad_norm": 0.8381805419921875,
151
+ "learning_rate": 1.552393272962484e-05,
152
+ "loss": 0.1654,
153
+ "step": 600
154
+ },
155
+ {
156
+ "epoch": 3.7069767441860466,
157
+ "eval_accuracy": 0.9425599276345545,
158
+ "eval_f1": 0.9419379928735436,
159
+ "eval_loss": 0.1679351031780243,
160
+ "eval_precision": 0.9442161451256601,
161
+ "eval_recall": 0.9425599276345545,
162
+ "eval_runtime": 9.4346,
163
+ "eval_samples_per_second": 234.351,
164
+ "eval_steps_per_second": 29.36,
165
+ "step": 600
166
+ },
167
+ {
168
+ "epoch": 4.0124031007751935,
169
+ "grad_norm": 2.1265010833740234,
170
+ "learning_rate": 1.6817593790426908e-05,
171
+ "loss": 0.1507,
172
+ "step": 650
173
+ },
174
+ {
175
+ "epoch": 4.322480620155039,
176
+ "grad_norm": 2.001044511795044,
177
+ "learning_rate": 1.8111254851228977e-05,
178
+ "loss": 0.1482,
179
+ "step": 700
180
+ },
181
+ {
182
+ "epoch": 4.322480620155039,
183
+ "eval_accuracy": 0.9402985074626866,
184
+ "eval_f1": 0.9402037869673187,
185
+ "eval_loss": 0.1550845354795456,
186
+ "eval_precision": 0.9401885824772098,
187
+ "eval_recall": 0.9402985074626866,
188
+ "eval_runtime": 9.4957,
189
+ "eval_samples_per_second": 232.841,
190
+ "eval_steps_per_second": 29.171,
191
+ "step": 700
192
+ },
193
+ {
194
+ "epoch": 4.632558139534884,
195
+ "grad_norm": 0.40356162190437317,
196
+ "learning_rate": 1.940491591203105e-05,
197
+ "loss": 0.1451,
198
+ "step": 750
199
+ },
200
+ {
201
+ "epoch": 4.942635658914728,
202
+ "grad_norm": 0.9926322102546692,
203
+ "learning_rate": 1.999925630026586e-05,
204
+ "loss": 0.1599,
205
+ "step": 800
206
+ },
207
+ {
208
+ "epoch": 4.942635658914728,
209
+ "eval_accuracy": 0.9461781999095432,
210
+ "eval_f1": 0.9457377367508388,
211
+ "eval_loss": 0.14886853098869324,
212
+ "eval_precision": 0.9470663350522793,
213
+ "eval_recall": 0.9461781999095432,
214
+ "eval_runtime": 9.4883,
215
+ "eval_samples_per_second": 233.025,
216
+ "eval_steps_per_second": 29.194,
217
+ "step": 800
218
+ },
219
+ {
220
+ "epoch": 5.248062015503876,
221
+ "grad_norm": 1.0929352045059204,
222
+ "learning_rate": 1.9993951980962474e-05,
223
+ "loss": 0.1598,
224
+ "step": 850
225
+ },
226
+ {
227
+ "epoch": 5.558139534883721,
228
+ "grad_norm": 1.2597020864486694,
229
+ "learning_rate": 1.9983550078926357e-05,
230
+ "loss": 0.1477,
231
+ "step": 900
232
+ },
233
+ {
234
+ "epoch": 5.558139534883721,
235
+ "eval_accuracy": 0.9425599276345545,
236
+ "eval_f1": 0.9423650667942064,
237
+ "eval_loss": 0.14367012679576874,
238
+ "eval_precision": 0.942514608438234,
239
+ "eval_recall": 0.9425599276345545,
240
+ "eval_runtime": 9.4876,
241
+ "eval_samples_per_second": 233.041,
242
+ "eval_steps_per_second": 29.196,
243
+ "step": 900
244
+ },
245
+ {
246
+ "epoch": 5.868217054263566,
247
+ "grad_norm": 1.0700503587722778,
248
+ "learning_rate": 1.9968055899822005e-05,
249
+ "loss": 0.1432,
250
+ "step": 950
251
+ },
252
+ {
253
+ "epoch": 6.173643410852713,
254
+ "grad_norm": 2.013948440551758,
255
+ "learning_rate": 1.9947477346715192e-05,
256
+ "loss": 0.1308,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 6.173643410852713,
261
+ "eval_accuracy": 0.9416553595658074,
262
+ "eval_f1": 0.9414428935954616,
263
+ "eval_loss": 0.15271881222724915,
264
+ "eval_precision": 0.9416239036557209,
265
+ "eval_recall": 0.9416553595658074,
266
+ "eval_runtime": 9.4259,
267
+ "eval_samples_per_second": 234.566,
268
+ "eval_steps_per_second": 29.387,
269
+ "step": 1000
270
+ },
271
+ {
272
+ "epoch": 6.4837209302325585,
273
+ "grad_norm": 2.9245336055755615,
274
+ "learning_rate": 1.9921824916041882e-05,
275
+ "loss": 0.1355,
276
+ "step": 1050
277
+ },
278
+ {
279
+ "epoch": 6.793798449612403,
280
+ "grad_norm": 2.073958396911621,
281
+ "learning_rate": 1.9891111692254346e-05,
282
+ "loss": 0.1362,
283
+ "step": 1100
284
+ },
285
+ {
286
+ "epoch": 6.793798449612403,
287
+ "eval_accuracy": 0.9425599276345545,
288
+ "eval_f1": 0.9421222945979488,
289
+ "eval_loss": 0.16084641218185425,
290
+ "eval_precision": 0.943232512538474,
291
+ "eval_recall": 0.9425599276345545,
292
+ "eval_runtime": 9.4085,
293
+ "eval_samples_per_second": 235.0,
294
+ "eval_steps_per_second": 29.441,
295
+ "step": 1100
296
+ },
297
+ {
298
+ "epoch": 7.09922480620155,
299
+ "grad_norm": 3.3903515338897705,
300
+ "learning_rate": 1.98553533411472e-05,
301
+ "loss": 0.1437,
302
+ "step": 1150
303
+ },
304
+ {
305
+ "epoch": 7.409302325581395,
306
+ "grad_norm": 0.6468881964683533,
307
+ "learning_rate": 1.9814568101866843e-05,
308
+ "loss": 0.1494,
309
+ "step": 1200
310
+ },
311
+ {
312
+ "epoch": 7.409302325581395,
313
+ "eval_accuracy": 0.9434644957033017,
314
+ "eval_f1": 0.9428523551904958,
315
+ "eval_loss": 0.16010308265686035,
316
+ "eval_precision": 0.9451435778977758,
317
+ "eval_recall": 0.9434644957033017,
318
+ "eval_runtime": 9.4239,
319
+ "eval_samples_per_second": 234.617,
320
+ "eval_steps_per_second": 29.394,
321
+ "step": 1200
322
+ },
323
+ {
324
+ "epoch": 7.7193798449612405,
325
+ "grad_norm": 1.8461512327194214,
326
+ "learning_rate": 1.9768776777608227e-05,
327
+ "loss": 0.1437,
328
+ "step": 1250
329
+ },
330
+ {
331
+ "epoch": 8.024806201550387,
332
+ "grad_norm": 1.100502371788025,
333
+ "learning_rate": 1.971800272500388e-05,
334
+ "loss": 0.1592,
335
+ "step": 1300
336
+ },
337
+ {
338
+ "epoch": 8.024806201550387,
339
+ "eval_accuracy": 0.9430122116689281,
340
+ "eval_f1": 0.9428538886036008,
341
+ "eval_loss": 0.1429978460073471,
342
+ "eval_precision": 0.9429310401027086,
343
+ "eval_recall": 0.9430122116689281,
344
+ "eval_runtime": 9.449,
345
+ "eval_samples_per_second": 233.992,
346
+ "eval_steps_per_second": 29.315,
347
+ "step": 1300
348
+ },
349
+ {
350
+ "epoch": 8.334883720930232,
351
+ "grad_norm": 4.64835786819458,
352
+ "learning_rate": 1.9662271842210433e-05,
353
+ "loss": 0.131,
354
+ "step": 1350
355
+ },
356
+ {
357
+ "epoch": 8.644961240310078,
358
+ "grad_norm": 1.8132234811782837,
359
+ "learning_rate": 1.960161255569886e-05,
360
+ "loss": 0.16,
361
+ "step": 1400
362
+ },
363
+ {
364
+ "epoch": 8.644961240310078,
365
+ "eval_accuracy": 0.9457259158751696,
366
+ "eval_f1": 0.9451300205802559,
367
+ "eval_loss": 0.1504276692867279,
368
+ "eval_precision": 0.9475179562129199,
369
+ "eval_recall": 0.9457259158751696,
370
+ "eval_runtime": 9.4602,
371
+ "eval_samples_per_second": 233.715,
372
+ "eval_steps_per_second": 29.28,
373
+ "step": 1400
374
+ },
375
+ {
376
+ "epoch": 8.955038759689922,
377
+ "grad_norm": 1.1923645734786987,
378
+ "learning_rate": 1.9536055805755044e-05,
379
+ "loss": 0.1105,
380
+ "step": 1450
381
+ },
382
+ {
383
+ "epoch": 9.26046511627907,
384
+ "grad_norm": 2.665161609649658,
385
+ "learning_rate": 1.9465635030698203e-05,
386
+ "loss": 0.1245,
387
+ "step": 1500
388
+ },
389
+ {
390
+ "epoch": 9.26046511627907,
391
+ "eval_accuracy": 0.9461781999095432,
392
+ "eval_f1": 0.9457529907245916,
393
+ "eval_loss": 0.1505969762802124,
394
+ "eval_precision": 0.9469878940244384,
395
+ "eval_recall": 0.9461781999095432,
396
+ "eval_runtime": 9.4828,
397
+ "eval_samples_per_second": 233.159,
398
+ "eval_steps_per_second": 29.211,
399
+ "step": 1500
400
+ },
401
+ {
402
+ "epoch": 9.570542635658915,
403
+ "grad_norm": 0.45807984471321106,
404
+ "learning_rate": 1.939038614982509e-05,
405
+ "loss": 0.1342,
406
+ "step": 1550
407
+ },
408
+ {
409
+ "epoch": 9.88062015503876,
410
+ "grad_norm": 1.0274362564086914,
411
+ "learning_rate": 1.9310347545088764e-05,
412
+ "loss": 0.1397,
413
+ "step": 1600
414
+ },
415
+ {
416
+ "epoch": 9.88062015503876,
417
+ "eval_accuracy": 0.9312528267752148,
418
+ "eval_f1": 0.9299781453592552,
419
+ "eval_loss": 0.19713716208934784,
420
+ "eval_precision": 0.9359197905804808,
421
+ "eval_recall": 0.9312528267752148,
422
+ "eval_runtime": 9.4563,
423
+ "eval_samples_per_second": 233.811,
424
+ "eval_steps_per_second": 29.293,
425
+ "step": 1600
426
+ },
427
+ {
428
+ "epoch": 10.186046511627907,
429
+ "grad_norm": 2.8293967247009277,
430
+ "learning_rate": 1.9225560041521225e-05,
431
+ "loss": 0.1615,
432
+ "step": 1650
433
+ },
434
+ {
435
+ "epoch": 10.496124031007753,
436
+ "grad_norm": 0.916153609752655,
437
+ "learning_rate": 1.913606688640993e-05,
438
+ "loss": 0.1396,
439
+ "step": 1700
440
+ },
441
+ {
442
+ "epoch": 10.496124031007753,
443
+ "eval_accuracy": 0.9488919041157847,
444
+ "eval_f1": 0.9483693001512903,
445
+ "eval_loss": 0.15268893539905548,
446
+ "eval_precision": 0.9504887785210339,
447
+ "eval_recall": 0.9488919041157847,
448
+ "eval_runtime": 9.4488,
449
+ "eval_samples_per_second": 233.998,
450
+ "eval_steps_per_second": 29.316,
451
+ "step": 1700
452
+ },
453
+ {
454
+ "epoch": 10.496124031007753,
455
+ "step": 1700,
456
+ "total_flos": 5.998049534242161e+18,
457
+ "train_loss": 0.14669023766237146,
458
+ "train_runtime": 774.0157,
459
+ "train_samples_per_second": 319.869,
460
+ "train_steps_per_second": 9.984
461
+ }
462
+ ],
463
+ "logging_steps": 50,
464
+ "max_steps": 7728,
465
+ "num_input_tokens_seen": 0,
466
+ "num_train_epochs": 48,
467
+ "save_steps": 100,
468
+ "stateful_callbacks": {
469
+ "TrainerControl": {
470
+ "args": {
471
+ "should_epoch_stop": false,
472
+ "should_evaluate": false,
473
+ "should_log": false,
474
+ "should_save": true,
475
+ "should_training_stop": true
476
+ },
477
+ "attributes": {}
478
+ }
479
+ },
480
+ "total_flos": 5.998049534242161e+18,
481
+ "train_batch_size": 8,
482
+ "trial_name": null,
483
+ "trial_params": null
484
+ }