BTX24 committed on
Commit
743f44e
·
verified ·
1 Parent(s): 94d304f

End of training

Browse files
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [microsoft/beit-base-patch16-224-pt22k-ft22k](https://huggingface.co/microsoft/beit-base-patch16-224-pt22k-ft22k) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.2066
25
- - Accuracy: 0.9181
26
- - F1: 0.9170
27
- - Precision: 0.9201
28
- - Recall: 0.9181
29
 
30
  ## Model description
31
 
 
21
 
22
  This model is a fine-tuned version of [microsoft/beit-base-patch16-224-pt22k-ft22k](https://huggingface.co/microsoft/beit-base-patch16-224-pt22k-ft22k) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.2029
25
+ - Accuracy: 0.9222
26
+ - F1: 0.9214
27
+ - Precision: 0.9234
28
+ - Recall: 0.9222
29
 
30
  ## Model description
31
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 47.9907120743034,
3
+ "eval_accuracy": 0.9222071460877431,
4
+ "eval_f1": 0.9213530294983667,
5
+ "eval_loss": 0.20286186039447784,
6
+ "eval_precision": 0.9233953907433776,
7
+ "eval_recall": 0.9222071460877431,
8
+ "eval_runtime": 9.8001,
9
+ "eval_samples_per_second": 225.611,
10
+ "eval_steps_per_second": 28.265,
11
+ "total_flos": 2.738563601264935e+19,
12
+ "train_loss": 0.3012936460475127,
13
+ "train_runtime": 3086.985,
14
+ "train_samples_per_second": 80.203,
15
+ "train_steps_per_second": 1.244
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 47.9907120743034,
3
+ "eval_accuracy": 0.9222071460877431,
4
+ "eval_f1": 0.9213530294983667,
5
+ "eval_loss": 0.20286186039447784,
6
+ "eval_precision": 0.9233953907433776,
7
+ "eval_recall": 0.9222071460877431,
8
+ "eval_runtime": 9.8001,
9
+ "eval_samples_per_second": 225.611,
10
+ "eval_steps_per_second": 28.265
11
+ }
runs/Mar16_11-27-33_774ccf98b3fe/events.out.tfevents.1742127600.774ccf98b3fe.5066.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc918b6f8d2e9be6798aab0da6703d7db791ebfb85d742fa2e2569de3759b156
3
+ size 560
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 47.9907120743034,
3
+ "total_flos": 2.738563601264935e+19,
4
+ "train_loss": 0.3012936460475127,
5
+ "train_runtime": 3086.985,
6
+ "train_samples_per_second": 80.203,
7
+ "train_steps_per_second": 1.244
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1030 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9213530294983667,
3
+ "best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-stroke-binary/checkpoint-3200",
4
+ "epoch": 47.9907120743034,
5
+ "eval_steps": 100,
6
+ "global_step": 3840,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.6191950464396285,
13
+ "grad_norm": 5.044180870056152,
14
+ "learning_rate": 2.604166666666667e-06,
15
+ "loss": 0.744,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 1.2476780185758514,
20
+ "grad_norm": 1.894531011581421,
21
+ "learning_rate": 5.208333333333334e-06,
22
+ "loss": 0.7256,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 1.2476780185758514,
27
+ "eval_accuracy": 0.5685210312075983,
28
+ "eval_f1": 0.48230573725672854,
29
+ "eval_loss": 0.6912839412689209,
30
+ "eval_precision": 0.47305459682975004,
31
+ "eval_recall": 0.5685210312075983,
32
+ "eval_runtime": 9.7307,
33
+ "eval_samples_per_second": 227.22,
34
+ "eval_steps_per_second": 28.467,
35
+ "step": 100
36
+ },
37
+ {
38
+ "epoch": 1.86687306501548,
39
+ "grad_norm": 1.9874528646469116,
40
+ "learning_rate": 7.8125e-06,
41
+ "loss": 0.7033,
42
+ "step": 150
43
+ },
44
+ {
45
+ "epoch": 2.4953560371517027,
46
+ "grad_norm": 2.1426079273223877,
47
+ "learning_rate": 1.0416666666666668e-05,
48
+ "loss": 0.6695,
49
+ "step": 200
50
+ },
51
+ {
52
+ "epoch": 2.4953560371517027,
53
+ "eval_accuracy": 0.6209859791949344,
54
+ "eval_f1": 0.5163511984739979,
55
+ "eval_loss": 0.64801025390625,
56
+ "eval_precision": 0.5986617059765201,
57
+ "eval_recall": 0.6209859791949344,
58
+ "eval_runtime": 9.7733,
59
+ "eval_samples_per_second": 226.229,
60
+ "eval_steps_per_second": 28.343,
61
+ "step": 200
62
+ },
63
+ {
64
+ "epoch": 3.123839009287926,
65
+ "grad_norm": 1.4950852394104004,
66
+ "learning_rate": 1.3020833333333334e-05,
67
+ "loss": 0.6399,
68
+ "step": 250
69
+ },
70
+ {
71
+ "epoch": 3.7430340557275543,
72
+ "grad_norm": 1.932099461555481,
73
+ "learning_rate": 1.5625e-05,
74
+ "loss": 0.5963,
75
+ "step": 300
76
+ },
77
+ {
78
+ "epoch": 3.7430340557275543,
79
+ "eval_accuracy": 0.6725463591135233,
80
+ "eval_f1": 0.6118497378760206,
81
+ "eval_loss": 0.5882277488708496,
82
+ "eval_precision": 0.6992514519414246,
83
+ "eval_recall": 0.6725463591135233,
84
+ "eval_runtime": 9.9265,
85
+ "eval_samples_per_second": 222.737,
86
+ "eval_steps_per_second": 27.905,
87
+ "step": 300
88
+ },
89
+ {
90
+ "epoch": 4.371517027863777,
91
+ "grad_norm": 1.4714239835739136,
92
+ "learning_rate": 1.8229166666666668e-05,
93
+ "loss": 0.5616,
94
+ "step": 350
95
+ },
96
+ {
97
+ "epoch": 4.9907120743034055,
98
+ "grad_norm": 1.6321817636489868,
99
+ "learning_rate": 1.9998942319271076e-05,
100
+ "loss": 0.518,
101
+ "step": 400
102
+ },
103
+ {
104
+ "epoch": 4.9907120743034055,
105
+ "eval_accuracy": 0.7480777928539123,
106
+ "eval_f1": 0.7167175429836762,
107
+ "eval_loss": 0.49900302290916443,
108
+ "eval_precision": 0.7891493639439078,
109
+ "eval_recall": 0.7480777928539123,
110
+ "eval_runtime": 9.9652,
111
+ "eval_samples_per_second": 221.873,
112
+ "eval_steps_per_second": 27.797,
113
+ "step": 400
114
+ },
115
+ {
116
+ "epoch": 5.6191950464396285,
117
+ "grad_norm": 2.7871556282043457,
118
+ "learning_rate": 1.998200798188685e-05,
119
+ "loss": 0.4858,
120
+ "step": 450
121
+ },
122
+ {
123
+ "epoch": 6.247678018575852,
124
+ "grad_norm": 3.0406832695007324,
125
+ "learning_rate": 1.9944456171551024e-05,
126
+ "loss": 0.4325,
127
+ "step": 500
128
+ },
129
+ {
130
+ "epoch": 6.247678018575852,
131
+ "eval_accuracy": 0.8073270013568521,
132
+ "eval_f1": 0.7957289617278764,
133
+ "eval_loss": 0.40903061628341675,
134
+ "eval_precision": 0.8231915996306657,
135
+ "eval_recall": 0.8073270013568521,
136
+ "eval_runtime": 10.0105,
137
+ "eval_samples_per_second": 220.868,
138
+ "eval_steps_per_second": 27.671,
139
+ "step": 500
140
+ },
141
+ {
142
+ "epoch": 6.86687306501548,
143
+ "grad_norm": 1.8095026016235352,
144
+ "learning_rate": 1.9886364450156485e-05,
145
+ "loss": 0.3996,
146
+ "step": 550
147
+ },
148
+ {
149
+ "epoch": 7.495356037151703,
150
+ "grad_norm": 2.2104787826538086,
151
+ "learning_rate": 1.9807852804032306e-05,
152
+ "loss": 0.3848,
153
+ "step": 600
154
+ },
155
+ {
156
+ "epoch": 7.495356037151703,
157
+ "eval_accuracy": 0.8340117593848937,
158
+ "eval_f1": 0.8257264682013935,
159
+ "eval_loss": 0.37033286690711975,
160
+ "eval_precision": 0.8481671958516468,
161
+ "eval_recall": 0.8340117593848937,
162
+ "eval_runtime": 10.0319,
163
+ "eval_samples_per_second": 220.396,
164
+ "eval_steps_per_second": 27.612,
165
+ "step": 600
166
+ },
167
+ {
168
+ "epoch": 8.123839009287925,
169
+ "grad_norm": 1.679534912109375,
170
+ "learning_rate": 1.970908339611638e-05,
171
+ "loss": 0.3628,
172
+ "step": 650
173
+ },
174
+ {
175
+ "epoch": 8.743034055727554,
176
+ "grad_norm": 1.1306818723678589,
177
+ "learning_rate": 1.9590260231013774e-05,
178
+ "loss": 0.3532,
179
+ "step": 700
180
+ },
181
+ {
182
+ "epoch": 8.743034055727554,
183
+ "eval_accuracy": 0.8312980551786522,
184
+ "eval_f1": 0.8200843868501977,
185
+ "eval_loss": 0.39578977227211,
186
+ "eval_precision": 0.8564151669725956,
187
+ "eval_recall": 0.8312980551786522,
188
+ "eval_runtime": 10.0177,
189
+ "eval_samples_per_second": 220.71,
190
+ "eval_steps_per_second": 27.651,
191
+ "step": 700
192
+ },
193
+ {
194
+ "epoch": 9.371517027863778,
195
+ "grad_norm": 2.967256784439087,
196
+ "learning_rate": 1.945162873363268e-05,
197
+ "loss": 0.3368,
198
+ "step": 750
199
+ },
200
+ {
201
+ "epoch": 9.990712074303406,
202
+ "grad_norm": 1.7120331525802612,
203
+ "learning_rate": 1.9293475242268224e-05,
204
+ "loss": 0.3297,
205
+ "step": 800
206
+ },
207
+ {
208
+ "epoch": 9.990712074303406,
209
+ "eval_accuracy": 0.8611488014473089,
210
+ "eval_f1": 0.8558119987362278,
211
+ "eval_loss": 0.32570937275886536,
212
+ "eval_precision": 0.871840635108417,
213
+ "eval_recall": 0.8611488014473089,
214
+ "eval_runtime": 10.0508,
215
+ "eval_samples_per_second": 219.982,
216
+ "eval_steps_per_second": 27.56,
217
+ "step": 800
218
+ },
219
+ {
220
+ "epoch": 10.619195046439629,
221
+ "grad_norm": 1.371287226676941,
222
+ "learning_rate": 1.9116126417181188e-05,
223
+ "loss": 0.3143,
224
+ "step": 850
225
+ },
226
+ {
227
+ "epoch": 11.24767801857585,
228
+ "grad_norm": 1.5851930379867554,
229
+ "learning_rate": 1.8919948565893144e-05,
230
+ "loss": 0.3281,
231
+ "step": 900
232
+ },
233
+ {
234
+ "epoch": 11.24767801857585,
235
+ "eval_accuracy": 0.866576209859792,
236
+ "eval_f1": 0.8611949189284244,
237
+ "eval_loss": 0.3168693780899048,
238
+ "eval_precision": 0.8790588339902896,
239
+ "eval_recall": 0.866576209859792,
240
+ "eval_runtime": 10.0434,
241
+ "eval_samples_per_second": 220.144,
242
+ "eval_steps_per_second": 27.58,
243
+ "step": 900
244
+ },
245
+ {
246
+ "epoch": 11.86687306501548,
247
+ "grad_norm": 0.8977594375610352,
248
+ "learning_rate": 1.8705346886591667e-05,
249
+ "loss": 0.3036,
250
+ "step": 950
251
+ },
252
+ {
253
+ "epoch": 12.495356037151703,
254
+ "grad_norm": 2.8011789321899414,
255
+ "learning_rate": 1.847276463120828e-05,
256
+ "loss": 0.2938,
257
+ "step": 1000
258
+ },
259
+ {
260
+ "epoch": 12.495356037151703,
261
+ "eval_accuracy": 0.8864767073722297,
262
+ "eval_f1": 0.8841053273932936,
263
+ "eval_loss": 0.28136056661605835,
264
+ "eval_precision": 0.8900209862709351,
265
+ "eval_recall": 0.8864767073722297,
266
+ "eval_runtime": 10.0799,
267
+ "eval_samples_per_second": 219.348,
268
+ "eval_steps_per_second": 27.481,
269
+ "step": 1000
270
+ },
271
+ {
272
+ "epoch": 13.123839009287925,
273
+ "grad_norm": 2.865165948867798,
274
+ "learning_rate": 1.822268218989775e-05,
275
+ "loss": 0.294,
276
+ "step": 1050
277
+ },
278
+ {
279
+ "epoch": 13.743034055727554,
280
+ "grad_norm": 1.8461377620697021,
281
+ "learning_rate": 1.79556160988098e-05,
282
+ "loss": 0.2866,
283
+ "step": 1100
284
+ },
285
+ {
286
+ "epoch": 13.743034055727554,
287
+ "eval_accuracy": 0.8869289914066033,
288
+ "eval_f1": 0.883705253652531,
289
+ "eval_loss": 0.2827624976634979,
290
+ "eval_precision": 0.8942761361900506,
291
+ "eval_recall": 0.8869289914066033,
292
+ "eval_runtime": 10.0748,
293
+ "eval_samples_per_second": 219.459,
294
+ "eval_steps_per_second": 27.494,
295
+ "step": 1100
296
+ },
297
+ {
298
+ "epoch": 14.371517027863778,
299
+ "grad_norm": 1.1585817337036133,
300
+ "learning_rate": 1.7672117973202526e-05,
301
+ "loss": 0.2667,
302
+ "step": 1150
303
+ },
304
+ {
305
+ "epoch": 14.990712074303406,
306
+ "grad_norm": 1.050703525543213,
307
+ "learning_rate": 1.737277336810124e-05,
308
+ "loss": 0.2884,
309
+ "step": 1200
310
+ },
311
+ {
312
+ "epoch": 14.990712074303406,
313
+ "eval_accuracy": 0.8846675712347354,
314
+ "eval_f1": 0.8810137319907275,
315
+ "eval_loss": 0.29289892315864563,
316
+ "eval_precision": 0.8936495972210173,
317
+ "eval_recall": 0.8846675712347354,
318
+ "eval_runtime": 10.0424,
319
+ "eval_samples_per_second": 220.167,
320
+ "eval_steps_per_second": 27.583,
321
+ "step": 1200
322
+ },
323
+ {
324
+ "epoch": 15.619195046439629,
325
+ "grad_norm": 1.1661638021469116,
326
+ "learning_rate": 1.705820056885596e-05,
327
+ "loss": 0.2722,
328
+ "step": 1250
329
+ },
330
+ {
331
+ "epoch": 16.24767801857585,
332
+ "grad_norm": 1.4716720581054688,
333
+ "learning_rate": 1.6729049314095578e-05,
334
+ "loss": 0.2808,
335
+ "step": 1300
336
+ },
337
+ {
338
+ "epoch": 16.24767801857585,
339
+ "eval_accuracy": 0.9014020805065581,
340
+ "eval_f1": 0.8998791257539092,
341
+ "eval_loss": 0.24578717350959778,
342
+ "eval_precision": 0.9034472507945439,
343
+ "eval_recall": 0.9014020805065581,
344
+ "eval_runtime": 10.1493,
345
+ "eval_samples_per_second": 217.847,
346
+ "eval_steps_per_second": 27.292,
347
+ "step": 1300
348
+ },
349
+ {
350
+ "epoch": 16.86687306501548,
351
+ "grad_norm": 1.2002067565917969,
352
+ "learning_rate": 1.6385999453716453e-05,
353
+ "loss": 0.2843,
354
+ "step": 1350
355
+ },
356
+ {
357
+ "epoch": 17.4953560371517,
358
+ "grad_norm": 2.676912307739258,
359
+ "learning_rate": 1.6029759544677298e-05,
360
+ "loss": 0.258,
361
+ "step": 1400
362
+ },
363
+ {
364
+ "epoch": 17.4953560371517,
365
+ "eval_accuracy": 0.9090909090909091,
366
+ "eval_f1": 0.908023053812085,
367
+ "eval_loss": 0.235076442360878,
368
+ "eval_precision": 0.9101607687708339,
369
+ "eval_recall": 0.9090909090909091,
370
+ "eval_runtime": 10.0503,
371
+ "eval_samples_per_second": 219.993,
372
+ "eval_steps_per_second": 27.561,
373
+ "step": 1400
374
+ },
375
+ {
376
+ "epoch": 18.123839009287927,
377
+ "grad_norm": 1.2184885740280151,
378
+ "learning_rate": 1.566106538750063e-05,
379
+ "loss": 0.2639,
380
+ "step": 1450
381
+ },
382
+ {
383
+ "epoch": 18.743034055727556,
384
+ "grad_norm": 1.8000015020370483,
385
+ "learning_rate": 1.528067850650368e-05,
386
+ "loss": 0.2744,
387
+ "step": 1500
388
+ },
389
+ {
390
+ "epoch": 18.743034055727556,
391
+ "eval_accuracy": 0.9014020805065581,
392
+ "eval_f1": 0.8993606111929588,
393
+ "eval_loss": 0.25163090229034424,
394
+ "eval_precision": 0.9056565314020832,
395
+ "eval_recall": 0.9014020805065581,
396
+ "eval_runtime": 10.0711,
397
+ "eval_samples_per_second": 219.538,
398
+ "eval_steps_per_second": 27.504,
399
+ "step": 1500
400
+ },
401
+ {
402
+ "epoch": 19.371517027863778,
403
+ "grad_norm": 0.9816193580627441,
404
+ "learning_rate": 1.4889384576897728e-05,
405
+ "loss": 0.2576,
406
+ "step": 1550
407
+ },
408
+ {
409
+ "epoch": 19.990712074303406,
410
+ "grad_norm": 1.5961647033691406,
411
+ "learning_rate": 1.4487991802004625e-05,
412
+ "loss": 0.261,
413
+ "step": 1600
414
+ },
415
+ {
416
+ "epoch": 19.990712074303406,
417
+ "eval_accuracy": 0.9068294889190411,
418
+ "eval_f1": 0.9050356054740915,
419
+ "eval_loss": 0.2453160583972931,
420
+ "eval_precision": 0.910682778753074,
421
+ "eval_recall": 0.9068294889190411,
422
+ "eval_runtime": 10.0772,
423
+ "eval_samples_per_second": 219.406,
424
+ "eval_steps_per_second": 27.488,
425
+ "step": 1600
426
+ },
427
+ {
428
+ "epoch": 20.61919504643963,
429
+ "grad_norm": 1.3451308012008667,
430
+ "learning_rate": 1.4077329243942368e-05,
431
+ "loss": 0.2573,
432
+ "step": 1650
433
+ },
434
+ {
435
+ "epoch": 21.24767801857585,
436
+ "grad_norm": 2.416846990585327,
437
+ "learning_rate": 1.3658245111227571e-05,
438
+ "loss": 0.2519,
439
+ "step": 1700
440
+ },
441
+ {
442
+ "epoch": 21.24767801857585,
443
+ "eval_accuracy": 0.8986883763003166,
444
+ "eval_f1": 0.8961275295230519,
445
+ "eval_loss": 0.25643372535705566,
446
+ "eval_precision": 0.9051038365282786,
447
+ "eval_recall": 0.8986883763003166,
448
+ "eval_runtime": 10.0812,
449
+ "eval_samples_per_second": 219.319,
450
+ "eval_steps_per_second": 27.477,
451
+ "step": 1700
452
+ },
453
+ {
454
+ "epoch": 21.86687306501548,
455
+ "grad_norm": 1.3278274536132812,
456
+ "learning_rate": 1.323160500683173e-05,
457
+ "loss": 0.2546,
458
+ "step": 1750
459
+ },
460
+ {
461
+ "epoch": 22.4953560371517,
462
+ "grad_norm": 2.4079225063323975,
463
+ "learning_rate": 1.2798290140309924e-05,
464
+ "loss": 0.2595,
465
+ "step": 1800
466
+ },
467
+ {
468
+ "epoch": 22.4953560371517,
469
+ "eval_accuracy": 0.9095431931252826,
470
+ "eval_f1": 0.9079295985892596,
471
+ "eval_loss": 0.23176445066928864,
472
+ "eval_precision": 0.9128858666371455,
473
+ "eval_recall": 0.9095431931252826,
474
+ "eval_runtime": 10.0549,
475
+ "eval_samples_per_second": 219.893,
476
+ "eval_steps_per_second": 27.549,
477
+ "step": 1800
478
+ },
479
+ {
480
+ "epoch": 23.123839009287927,
481
+ "grad_norm": 1.916286587715149,
482
+ "learning_rate": 1.2359195507694633e-05,
483
+ "loss": 0.2383,
484
+ "step": 1850
485
+ },
486
+ {
487
+ "epoch": 23.743034055727556,
488
+ "grad_norm": 3.8509888648986816,
489
+ "learning_rate": 1.1915228042914144e-05,
490
+ "loss": 0.2548,
491
+ "step": 1900
492
+ },
493
+ {
494
+ "epoch": 23.743034055727556,
495
+ "eval_accuracy": 0.9136137494346449,
496
+ "eval_f1": 0.912807074710142,
497
+ "eval_loss": 0.21959343552589417,
498
+ "eval_precision": 0.91415122785338,
499
+ "eval_recall": 0.9136137494346449,
500
+ "eval_runtime": 10.0827,
501
+ "eval_samples_per_second": 219.287,
502
+ "eval_steps_per_second": 27.473,
503
+ "step": 1900
504
+ },
505
+ {
506
+ "epoch": 24.371517027863778,
507
+ "grad_norm": 1.1695117950439453,
508
+ "learning_rate": 1.1467304744553618e-05,
509
+ "loss": 0.2336,
510
+ "step": 1950
511
+ },
512
+ {
513
+ "epoch": 24.990712074303406,
514
+ "grad_norm": 0.93059903383255,
515
+ "learning_rate": 1.101635078182802e-05,
516
+ "loss": 0.2327,
517
+ "step": 2000
518
+ },
519
+ {
520
+ "epoch": 24.990712074303406,
521
+ "eval_accuracy": 0.9068294889190411,
522
+ "eval_f1": 0.904968418219579,
523
+ "eval_loss": 0.23764155805110931,
524
+ "eval_precision": 0.9110196660671599,
525
+ "eval_recall": 0.9068294889190411,
526
+ "eval_runtime": 10.0796,
527
+ "eval_samples_per_second": 219.354,
528
+ "eval_steps_per_second": 27.481,
529
+ "step": 2000
530
+ },
531
+ {
532
+ "epoch": 25.61919504643963,
533
+ "grad_norm": 1.0239897966384888,
534
+ "learning_rate": 1.0563297583678877e-05,
535
+ "loss": 0.2351,
536
+ "step": 2050
537
+ },
538
+ {
539
+ "epoch": 26.24767801857585,
540
+ "grad_norm": 1.6756315231323242,
541
+ "learning_rate": 1.0109080914941825e-05,
542
+ "loss": 0.2563,
543
+ "step": 2100
544
+ },
545
+ {
546
+ "epoch": 26.24767801857585,
547
+ "eval_accuracy": 0.9027589326096789,
548
+ "eval_f1": 0.9005087315122816,
549
+ "eval_loss": 0.2420978993177414,
550
+ "eval_precision": 0.9082977649283199,
551
+ "eval_recall": 0.9027589326096789,
552
+ "eval_runtime": 10.0674,
553
+ "eval_samples_per_second": 219.621,
554
+ "eval_steps_per_second": 27.515,
555
+ "step": 2100
556
+ },
557
+ {
558
+ "epoch": 26.86687306501548,
559
+ "grad_norm": 2.134220838546753,
560
+ "learning_rate": 9.65463894355851e-06,
561
+ "loss": 0.2373,
562
+ "step": 2150
563
+ },
564
+ {
565
+ "epoch": 27.4953560371517,
566
+ "grad_norm": 0.8266007304191589,
567
+ "learning_rate": 9.200910302824964e-06,
568
+ "loss": 0.2348,
569
+ "step": 2200
570
+ },
571
+ {
572
+ "epoch": 27.4953560371517,
573
+ "eval_accuracy": 0.9109000452284034,
574
+ "eval_f1": 0.9095386566495041,
575
+ "eval_loss": 0.22126996517181396,
576
+ "eval_precision": 0.9132386550038264,
577
+ "eval_recall": 0.9109000452284034,
578
+ "eval_runtime": 10.0881,
579
+ "eval_samples_per_second": 219.169,
580
+ "eval_steps_per_second": 27.458,
581
+ "step": 2200
582
+ },
583
+ {
584
+ "epoch": 28.123839009287927,
585
+ "grad_norm": 1.1402108669281006,
586
+ "learning_rate": 8.74883215267881e-06,
587
+ "loss": 0.2366,
588
+ "step": 2250
589
+ },
590
+ {
591
+ "epoch": 28.743034055727556,
592
+ "grad_norm": 1.5788310766220093,
593
+ "learning_rate": 8.299338244029646e-06,
594
+ "loss": 0.2427,
595
+ "step": 2300
596
+ },
597
+ {
598
+ "epoch": 28.743034055727556,
599
+ "eval_accuracy": 0.9077340569877883,
600
+ "eval_f1": 0.9059575898869645,
601
+ "eval_loss": 0.23078913986682892,
602
+ "eval_precision": 0.9116313077973748,
603
+ "eval_recall": 0.9077340569877883,
604
+ "eval_runtime": 10.0773,
605
+ "eval_samples_per_second": 219.404,
606
+ "eval_steps_per_second": 27.488,
607
+ "step": 2300
608
+ },
609
+ {
610
+ "epoch": 29.371517027863778,
611
+ "grad_norm": 1.0100857019424438,
612
+ "learning_rate": 7.853356990130625e-06,
613
+ "loss": 0.2462,
614
+ "step": 2350
615
+ },
616
+ {
617
+ "epoch": 29.990712074303406,
618
+ "grad_norm": 1.6099952459335327,
619
+ "learning_rate": 7.411809548974792e-06,
620
+ "loss": 0.2166,
621
+ "step": 2400
622
+ },
623
+ {
624
+ "epoch": 29.990712074303406,
625
+ "eval_accuracy": 0.9140660334690185,
626
+ "eval_f1": 0.9127673040043155,
627
+ "eval_loss": 0.21517515182495117,
628
+ "eval_precision": 0.9164570638030407,
629
+ "eval_recall": 0.9140660334690185,
630
+ "eval_runtime": 10.0698,
631
+ "eval_samples_per_second": 219.568,
632
+ "eval_steps_per_second": 27.508,
633
+ "step": 2400
634
+ },
635
+ {
636
+ "epoch": 30.61919504643963,
637
+ "grad_norm": 2.7743020057678223,
638
+ "learning_rate": 6.975607920676901e-06,
639
+ "loss": 0.2315,
640
+ "step": 2450
641
+ },
642
+ {
643
+ "epoch": 31.24767801857585,
644
+ "grad_norm": 2.1687729358673096,
645
+ "learning_rate": 6.545653063770458e-06,
646
+ "loss": 0.2345,
647
+ "step": 2500
648
+ },
649
+ {
650
+ "epoch": 31.24767801857585,
651
+ "eval_accuracy": 0.9068294889190411,
652
+ "eval_f1": 0.9049003940630713,
653
+ "eval_loss": 0.22831733524799347,
654
+ "eval_precision": 0.9113708908998591,
655
+ "eval_recall": 0.9068294889190411,
656
+ "eval_runtime": 10.1159,
657
+ "eval_samples_per_second": 218.567,
658
+ "eval_steps_per_second": 27.383,
659
+ "step": 2500
660
+ },
661
+ {
662
+ "epoch": 31.86687306501548,
663
+ "grad_norm": 2.7222249507904053,
664
+ "learning_rate": 6.122833034310794e-06,
665
+ "loss": 0.2283,
666
+ "step": 2550
667
+ },
668
+ {
669
+ "epoch": 32.4953560371517,
670
+ "grad_norm": 1.5686038732528687,
671
+ "learning_rate": 5.708021151627712e-06,
672
+ "loss": 0.2355,
673
+ "step": 2600
674
+ },
675
+ {
676
+ "epoch": 32.4953560371517,
677
+ "eval_accuracy": 0.9118046132971506,
678
+ "eval_f1": 0.9103077963732922,
679
+ "eval_loss": 0.21731863915920258,
680
+ "eval_precision": 0.9148692757405781,
681
+ "eval_recall": 0.9118046132971506,
682
+ "eval_runtime": 10.0717,
683
+ "eval_samples_per_second": 219.526,
684
+ "eval_steps_per_second": 27.503,
685
+ "step": 2600
686
+ },
687
+ {
688
+ "epoch": 33.12383900928793,
689
+ "grad_norm": 2.4550020694732666,
690
+ "learning_rate": 5.302074194516291e-06,
691
+ "loss": 0.2207,
692
+ "step": 2650
693
+ },
694
+ {
695
+ "epoch": 33.743034055727556,
696
+ "grad_norm": 2.3107786178588867,
697
+ "learning_rate": 4.9058306315915826e-06,
698
+ "loss": 0.2291,
699
+ "step": 2700
700
+ },
701
+ {
702
+ "epoch": 33.743034055727556,
703
+ "eval_accuracy": 0.9127091813658977,
704
+ "eval_f1": 0.9112873759668951,
705
+ "eval_loss": 0.214884415268898,
706
+ "eval_precision": 0.9155228611707341,
707
+ "eval_recall": 0.9127091813658977,
708
+ "eval_runtime": 10.1219,
709
+ "eval_samples_per_second": 218.437,
710
+ "eval_steps_per_second": 27.366,
711
+ "step": 2700
712
+ },
713
+ {
714
+ "epoch": 34.371517027863774,
715
+ "grad_norm": 1.5490918159484863,
716
+ "learning_rate": 4.52010888946231e-06,
717
+ "loss": 0.2344,
718
+ "step": 2750
719
+ },
720
+ {
721
+ "epoch": 34.9907120743034,
722
+ "grad_norm": 1.1831104755401611,
723
+ "learning_rate": 4.1457056623005954e-06,
724
+ "loss": 0.2319,
725
+ "step": 2800
726
+ },
727
+ {
728
+ "epoch": 34.9907120743034,
729
+ "eval_accuracy": 0.9140660334690185,
730
+ "eval_f1": 0.912709884455563,
731
+ "eval_loss": 0.2123376727104187,
732
+ "eval_precision": 0.9167245062463679,
733
+ "eval_recall": 0.9140660334690185,
734
+ "eval_runtime": 10.0267,
735
+ "eval_samples_per_second": 220.512,
736
+ "eval_steps_per_second": 27.626,
737
+ "step": 2800
738
+ },
739
+ {
740
+ "epoch": 35.61919504643963,
741
+ "grad_norm": 1.047865629196167,
742
+ "learning_rate": 3.7833942662992286e-06,
743
+ "loss": 0.2253,
744
+ "step": 2850
745
+ },
746
+ {
747
+ "epoch": 36.247678018575854,
748
+ "grad_norm": 1.880428433418274,
749
+ "learning_rate": 3.4339230424153225e-06,
750
+ "loss": 0.222,
751
+ "step": 2900
752
+ },
753
+ {
754
+ "epoch": 36.247678018575854,
755
+ "eval_accuracy": 0.9181365897783809,
756
+ "eval_f1": 0.9171239767848,
757
+ "eval_loss": 0.2052914798259735,
758
+ "eval_precision": 0.9196609395815928,
759
+ "eval_recall": 0.9181365897783809,
760
+ "eval_runtime": 10.0315,
761
+ "eval_samples_per_second": 220.405,
762
+ "eval_steps_per_second": 27.613,
763
+ "step": 2900
764
+ },
765
+ {
766
+ "epoch": 36.86687306501548,
767
+ "grad_norm": 1.5235450267791748,
768
+ "learning_rate": 3.098013810699404e-06,
769
+ "loss": 0.2256,
770
+ "step": 2950
771
+ },
772
+ {
773
+ "epoch": 37.4953560371517,
774
+ "grad_norm": 1.710253357887268,
775
+ "learning_rate": 2.776360379402445e-06,
776
+ "loss": 0.2235,
777
+ "step": 3000
778
+ },
779
+ {
780
+ "epoch": 37.4953560371517,
781
+ "eval_accuracy": 0.9140660334690185,
782
+ "eval_f1": 0.9127386875836824,
783
+ "eval_loss": 0.21209371089935303,
784
+ "eval_precision": 0.9165890729592928,
785
+ "eval_recall": 0.9140660334690185,
786
+ "eval_runtime": 10.0492,
787
+ "eval_samples_per_second": 220.018,
788
+ "eval_steps_per_second": 27.564,
789
+ "step": 3000
790
+ },
791
+ {
792
+ "epoch": 38.12383900928793,
793
+ "grad_norm": 1.8632524013519287,
794
+ "learning_rate": 2.469627111940258e-06,
795
+ "loss": 0.2207,
796
+ "step": 3050
797
+ },
798
+ {
799
+ "epoch": 38.743034055727556,
800
+ "grad_norm": 1.2123332023620605,
801
+ "learning_rate": 2.178447554675136e-06,
802
+ "loss": 0.2221,
803
+ "step": 3100
804
+ },
805
+ {
806
+ "epoch": 38.743034055727556,
807
+ "eval_accuracy": 0.9194934418815016,
808
+ "eval_f1": 0.9188000621156364,
809
+ "eval_loss": 0.20125794410705566,
810
+ "eval_precision": 0.9199821991038324,
811
+ "eval_recall": 0.9194934418815016,
812
+ "eval_runtime": 10.0477,
813
+ "eval_samples_per_second": 220.05,
814
+ "eval_steps_per_second": 27.568,
815
+ "step": 3100
816
+ },
817
+ {
818
+ "epoch": 39.371517027863774,
819
+ "grad_norm": 1.6572494506835938,
820
+ "learning_rate": 1.903423128348959e-06,
821
+ "loss": 0.2411,
822
+ "step": 3150
823
+ },
824
+ {
825
+ "epoch": 39.9907120743034,
826
+ "grad_norm": 1.047083854675293,
827
+ "learning_rate": 1.6451218858706374e-06,
828
+ "loss": 0.2262,
829
+ "step": 3200
830
+ },
831
+ {
832
+ "epoch": 39.9907120743034,
833
+ "eval_accuracy": 0.9222071460877431,
834
+ "eval_f1": 0.9213530294983667,
835
+ "eval_loss": 0.20286186039447784,
836
+ "eval_precision": 0.9233953907433776,
837
+ "eval_recall": 0.9222071460877431,
838
+ "eval_runtime": 10.0854,
839
+ "eval_samples_per_second": 219.227,
840
+ "eval_steps_per_second": 27.465,
841
+ "step": 3200
842
+ },
843
+ {
844
+ "epoch": 40.61919504643963,
845
+ "grad_norm": 1.171499490737915,
846
+ "learning_rate": 1.4040773390235463e-06,
847
+ "loss": 0.2213,
848
+ "step": 3250
849
+ },
850
+ {
851
+ "epoch": 41.247678018575854,
852
+ "grad_norm": 2.6431760787963867,
853
+ "learning_rate": 1.1807873565164507e-06,
854
+ "loss": 0.2171,
855
+ "step": 3300
856
+ },
857
+ {
858
+ "epoch": 41.247678018575854,
859
+ "eval_accuracy": 0.9181365897783809,
860
+ "eval_f1": 0.9169933978820265,
861
+ "eval_loss": 0.2075406163930893,
862
+ "eval_precision": 0.9202292831462632,
863
+ "eval_recall": 0.9181365897783809,
864
+ "eval_runtime": 10.0343,
865
+ "eval_samples_per_second": 220.344,
866
+ "eval_steps_per_second": 27.605,
867
+ "step": 3300
868
+ },
869
+ {
870
+ "epoch": 41.86687306501548,
871
+ "grad_norm": 1.0989552736282349,
872
+ "learning_rate": 9.757131356538408e-07,
873
+ "loss": 0.2162,
874
+ "step": 3350
875
+ },
876
+ {
877
+ "epoch": 42.4953560371517,
878
+ "grad_norm": 1.126570463180542,
879
+ "learning_rate": 7.892782497497642e-07,
880
+ "loss": 0.2268,
881
+ "step": 3400
882
+ },
883
+ {
884
+ "epoch": 42.4953560371517,
885
+ "eval_accuracy": 0.919041157847128,
886
+ "eval_f1": 0.9179885980860357,
887
+ "eval_loss": 0.2045469731092453,
888
+ "eval_precision": 0.9208108708737649,
889
+ "eval_recall": 0.919041157847128,
890
+ "eval_runtime": 10.0534,
891
+ "eval_samples_per_second": 219.925,
892
+ "eval_steps_per_second": 27.553,
893
+ "step": 3400
894
+ },
895
+ {
896
+ "epoch": 43.12383900928793,
897
+ "grad_norm": 1.0992859601974487,
898
+ "learning_rate": 6.218677732526035e-07,
899
+ "loss": 0.2284,
900
+ "step": 3450
901
+ },
902
+ {
903
+ "epoch": 43.743034055727556,
904
+ "grad_norm": 0.8807310461997986,
905
+ "learning_rate": 4.7382748638786336e-07,
906
+ "loss": 0.2222,
907
+ "step": 3500
908
+ },
909
+ {
910
+ "epoch": 43.743034055727556,
911
+ "eval_accuracy": 0.9203980099502488,
912
+ "eval_f1": 0.9193757241191305,
913
+ "eval_loss": 0.20502084493637085,
914
+ "eval_precision": 0.9221532972644962,
915
+ "eval_recall": 0.9203980099502488,
916
+ "eval_runtime": 10.0674,
917
+ "eval_samples_per_second": 219.619,
918
+ "eval_steps_per_second": 27.514,
919
+ "step": 3500
920
+ },
921
+ {
922
+ "epoch": 44.371517027863774,
923
+ "grad_norm": 1.0601508617401123,
924
+ "learning_rate": 3.454631609617487e-07,
925
+ "loss": 0.2232,
926
+ "step": 3550
927
+ },
928
+ {
929
+ "epoch": 44.9907120743034,
930
+ "grad_norm": 1.5610824823379517,
931
+ "learning_rate": 2.370399288006664e-07,
932
+ "loss": 0.2169,
933
+ "step": 3600
934
+ },
935
+ {
936
+ "epoch": 44.9907120743034,
937
+ "eval_accuracy": 0.9176843057440073,
938
+ "eval_f1": 0.9165481260183203,
939
+ "eval_loss": 0.20695888996124268,
940
+ "eval_precision": 0.9197013630605291,
941
+ "eval_recall": 0.9176843057440073,
942
+ "eval_runtime": 10.0683,
943
+ "eval_samples_per_second": 219.6,
944
+ "eval_steps_per_second": 27.512,
945
+ "step": 3600
946
+ },
947
+ {
948
+ "epoch": 45.61919504643963,
949
+ "grad_norm": 1.097316861152649,
950
+ "learning_rate": 1.4878173413111485e-07,
951
+ "loss": 0.2269,
952
+ "step": 3650
953
+ },
954
+ {
955
+ "epoch": 46.247678018575854,
956
+ "grad_norm": 1.628519892692566,
957
+ "learning_rate": 8.087087103106461e-08,
958
+ "loss": 0.2245,
959
+ "step": 3700
960
+ },
961
+ {
962
+ "epoch": 46.247678018575854,
963
+ "eval_accuracy": 0.9181365897783809,
964
+ "eval_f1": 0.9170198637903132,
965
+ "eval_loss": 0.20643840730190277,
966
+ "eval_precision": 0.9201088948883855,
967
+ "eval_recall": 0.9181365897783809,
968
+ "eval_runtime": 10.0506,
969
+ "eval_samples_per_second": 219.986,
970
+ "eval_steps_per_second": 27.56,
971
+ "step": 3700
972
+ },
973
+ {
974
+ "epoch": 46.86687306501548,
975
+ "grad_norm": 0.9928280115127563,
976
+ "learning_rate": 3.3447606908196815e-08,
977
+ "loss": 0.212,
978
+ "step": 3750
979
+ },
980
+ {
981
+ "epoch": 47.4953560371517,
982
+ "grad_norm": 1.0919773578643799,
983
+ "learning_rate": 6.609892782699634e-09,
984
+ "loss": 0.2148,
985
+ "step": 3800
986
+ },
987
+ {
988
+ "epoch": 47.4953560371517,
989
+ "eval_accuracy": 0.9181365897783809,
990
+ "eval_f1": 0.9170198637903132,
991
+ "eval_loss": 0.20658649504184723,
992
+ "eval_precision": 0.9201088948883855,
993
+ "eval_recall": 0.9181365897783809,
994
+ "eval_runtime": 10.0329,
995
+ "eval_samples_per_second": 220.375,
996
+ "eval_steps_per_second": 27.609,
997
+ "step": 3800
998
+ },
999
+ {
1000
+ "epoch": 47.9907120743034,
1001
+ "step": 3840,
1002
+ "total_flos": 2.738563601264935e+19,
1003
+ "train_loss": 0.3012936460475127,
1004
+ "train_runtime": 3086.985,
1005
+ "train_samples_per_second": 80.203,
1006
+ "train_steps_per_second": 1.244
1007
+ }
1008
+ ],
1009
+ "logging_steps": 50,
1010
+ "max_steps": 3840,
1011
+ "num_input_tokens_seen": 0,
1012
+ "num_train_epochs": 48,
1013
+ "save_steps": 100,
1014
+ "stateful_callbacks": {
1015
+ "TrainerControl": {
1016
+ "args": {
1017
+ "should_epoch_stop": false,
1018
+ "should_evaluate": false,
1019
+ "should_log": false,
1020
+ "should_save": true,
1021
+ "should_training_stop": true
1022
+ },
1023
+ "attributes": {}
1024
+ }
1025
+ },
1026
+ "total_flos": 2.738563601264935e+19,
1027
+ "train_batch_size": 16,
1028
+ "trial_name": null,
1029
+ "trial_params": null
1030
+ }