Text Classification
Transformers
Safetensors
English
llama
text-generation-inference
Nessii013 commited on
Commit
7240e26
·
verified ·
1 Parent(s): e6faf74

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +456 -0
trainer_state.json ADDED
@@ -0,0 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 50,
6
+ "global_step": 320,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03125,
13
+ "grad_norm": 159.0,
14
+ "learning_rate": 1.8750000000000002e-05,
15
+ "loss": 1.1247,
16
+ "num_input_tokens_seen": 1465248,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.0625,
21
+ "grad_norm": 65.0,
22
+ "learning_rate": 2.9987186375809513e-05,
23
+ "loss": 0.638,
24
+ "num_input_tokens_seen": 2893200,
25
+ "step": 20
26
+ },
27
+ {
28
+ "epoch": 0.09375,
29
+ "grad_norm": 10.0625,
30
+ "learning_rate": 2.984328439990804e-05,
31
+ "loss": 0.2887,
32
+ "num_input_tokens_seen": 4373616,
33
+ "step": 30
34
+ },
35
+ {
36
+ "epoch": 0.125,
37
+ "grad_norm": 14.875,
38
+ "learning_rate": 2.9541003989089956e-05,
39
+ "loss": 0.2392,
40
+ "num_input_tokens_seen": 5755248,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 0.15625,
45
+ "grad_norm": 9.8125,
46
+ "learning_rate": 2.9083570487361445e-05,
47
+ "loss": 0.226,
48
+ "num_input_tokens_seen": 7213808,
49
+ "step": 50
50
+ },
51
+ {
52
+ "epoch": 0.1875,
53
+ "grad_norm": 19.0,
54
+ "learning_rate": 2.8475864728379682e-05,
55
+ "loss": 0.2489,
56
+ "num_input_tokens_seen": 8649584,
57
+ "step": 60
58
+ },
59
+ {
60
+ "epoch": 0.21875,
61
+ "grad_norm": 27.75,
62
+ "learning_rate": 2.7724370956763605e-05,
63
+ "loss": 0.2462,
64
+ "num_input_tokens_seen": 10054416,
65
+ "step": 70
66
+ },
67
+ {
68
+ "epoch": 0.25,
69
+ "grad_norm": 12.5,
70
+ "learning_rate": 2.6837107640945904e-05,
71
+ "loss": 0.2133,
72
+ "num_input_tokens_seen": 11521216,
73
+ "step": 80
74
+ },
75
+ {
76
+ "epoch": 0.28125,
77
+ "grad_norm": 8.625,
78
+ "learning_rate": 2.5823541915795932e-05,
79
+ "loss": 0.2035,
80
+ "num_input_tokens_seen": 12912576,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 0.3125,
85
+ "grad_norm": 4.25,
86
+ "learning_rate": 2.469448856791411e-05,
87
+ "loss": 0.2226,
88
+ "num_input_tokens_seen": 14369728,
89
+ "step": 100
90
+ },
91
+ {
92
+ "epoch": 0.34375,
93
+ "grad_norm": 22.625,
94
+ "learning_rate": 2.3461994641428768e-05,
95
+ "loss": 0.2314,
96
+ "num_input_tokens_seen": 15796256,
97
+ "step": 110
98
+ },
99
+ {
100
+ "epoch": 0.375,
101
+ "grad_norm": 19.875,
102
+ "learning_rate": 2.2139210895556104e-05,
103
+ "loss": 0.2564,
104
+ "num_input_tokens_seen": 17240400,
105
+ "step": 120
106
+ },
107
+ {
108
+ "epoch": 0.40625,
109
+ "grad_norm": 7.78125,
110
+ "learning_rate": 2.074025148547635e-05,
111
+ "loss": 0.2442,
112
+ "num_input_tokens_seen": 18663216,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 0.4375,
117
+ "grad_norm": 12.125,
118
+ "learning_rate": 1.928004336373658e-05,
119
+ "loss": 0.2115,
120
+ "num_input_tokens_seen": 20069056,
121
+ "step": 140
122
+ },
123
+ {
124
+ "epoch": 0.46875,
125
+ "grad_norm": 21.0,
126
+ "learning_rate": 1.777416700907338e-05,
127
+ "loss": 0.1656,
128
+ "num_input_tokens_seen": 21471248,
129
+ "step": 150
130
+ },
131
+ {
132
+ "epoch": 0.5,
133
+ "grad_norm": 10.6875,
134
+ "learning_rate": 1.623869018208499e-05,
135
+ "loss": 0.1906,
136
+ "num_input_tokens_seen": 22898704,
137
+ "step": 160
138
+ },
139
+ {
140
+ "epoch": 0.53125,
141
+ "grad_norm": 8.4375,
142
+ "learning_rate": 1.4689996481586688e-05,
143
+ "loss": 0.1883,
144
+ "num_input_tokens_seen": 24365264,
145
+ "step": 170
146
+ },
147
+ {
148
+ "epoch": 0.5625,
149
+ "grad_norm": 14.0,
150
+ "learning_rate": 1.3144610530959784e-05,
151
+ "loss": 0.2115,
152
+ "num_input_tokens_seen": 25870176,
153
+ "step": 180
154
+ },
155
+ {
156
+ "epoch": 0.59375,
157
+ "grad_norm": 16.25,
158
+ "learning_rate": 1.1619021659762912e-05,
159
+ "loss": 0.1644,
160
+ "num_input_tokens_seen": 27346880,
161
+ "step": 190
162
+ },
163
+ {
164
+ "epoch": 0.625,
165
+ "grad_norm": 8.375,
166
+ "learning_rate": 1.0129507961929749e-05,
167
+ "loss": 0.2046,
168
+ "num_input_tokens_seen": 28825968,
169
+ "step": 200
170
+ },
171
+ {
172
+ "epoch": 0.65625,
173
+ "grad_norm": 13.3125,
174
+ "learning_rate": 8.691962607859386e-06,
175
+ "loss": 0.1767,
176
+ "num_input_tokens_seen": 30278096,
177
+ "step": 210
178
+ },
179
+ {
180
+ "epoch": 0.6875,
181
+ "grad_norm": 6.96875,
182
+ "learning_rate": 7.321724263655989e-06,
183
+ "loss": 0.1739,
184
+ "num_input_tokens_seen": 31701248,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 0.71875,
189
+ "grad_norm": 3.765625,
190
+ "learning_rate": 6.0334134269513865e-06,
191
+ "loss": 0.1648,
192
+ "num_input_tokens_seen": 33175280,
193
+ "step": 230
194
+ },
195
+ {
196
+ "epoch": 0.75,
197
+ "grad_norm": 37.5,
198
+ "learning_rate": 4.840776425613887e-06,
199
+ "loss": 0.1662,
200
+ "num_input_tokens_seen": 34680976,
201
+ "step": 240
202
+ },
203
+ {
204
+ "epoch": 0.78125,
205
+ "grad_norm": 6.3125,
206
+ "learning_rate": 3.756538743883111e-06,
207
+ "loss": 0.1867,
208
+ "num_input_tokens_seen": 36137360,
209
+ "step": 250
210
+ },
211
+ {
212
+ "epoch": 0.8125,
213
+ "grad_norm": 3.46875,
214
+ "learning_rate": 2.792269240947076e-06,
215
+ "loss": 0.1601,
216
+ "num_input_tokens_seen": 37577552,
217
+ "step": 260
218
+ },
219
+ {
220
+ "epoch": 0.84375,
221
+ "grad_norm": 6.0,
222
+ "learning_rate": 1.958256710754496e-06,
223
+ "loss": 0.1679,
224
+ "num_input_tokens_seen": 39032832,
225
+ "step": 270
226
+ },
227
+ {
228
+ "epoch": 0.875,
229
+ "grad_norm": 8.125,
230
+ "learning_rate": 1.2634001001741375e-06,
231
+ "loss": 0.1518,
232
+ "num_input_tokens_seen": 40447376,
233
+ "step": 280
234
+ },
235
+ {
236
+ "epoch": 0.90625,
237
+ "grad_norm": 5.96875,
238
+ "learning_rate": 7.151135568777839e-07,
239
+ "loss": 0.171,
240
+ "num_input_tokens_seen": 41890144,
241
+ "step": 290
242
+ },
243
+ {
244
+ "epoch": 0.9375,
245
+ "grad_norm": 17.5,
246
+ "learning_rate": 3.192473200896828e-07,
247
+ "loss": 0.1702,
248
+ "num_input_tokens_seen": 43291456,
249
+ "step": 300
250
+ },
251
+ {
252
+ "epoch": 0.96875,
253
+ "grad_norm": 20.5,
254
+ "learning_rate": 8.002529830136163e-08,
255
+ "loss": 0.1595,
256
+ "num_input_tokens_seen": 44726672,
257
+ "step": 310
258
+ },
259
+ {
260
+ "epoch": 1.0,
261
+ "grad_norm": 13.3125,
262
+ "learning_rate": 0.0,
263
+ "loss": 0.1722,
264
+ "num_input_tokens_seen": 46209888,
265
+ "step": 320
266
+ },
267
+ {
268
+ "epoch": 1.0,
269
+ "eval_cnn_balanced_accuracy": 0.5759533564450047,
270
+ "eval_cnn_f1_score": 0.2608695652173913,
271
+ "eval_cnn_loss": 0.31287845969200134,
272
+ "eval_cnn_pr_auc": 0.42626378805395937,
273
+ "eval_cnn_roc_auc": 0.7679027909094093,
274
+ "eval_cnn_runtime": 10.722,
275
+ "eval_cnn_samples_per_second": 52.043,
276
+ "eval_cnn_steps_per_second": 0.839,
277
+ "num_input_tokens_seen": 46209888,
278
+ "step": 320
279
+ },
280
+ {
281
+ "epoch": 1.0,
282
+ "eval_xsum_balanced_accuracy": 0.749277038750723,
283
+ "eval_xsum_f1_score": 0.7258382642998027,
284
+ "eval_xsum_loss": 0.5868125557899475,
285
+ "eval_xsum_pr_auc": 0.7976212402287128,
286
+ "eval_xsum_roc_auc": 0.8217916586337639,
287
+ "eval_xsum_runtime": 9.7581,
288
+ "eval_xsum_samples_per_second": 57.183,
289
+ "eval_xsum_steps_per_second": 0.922,
290
+ "num_input_tokens_seen": 46209888,
291
+ "step": 320
292
+ },
293
+ {
294
+ "epoch": 1.0,
295
+ "eval_medias_balanced_accuracy": 0.7074867769288893,
296
+ "eval_medias_f1_score": 0.5704225352112676,
297
+ "eval_medias_loss": 0.45626452565193176,
298
+ "eval_medias_pr_auc": 0.6584352779944264,
299
+ "eval_medias_roc_auc": 0.7960236336159854,
300
+ "eval_medias_runtime": 9.9504,
301
+ "eval_medias_samples_per_second": 72.962,
302
+ "eval_medias_steps_per_second": 1.206,
303
+ "num_input_tokens_seen": 46209888,
304
+ "step": 320
305
+ },
306
+ {
307
+ "epoch": 1.0,
308
+ "eval_meetb_balanced_accuracy": 0.7702572347266881,
309
+ "eval_meetb_f1_score": 0.6498194945848376,
310
+ "eval_meetb_loss": 0.316259503364563,
311
+ "eval_meetb_pr_auc": 0.7402648937803408,
312
+ "eval_meetb_roc_auc": 0.8713504823151126,
313
+ "eval_meetb_runtime": 10.6662,
314
+ "eval_meetb_samples_per_second": 72.378,
315
+ "eval_meetb_steps_per_second": 1.219,
316
+ "num_input_tokens_seen": 46209888,
317
+ "step": 320
318
+ },
319
+ {
320
+ "epoch": 1.0,
321
+ "eval_wice_balanced_accuracy": 0.8154247364773681,
322
+ "eval_wice_f1_score": 0.8679245283018868,
323
+ "eval_wice_loss": 0.4058681130409241,
324
+ "eval_wice_pr_auc": 0.9493679290266217,
325
+ "eval_wice_roc_auc": 0.8869679395995185,
326
+ "eval_wice_runtime": 123.4927,
327
+ "eval_wice_samples_per_second": 2.899,
328
+ "eval_wice_steps_per_second": 0.049,
329
+ "num_input_tokens_seen": 46209888,
330
+ "step": 320
331
+ },
332
+ {
333
+ "epoch": 1.0,
334
+ "eval_reveal_balanced_accuracy": 0.8941412213740458,
335
+ "eval_reveal_f1_score": 0.9196141479099679,
336
+ "eval_reveal_loss": 0.2713985741138458,
337
+ "eval_reveal_pr_auc": 0.9884053490966663,
338
+ "eval_reveal_roc_auc": 0.960442748091603,
339
+ "eval_reveal_runtime": 7.8148,
340
+ "eval_reveal_samples_per_second": 218.815,
341
+ "eval_reveal_steps_per_second": 3.455,
342
+ "num_input_tokens_seen": 46209888,
343
+ "step": 320
344
+ },
345
+ {
346
+ "epoch": 1.0,
347
+ "eval_claim_verify_balanced_accuracy": 0.7654793545023335,
348
+ "eval_claim_verify_f1_score": 0.6621160409556314,
349
+ "eval_claim_verify_loss": 0.47898775339126587,
350
+ "eval_claim_verify_pr_auc": 0.6755500302254687,
351
+ "eval_claim_verify_roc_auc": 0.8519738375912951,
352
+ "eval_claim_verify_runtime": 261.7647,
353
+ "eval_claim_verify_samples_per_second": 4.156,
354
+ "eval_claim_verify_steps_per_second": 0.065,
355
+ "num_input_tokens_seen": 46209888,
356
+ "step": 320
357
+ },
358
+ {
359
+ "epoch": 1.0,
360
+ "eval_fact_check_balanced_accuracy": 0.7718129805113534,
361
+ "eval_fact_check_f1_score": 0.9008264462809917,
362
+ "eval_fact_check_loss": 0.39186760783195496,
363
+ "eval_fact_check_pr_auc": 0.9477958353480682,
364
+ "eval_fact_check_roc_auc": 0.8752134364384051,
365
+ "eval_fact_check_runtime": 5.283,
366
+ "eval_fact_check_samples_per_second": 296.421,
367
+ "eval_fact_check_steps_per_second": 4.732,
368
+ "num_input_tokens_seen": 46209888,
369
+ "step": 320
370
+ },
371
+ {
372
+ "epoch": 1.0,
373
+ "eval_expertqa_balanced_accuracy": 0.5946286054753636,
374
+ "eval_expertqa_f1_score": 0.36883320281910725,
375
+ "eval_expertqa_loss": 1.1040500402450562,
376
+ "eval_expertqa_pr_auc": 0.2890662808738864,
377
+ "eval_expertqa_roc_auc": 0.6258950520788967,
378
+ "eval_expertqa_runtime": 536.4175,
379
+ "eval_expertqa_samples_per_second": 6.901,
380
+ "eval_expertqa_steps_per_second": 0.108,
381
+ "num_input_tokens_seen": 46209888,
382
+ "step": 320
383
+ },
384
+ {
385
+ "epoch": 1.0,
386
+ "eval_lfqa_balanced_accuracy": 0.8775737079235312,
387
+ "eval_lfqa_f1_score": 0.8557275541795665,
388
+ "eval_lfqa_loss": 0.27868813276290894,
389
+ "eval_lfqa_pr_auc": 0.9362703923849833,
390
+ "eval_lfqa_roc_auc": 0.9523769464424847,
391
+ "eval_lfqa_runtime": 12.7663,
392
+ "eval_lfqa_samples_per_second": 149.691,
393
+ "eval_lfqa_steps_per_second": 2.35,
394
+ "num_input_tokens_seen": 46209888,
395
+ "step": 320
396
+ },
397
+ {
398
+ "epoch": 1.0,
399
+ "eval_ragtruth_balanced_accuracy": 0.7859350741200968,
400
+ "eval_ragtruth_f1_score": 0.5078776645041705,
401
+ "eval_ragtruth_loss": 0.26025664806365967,
402
+ "eval_ragtruth_pr_auc": 0.4400226298715947,
403
+ "eval_ragtruth_roc_auc": 0.8972721245465168,
404
+ "eval_ragtruth_runtime": 149.8477,
405
+ "eval_ragtruth_samples_per_second": 109.251,
406
+ "eval_ragtruth_steps_per_second": 1.708,
407
+ "num_input_tokens_seen": 46209888,
408
+ "step": 320
409
+ },
410
+ {
411
+ "epoch": 1.0,
412
+ "eval_halloumi_synthetic_balanced_accuracy": 0.7681937112954458,
413
+ "eval_halloumi_synthetic_f1_score": 0.6775431861804223,
414
+ "eval_halloumi_synthetic_loss": 0.34176450967788696,
415
+ "eval_halloumi_synthetic_pr_auc": 0.7894696954251237,
416
+ "eval_halloumi_synthetic_roc_auc": 0.9044560224857751,
417
+ "eval_halloumi_synthetic_runtime": 38.8927,
418
+ "eval_halloumi_synthetic_samples_per_second": 53.712,
419
+ "eval_halloumi_synthetic_steps_per_second": 0.848,
420
+ "num_input_tokens_seen": 46209888,
421
+ "step": 320
422
+ },
423
+ {
424
+ "epoch": 1.0,
425
+ "num_input_tokens_seen": 46209888,
426
+ "step": 320,
427
+ "total_flos": 2.418947678713938e+17,
428
+ "train_loss": 0.2418923556804657,
429
+ "train_runtime": 3758.6466,
430
+ "train_samples_per_second": 10.887,
431
+ "train_steps_per_second": 0.085,
432
+ "train_tokens_per_second": 1545.744
433
+ }
434
+ ],
435
+ "logging_steps": 10,
436
+ "max_steps": 320,
437
+ "num_input_tokens_seen": 46209888,
438
+ "num_train_epochs": 1,
439
+ "save_steps": 0,
440
+ "stateful_callbacks": {
441
+ "TrainerControl": {
442
+ "args": {
443
+ "should_epoch_stop": false,
444
+ "should_evaluate": false,
445
+ "should_log": false,
446
+ "should_save": false,
447
+ "should_training_stop": false
448
+ },
449
+ "attributes": {}
450
+ }
451
+ },
452
+ "total_flos": 2.418947678713938e+17,
453
+ "train_batch_size": 16,
454
+ "trial_name": null,
455
+ "trial_params": null
456
+ }