hossay committed on
Commit
34d11b8
·
1 Parent(s): 35e862d

🍻 cheers

Browse files
README.md CHANGED
@@ -24,10 +24,10 @@ model-index:
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
- value: 0.8171064604185623
28
  - name: F1
29
  type: f1
30
- value: 0.7841031149301826
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -37,12 +37,12 @@ should probably proofread and complete it, then remove this comment. -->
37
 
38
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the stool-image dataset.
39
  It achieves the following results on the evaluation set:
40
- - Loss: 0.4538
41
- - Auroc: 0.8897
42
- - Accuracy: 0.8171
43
- - Sensitivity: 0.8111
44
- - Specificty: 0.8213
45
- - F1: 0.7841
46
 
47
  ## Model description
48
 
@@ -67,19 +67,15 @@ The following hyperparameters were used during training:
67
  - seed: 42
68
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
69
  - lr_scheduler_type: linear
70
- - num_epochs: 100
71
  - mixed_precision_training: Native AMP
72
 
73
  ### Training results
74
 
75
  | Training Loss | Epoch | Step | Validation Loss | Auroc | Accuracy | Sensitivity | Specificity | F1 |
76
  |:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|:-----------:|:----------:|:------:|
77
- | 0.5303 | 0.98 | 100 | 0.4327 | 0.8826 | 0.7942 | 0.7191 | 0.8607 | 0.7665 |
78
- | 0.3909 | 1.96 | 200 | 0.5196 | 0.8675 | 0.8047 | 0.8539 | 0.7612 | 0.8042 |
79
- | 0.5328 | 2.94 | 300 | 0.4421 | 0.8864 | 0.8074 | 0.7528 | 0.8557 | 0.7859 |
80
- | 0.4834 | 3.92 | 400 | 0.4721 | 0.8596 | 0.7757 | 0.7135 | 0.8308 | 0.7493 |
81
- | 0.4209 | 4.9 | 500 | 0.4797 | 0.8625 | 0.7863 | 0.6798 | 0.8806 | 0.7492 |
82
- | 0.4567 | 5.88 | 600 | 0.5150 | 0.8688 | 0.7942 | 0.6011 | 0.9652 | 0.7329 |
83
 
84
 
85
  ### Framework versions
 
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
+ value: 0.827115559599636
28
  - name: F1
29
  type: f1
30
+ value: 0.7978723404255319
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
37
 
38
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the stool-image dataset.
39
  It achieves the following results on the evaluation set:
40
+ - Loss: 0.3761
41
+ - Auroc: 0.9189
42
+ - Accuracy: 0.8271
43
+ - Sensitivity: 0.8333
44
+ - Specificity: 0.8228
45
+ - F1: 0.7979
46
 
47
  ## Model description
48
 
 
67
  - seed: 42
68
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
69
  - lr_scheduler_type: linear
70
+ - num_epochs: 2
71
  - mixed_precision_training: Native AMP
72
 
73
  ### Training results
74
 
75
  | Training Loss | Epoch | Step | Validation Loss | Auroc | Accuracy | Sensitivity | Specificity | F1 |
76
  |:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|:-----------:|:----------:|:------:|
77
+ | 0.3755 | 0.98 | 100 | 0.4455 | 0.8677 | 0.7757 | 0.6966 | 0.8458 | 0.7447 |
78
+ | 0.309 | 1.96 | 200 | 0.4360 | 0.8754 | 0.8127 | 0.7528 | 0.8657 | 0.7906 |
 
 
 
 
79
 
80
 
81
  ### Framework versions
all_results.json CHANGED
@@ -1,16 +1,17 @@
1
  {
2
- "epoch": 5.88,
3
- "eval_accuracy": 0.8171064604185623,
4
- "eval_auroc": 0.8896644410203732,
5
- "eval_f1": 0.7841031149301826,
6
- "eval_loss": 0.45378199219703674,
7
- "eval_runtime": 63.9212,
8
- "eval_samples_per_second": 17.193,
9
- "eval_sensitivity": 0.8111111111111111,
10
- "eval_specificty": 0.8212634822804314,
11
- "eval_steps_per_second": 2.159,
12
- "train_loss": 0.4638279656569163,
13
- "train_runtime": 457.0802,
14
- "train_samples_per_second": 355.517,
15
- "train_steps_per_second": 22.316
 
16
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "eval_accuracy": 0.9320388349514563,
4
+ "eval_auroc": 0.9441448382126347,
5
+ "eval_eval_specificty": 0.8863636363636364,
6
+ "eval_f1": 0.9176470588235294,
7
+ "eval_loss": 0.37609395384788513,
8
+ "eval_runtime": 70.0505,
9
+ "eval_samples_per_second": 15.689,
10
+ "eval_sensitivity": 0.8863636363636364,
11
+ "eval_specificty": 0.8228043143297381,
12
+ "eval_steps_per_second": 1.97,
13
+ "train_loss": 0.4438221442933176,
14
+ "train_runtime": 152.958,
15
+ "train_samples_per_second": 21.248,
16
+ "train_steps_per_second": 1.334
17
  }
eval_results.json CHANGED
@@ -1,12 +1,13 @@
1
  {
2
- "epoch": 5.88,
3
- "eval_accuracy": 0.8171064604185623,
4
- "eval_auroc": 0.8896644410203732,
5
- "eval_f1": 0.7841031149301826,
6
- "eval_loss": 0.45378199219703674,
7
- "eval_runtime": 63.9212,
8
- "eval_samples_per_second": 17.193,
9
- "eval_sensitivity": 0.8111111111111111,
10
- "eval_specificty": 0.8212634822804314,
11
- "eval_steps_per_second": 2.159
 
12
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "eval_accuracy": 0.9320388349514563,
4
+ "eval_auroc": 0.9441448382126347,
5
+ "eval_eval_specificty": 0.8863636363636364,
6
+ "eval_f1": 0.9176470588235294,
7
+ "eval_loss": 0.37609395384788513,
8
+ "eval_runtime": 70.0505,
9
+ "eval_samples_per_second": 15.689,
10
+ "eval_sensitivity": 0.8863636363636364,
11
+ "eval_specificty": 0.8228043143297381,
12
+ "eval_steps_per_second": 1.97
13
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bce917709538915003c0c8fe4efef00dae13bd7d04517b013edb5b0d28d71e17
3
  size 343223968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e65a419570bcc528d2259195bf6cef77c24c492a4522dbe45e38c726a6a8a1
3
  size 343223968
runs/Jan05_17-26-06_DESKTOP-BDBS5RV/events.out.tfevents.1704443688.DESKTOP-BDBS5RV CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09b62303b6c67ec8004a1373b7d8162f85881f93156735bc5dc4c83946ad955c
3
- size 40
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371e35b19c483e082c64794a04cb3d55c6f3e5fe391e4302a4aa9cb8102a4a95
3
+ size 567
runs/Jan06_12-28-05_DESKTOP-BDBS5RV/events.out.tfevents.1704511685.DESKTOP-BDBS5RV ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe0d6bce62a3e37322c977b8cff2445b9932eb02a04b027b55d5b5cae9e816a
3
+ size 8853
runs/Jan06_12-28-05_DESKTOP-BDBS5RV/events.out.tfevents.1704511909.DESKTOP-BDBS5RV ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0e59688d86c65619588f4e21bee0cdc0790862a16c985c09988785780ba640c
3
+ size 40
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 5.88,
3
- "train_loss": 0.4638279656569163,
4
- "train_runtime": 457.0802,
5
- "train_samples_per_second": 355.517,
6
- "train_steps_per_second": 22.316
7
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "train_loss": 0.4438221442933176,
4
+ "train_runtime": 152.958,
5
+ "train_samples_per_second": 21.248,
6
+ "train_steps_per_second": 1.334
7
  }
trainer_state.json CHANGED
@@ -1,467 +1,175 @@
1
  {
2
- "best_metric": 0.8863966683436749,
3
- "best_model_checkpoint": "./stool-condition-classification\\checkpoint-300",
4
- "epoch": 5.882352941176471,
5
  "eval_steps": 100,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.1,
13
- "learning_rate": 0.00019980392156862745,
14
- "loss": 0.6109,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.2,
19
- "learning_rate": 0.00019960784313725492,
20
- "loss": 0.5518,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.29,
25
- "learning_rate": 0.00019941176470588236,
26
- "loss": 0.4854,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.39,
31
- "learning_rate": 0.0001992156862745098,
32
- "loss": 0.5984,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.49,
37
- "learning_rate": 0.00019901960784313727,
38
- "loss": 0.5832,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.59,
43
- "learning_rate": 0.00019882352941176472,
44
- "loss": 0.5199,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.69,
49
- "learning_rate": 0.00019862745098039218,
50
- "loss": 0.5347,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.78,
55
- "learning_rate": 0.00019843137254901963,
56
- "loss": 0.5159,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.88,
61
- "learning_rate": 0.00019823529411764707,
62
- "loss": 0.5069,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.98,
67
- "learning_rate": 0.00019803921568627454,
68
- "loss": 0.5303,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.98,
73
- "eval_accuracy": 0.7941952506596306,
74
- "eval_auroc": 0.8826094247861815,
75
- "eval_f1": 0.7664670658682635,
76
- "eval_loss": 0.43268465995788574,
77
- "eval_runtime": 19.5335,
78
- "eval_samples_per_second": 19.403,
79
- "eval_sensitivity": 0.7191011235955056,
80
- "eval_specificty": 0.8606965174129353,
81
- "eval_steps_per_second": 2.457,
82
  "step": 100
83
  },
84
  {
85
  "epoch": 1.08,
86
- "learning_rate": 0.00019784313725490198,
87
- "loss": 0.514,
88
  "step": 110
89
  },
90
  {
91
  "epoch": 1.18,
92
- "learning_rate": 0.00019764705882352942,
93
- "loss": 0.3919,
94
  "step": 120
95
  },
96
  {
97
  "epoch": 1.27,
98
- "learning_rate": 0.0001974509803921569,
99
- "loss": 0.5246,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 1.37,
104
- "learning_rate": 0.00019725490196078433,
105
- "loss": 0.5262,
106
  "step": 140
107
  },
108
  {
109
  "epoch": 1.47,
110
- "learning_rate": 0.00019705882352941177,
111
- "loss": 0.4092,
112
  "step": 150
113
  },
114
  {
115
  "epoch": 1.57,
116
- "learning_rate": 0.00019686274509803922,
117
- "loss": 0.6038,
118
  "step": 160
119
  },
120
  {
121
  "epoch": 1.67,
122
- "learning_rate": 0.00019666666666666666,
123
- "loss": 0.5079,
124
  "step": 170
125
  },
126
  {
127
  "epoch": 1.76,
128
- "learning_rate": 0.00019647058823529413,
129
- "loss": 0.4992,
130
  "step": 180
131
  },
132
  {
133
  "epoch": 1.86,
134
- "learning_rate": 0.00019627450980392157,
135
- "loss": 0.4788,
136
  "step": 190
137
  },
138
  {
139
  "epoch": 1.96,
140
- "learning_rate": 0.000196078431372549,
141
- "loss": 0.3909,
142
  "step": 200
143
  },
144
  {
145
  "epoch": 1.96,
146
- "eval_accuracy": 0.8047493403693932,
147
- "eval_auroc": 0.8674604505562076,
148
- "eval_f1": 0.8042328042328041,
149
- "eval_loss": 0.5196136832237244,
150
- "eval_runtime": 19.2864,
151
- "eval_samples_per_second": 19.651,
152
- "eval_sensitivity": 0.8539325842696629,
153
- "eval_specificty": 0.7611940298507462,
154
- "eval_steps_per_second": 2.489,
155
- "step": 200
156
- },
157
- {
158
- "epoch": 2.06,
159
- "learning_rate": 0.00019588235294117648,
160
- "loss": 0.4421,
161
- "step": 210
162
- },
163
- {
164
- "epoch": 2.16,
165
- "learning_rate": 0.00019568627450980392,
166
- "loss": 0.4479,
167
- "step": 220
168
- },
169
- {
170
- "epoch": 2.25,
171
- "learning_rate": 0.00019549019607843136,
172
- "loss": 0.4655,
173
- "step": 230
174
- },
175
- {
176
- "epoch": 2.35,
177
- "learning_rate": 0.00019529411764705883,
178
- "loss": 0.4479,
179
- "step": 240
180
- },
181
- {
182
- "epoch": 2.45,
183
- "learning_rate": 0.00019509803921568628,
184
- "loss": 0.3894,
185
- "step": 250
186
- },
187
- {
188
- "epoch": 2.55,
189
- "learning_rate": 0.00019490196078431372,
190
- "loss": 0.4315,
191
- "step": 260
192
- },
193
- {
194
- "epoch": 2.65,
195
- "learning_rate": 0.0001947058823529412,
196
- "loss": 0.4783,
197
- "step": 270
198
- },
199
- {
200
- "epoch": 2.75,
201
- "learning_rate": 0.00019450980392156863,
202
- "loss": 0.3792,
203
- "step": 280
204
- },
205
- {
206
- "epoch": 2.84,
207
- "learning_rate": 0.0001943137254901961,
208
- "loss": 0.5301,
209
- "step": 290
210
- },
211
- {
212
- "epoch": 2.94,
213
- "learning_rate": 0.00019411764705882354,
214
- "loss": 0.5328,
215
- "step": 300
216
- },
217
- {
218
- "epoch": 2.94,
219
- "eval_accuracy": 0.8073878627968337,
220
- "eval_auroc": 0.8863966683436749,
221
- "eval_f1": 0.7859237536656891,
222
- "eval_loss": 0.4421083927154541,
223
- "eval_runtime": 19.1397,
224
- "eval_samples_per_second": 19.802,
225
  "eval_sensitivity": 0.7528089887640449,
226
- "eval_specificty": 0.8557213930348259,
227
- "eval_steps_per_second": 2.508,
228
- "step": 300
229
- },
230
- {
231
- "epoch": 3.04,
232
- "learning_rate": 0.00019392156862745098,
233
- "loss": 0.4437,
234
- "step": 310
235
- },
236
- {
237
- "epoch": 3.14,
238
- "learning_rate": 0.00019372549019607845,
239
- "loss": 0.3859,
240
- "step": 320
241
- },
242
- {
243
- "epoch": 3.24,
244
- "learning_rate": 0.0001935294117647059,
245
- "loss": 0.4995,
246
- "step": 330
247
- },
248
- {
249
- "epoch": 3.33,
250
- "learning_rate": 0.00019333333333333333,
251
- "loss": 0.4282,
252
- "step": 340
253
- },
254
- {
255
- "epoch": 3.43,
256
- "learning_rate": 0.0001931372549019608,
257
- "loss": 0.4742,
258
- "step": 350
259
- },
260
- {
261
- "epoch": 3.53,
262
- "learning_rate": 0.00019294117647058825,
263
- "loss": 0.4219,
264
- "step": 360
265
- },
266
- {
267
- "epoch": 3.63,
268
- "learning_rate": 0.0001927450980392157,
269
- "loss": 0.4531,
270
- "step": 370
271
- },
272
- {
273
- "epoch": 3.73,
274
- "learning_rate": 0.00019254901960784316,
275
- "loss": 0.545,
276
- "step": 380
277
- },
278
- {
279
- "epoch": 3.82,
280
- "learning_rate": 0.0001923529411764706,
281
- "loss": 0.4216,
282
- "step": 390
283
- },
284
- {
285
- "epoch": 3.92,
286
- "learning_rate": 0.00019215686274509807,
287
- "loss": 0.4834,
288
- "step": 400
289
- },
290
- {
291
- "epoch": 3.92,
292
- "eval_accuracy": 0.7757255936675461,
293
- "eval_auroc": 0.8596483872770976,
294
- "eval_f1": 0.7492625368731564,
295
- "eval_loss": 0.4720683693885803,
296
- "eval_runtime": 19.0669,
297
- "eval_samples_per_second": 19.877,
298
- "eval_sensitivity": 0.7134831460674157,
299
- "eval_specificty": 0.8308457711442786,
300
- "eval_steps_per_second": 2.517,
301
- "step": 400
302
- },
303
- {
304
- "epoch": 4.02,
305
- "learning_rate": 0.0001919607843137255,
306
- "loss": 0.3281,
307
- "step": 410
308
- },
309
- {
310
- "epoch": 4.12,
311
- "learning_rate": 0.00019176470588235295,
312
- "loss": 0.5168,
313
- "step": 420
314
- },
315
- {
316
- "epoch": 4.22,
317
- "learning_rate": 0.00019156862745098042,
318
- "loss": 0.5219,
319
- "step": 430
320
- },
321
- {
322
- "epoch": 4.31,
323
- "learning_rate": 0.00019137254901960786,
324
- "loss": 0.4131,
325
- "step": 440
326
- },
327
- {
328
- "epoch": 4.41,
329
- "learning_rate": 0.0001911764705882353,
330
- "loss": 0.4494,
331
- "step": 450
332
- },
333
- {
334
- "epoch": 4.51,
335
- "learning_rate": 0.00019098039215686277,
336
- "loss": 0.4206,
337
- "step": 460
338
- },
339
- {
340
- "epoch": 4.61,
341
- "learning_rate": 0.00019078431372549022,
342
- "loss": 0.3932,
343
- "step": 470
344
- },
345
- {
346
- "epoch": 4.71,
347
- "learning_rate": 0.00019058823529411766,
348
- "loss": 0.4607,
349
- "step": 480
350
- },
351
- {
352
- "epoch": 4.8,
353
- "learning_rate": 0.0001903921568627451,
354
- "loss": 0.3892,
355
- "step": 490
356
- },
357
- {
358
- "epoch": 4.9,
359
- "learning_rate": 0.00019019607843137254,
360
- "loss": 0.4209,
361
- "step": 500
362
- },
363
- {
364
- "epoch": 4.9,
365
- "eval_accuracy": 0.7862796833773087,
366
- "eval_auroc": 0.8624853261780983,
367
- "eval_f1": 0.7492260061919503,
368
- "eval_loss": 0.47969380021095276,
369
- "eval_runtime": 18.951,
370
- "eval_samples_per_second": 19.999,
371
- "eval_sensitivity": 0.6797752808988764,
372
- "eval_specificty": 0.8805970149253731,
373
- "eval_steps_per_second": 2.533,
374
- "step": 500
375
- },
376
- {
377
- "epoch": 5.0,
378
- "learning_rate": 0.00019,
379
- "loss": 0.3681,
380
- "step": 510
381
- },
382
- {
383
- "epoch": 5.1,
384
- "learning_rate": 0.00018980392156862745,
385
- "loss": 0.397,
386
- "step": 520
387
- },
388
- {
389
- "epoch": 5.2,
390
- "learning_rate": 0.0001896078431372549,
391
- "loss": 0.3833,
392
- "step": 530
393
- },
394
- {
395
- "epoch": 5.29,
396
- "learning_rate": 0.00018941176470588236,
397
- "loss": 0.4202,
398
- "step": 540
399
- },
400
- {
401
- "epoch": 5.39,
402
- "learning_rate": 0.0001892156862745098,
403
- "loss": 0.4304,
404
- "step": 550
405
- },
406
- {
407
- "epoch": 5.49,
408
- "learning_rate": 0.00018901960784313725,
409
- "loss": 0.3883,
410
- "step": 560
411
- },
412
- {
413
- "epoch": 5.59,
414
- "learning_rate": 0.00018882352941176472,
415
- "loss": 0.4737,
416
- "step": 570
417
- },
418
- {
419
- "epoch": 5.69,
420
- "learning_rate": 0.00018862745098039216,
421
- "loss": 0.4661,
422
- "step": 580
423
- },
424
- {
425
- "epoch": 5.78,
426
- "learning_rate": 0.0001884313725490196,
427
- "loss": 0.3469,
428
- "step": 590
429
- },
430
- {
431
- "epoch": 5.88,
432
- "learning_rate": 0.00018823529411764707,
433
- "loss": 0.4567,
434
- "step": 600
435
- },
436
- {
437
- "epoch": 5.88,
438
- "eval_accuracy": 0.7941952506596306,
439
- "eval_auroc": 0.8687741069931243,
440
- "eval_f1": 0.7328767123287672,
441
- "eval_loss": 0.514951765537262,
442
- "eval_runtime": 18.7983,
443
- "eval_samples_per_second": 20.161,
444
- "eval_sensitivity": 0.601123595505618,
445
- "eval_specificty": 0.9651741293532339,
446
- "eval_steps_per_second": 2.553,
447
- "step": 600
448
  },
449
  {
450
- "epoch": 5.88,
451
- "step": 600,
452
- "total_flos": 7.412108806635725e+17,
453
- "train_loss": 0.4638279656569163,
454
- "train_runtime": 457.0802,
455
- "train_samples_per_second": 355.517,
456
- "train_steps_per_second": 22.316
457
  }
458
  ],
459
  "logging_steps": 10,
460
- "max_steps": 10200,
461
  "num_input_tokens_seen": 0,
462
- "num_train_epochs": 100,
463
  "save_steps": 100,
464
- "total_flos": 7.412108806635725e+17,
465
  "train_batch_size": 16,
466
  "trial_name": null,
467
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8754262395885739,
3
+ "best_model_checkpoint": "./stool-condition-classification\\checkpoint-200",
4
+ "epoch": 2.0,
5
  "eval_steps": 100,
6
+ "global_step": 204,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.1,
13
+ "learning_rate": 0.00019019607843137254,
14
+ "loss": 0.6003,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.2,
19
+ "learning_rate": 0.0001803921568627451,
20
+ "loss": 0.5057,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.29,
25
+ "learning_rate": 0.00017058823529411766,
26
+ "loss": 0.492,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.39,
31
+ "learning_rate": 0.00016078431372549022,
32
+ "loss": 0.5363,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.49,
37
+ "learning_rate": 0.00015098039215686275,
38
+ "loss": 0.5434,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.59,
43
+ "learning_rate": 0.0001411764705882353,
44
+ "loss": 0.4924,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.69,
49
+ "learning_rate": 0.00013137254901960784,
50
+ "loss": 0.4675,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.78,
55
+ "learning_rate": 0.00012156862745098039,
56
+ "loss": 0.4547,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.88,
61
+ "learning_rate": 0.00011176470588235294,
62
+ "loss": 0.4565,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.98,
67
+ "learning_rate": 0.00010196078431372549,
68
+ "loss": 0.3755,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.98,
73
+ "eval_accuracy": 0.7757255936675461,
74
+ "eval_auroc": 0.8676561015148975,
75
+ "eval_f1": 0.7447447447447447,
76
+ "eval_loss": 0.4455398917198181,
77
+ "eval_runtime": 19.6185,
78
+ "eval_samples_per_second": 19.318,
79
+ "eval_sensitivity": 0.6966292134831461,
80
+ "eval_specificty": 0.845771144278607,
81
+ "eval_steps_per_second": 2.447,
82
  "step": 100
83
  },
84
  {
85
  "epoch": 1.08,
86
+ "learning_rate": 9.215686274509804e-05,
87
+ "loss": 0.4789,
88
  "step": 110
89
  },
90
  {
91
  "epoch": 1.18,
92
+ "learning_rate": 8.23529411764706e-05,
93
+ "loss": 0.3842,
94
  "step": 120
95
  },
96
  {
97
  "epoch": 1.27,
98
+ "learning_rate": 7.254901960784314e-05,
99
+ "loss": 0.5317,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 1.37,
104
+ "learning_rate": 6.274509803921569e-05,
105
+ "loss": 0.4017,
106
  "step": 140
107
  },
108
  {
109
  "epoch": 1.47,
110
+ "learning_rate": 5.294117647058824e-05,
111
+ "loss": 0.3627,
112
  "step": 150
113
  },
114
  {
115
  "epoch": 1.57,
116
+ "learning_rate": 4.313725490196079e-05,
117
+ "loss": 0.4424,
118
  "step": 160
119
  },
120
  {
121
  "epoch": 1.67,
122
+ "learning_rate": 3.3333333333333335e-05,
123
+ "loss": 0.3926,
124
  "step": 170
125
  },
126
  {
127
  "epoch": 1.76,
128
+ "learning_rate": 2.3529411764705884e-05,
129
+ "loss": 0.4004,
130
  "step": 180
131
  },
132
  {
133
  "epoch": 1.86,
134
+ "learning_rate": 1.3725490196078432e-05,
135
+ "loss": 0.3161,
136
  "step": 190
137
  },
138
  {
139
  "epoch": 1.96,
140
+ "learning_rate": 3.92156862745098e-06,
141
+ "loss": 0.309,
142
  "step": 200
143
  },
144
  {
145
  "epoch": 1.96,
146
+ "eval_accuracy": 0.8126649076517151,
147
+ "eval_auroc": 0.8754262395885739,
148
+ "eval_f1": 0.7905604719764012,
149
+ "eval_loss": 0.4360436201095581,
150
+ "eval_runtime": 19.2175,
151
+ "eval_samples_per_second": 19.722,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  "eval_sensitivity": 0.7528089887640449,
153
+ "eval_specificty": 0.8656716417910447,
154
+ "eval_steps_per_second": 2.498,
155
+ "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  },
157
  {
158
+ "epoch": 2.0,
159
+ "step": 204,
160
+ "total_flos": 2.51848966247424e+17,
161
+ "train_loss": 0.4438221442933176,
162
+ "train_runtime": 152.958,
163
+ "train_samples_per_second": 21.248,
164
+ "train_steps_per_second": 1.334
165
  }
166
  ],
167
  "logging_steps": 10,
168
+ "max_steps": 204,
169
  "num_input_tokens_seen": 0,
170
+ "num_train_epochs": 2,
171
  "save_steps": 100,
172
+ "total_flos": 2.51848966247424e+17,
173
  "train_batch_size": 16,
174
  "trial_name": null,
175
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00021469e2fc2da6e352c64ba5c1eff722ad7949ad69e804b54677deb2c22f06
3
  size 4283
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fca6169b13c98d2024034ca2a018b9c8536fee89f8afeb3b4867f7e487a0d5b
3
  size 4283