schnell committed on
Commit e5aca6d
1 parent: 23dd097

Training in progress, epoch 14

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3cf4bfff09a16707370556956675459f9d75876bfb14a22ccca62770b5a95bbc
+ oid sha256:1767ad24a3383984ce8730d69f87ce5710e95d8d3ba7a78f51e031b4074e752b
  size 236470789
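
Each hunk in this commit only rewrites a Git LFS pointer: the tracked text file holds a `version`, an `oid sha256:...`, and a `size`, while the binary itself lives in LFS storage. As a rough illustration, here is a minimal Python sketch, assuming the pointer text and the downloaded blob are both available locally (the paths in the example are hypothetical), that checks a blob against its pointer:

# Minimal sketch: verify a downloaded checkpoint file against its Git LFS pointer.
# The pointer format ("version", "oid sha256:...", "size ...") is exactly what the
# hunks in this commit change; the paths used below are hypothetical examples.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Read the key/value pairs of a Git LFS pointer file."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify(pointer_path: str, blob_path: str) -> bool:
    """Compare the blob's sha256 and byte size with the pointer's oid and size."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Example (hypothetical local paths):
# verify("last-checkpoint/optimizer.pt.pointer", "last-checkpoint/optimizer.pt")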
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:612de748233d159437078bb7f4ee519273e837c5c8f59ae0d256c06a48bde03c
+ oid sha256:5f558a9b6175c80a1c75dcb143fa35aa44a75cbdb0c9e7f8f52c48de2931aed8
  size 118243218
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bfa02c10b9cd4c99264361fef392b6d9708225ffc3e69c7351c226b900a6c196
+ oid sha256:3dd94082d220dbe1afff71cc1b8a20fec6bc1ac2bb822cbc323c364b293dc63a
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8ba5415d56f47c80da2bd5540c9633f62d6ea0cd4624ff0eb7758d31be968154
+ oid sha256:a01837036b51fcb751768f243bbc8ef9cd258f274881e125d9a37bb0fe81a367
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:52058bdd1f404d63cb045dcfb79f7694db4c45d09eda931f288ba1a544e087e7
+ oid sha256:dbabbf8f48167d120f969a2bc0bc43b8158630bffc734ccac6b6b36381684d2b
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 13.0,
- "global_step": 903149,
+ "epoch": 14.0,
+ "global_step": 972622,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -10959,11 +10959,854 @@
  "eval_samples_per_second": 416.857,
  "eval_steps_per_second": 26.054,
  "step": 903149
+ },
+ {
+ "epoch": 13.01,
+ "learning_rate": 7.221244268585879e-06,
+ "loss": 1.7015,
+ "step": 903500
+ },
+ {
+ "epoch": 13.01,
+ "learning_rate": 7.1693175268331445e-06,
+ "loss": 1.7008,
+ "step": 904000
+ },
+ {
+ "epoch": 13.02,
+ "learning_rate": 7.1173907850804094e-06,
+ "loss": 1.7008,
+ "step": 904500
+ },
+ {
+ "epoch": 13.03,
+ "learning_rate": 7.065567896811179e-06,
+ "loss": 1.7008,
+ "step": 905000
+ },
+ {
+ "epoch": 13.03,
+ "learning_rate": 7.013641155058444e-06,
+ "loss": 1.7018,
+ "step": 905500
+ },
+ {
+ "epoch": 13.04,
+ "learning_rate": 6.961714413305709e-06,
+ "loss": 1.7048,
+ "step": 906000
+ },
+ {
+ "epoch": 13.05,
+ "learning_rate": 6.909787671552974e-06,
+ "loss": 1.7036,
+ "step": 906500
+ },
+ {
+ "epoch": 13.06,
+ "learning_rate": 6.857860929800238e-06,
+ "loss": 1.7012,
+ "step": 907000
+ },
+ {
+ "epoch": 13.06,
+ "learning_rate": 6.805934188047503e-06,
+ "loss": 1.6996,
+ "step": 907500
+ },
+ {
+ "epoch": 13.07,
+ "learning_rate": 6.754111299778273e-06,
+ "loss": 1.701,
+ "step": 908000
+ },
+ {
+ "epoch": 13.08,
+ "learning_rate": 6.702184558025538e-06,
+ "loss": 1.7037,
+ "step": 908500
+ },
+ {
+ "epoch": 13.08,
+ "learning_rate": 6.650257816272802e-06,
+ "loss": 1.7042,
+ "step": 909000
+ },
+ {
+ "epoch": 13.09,
+ "learning_rate": 6.598331074520067e-06,
+ "loss": 1.704,
+ "step": 909500
+ },
+ {
+ "epoch": 13.1,
+ "learning_rate": 6.546508186250838e-06,
+ "loss": 1.7035,
+ "step": 910000
+ },
+ {
+ "epoch": 13.11,
+ "learning_rate": 6.494581444498102e-06,
+ "loss": 1.7007,
+ "step": 910500
+ },
+ {
+ "epoch": 13.11,
+ "learning_rate": 6.4426547027453665e-06,
+ "loss": 1.703,
+ "step": 911000
+ },
+ {
+ "epoch": 13.12,
+ "learning_rate": 6.390727960992631e-06,
+ "loss": 1.7008,
+ "step": 911500
+ },
+ {
+ "epoch": 13.13,
+ "learning_rate": 6.338905072723401e-06,
+ "loss": 1.7057,
+ "step": 912000
+ },
+ {
+ "epoch": 13.13,
+ "learning_rate": 6.286978330970667e-06,
+ "loss": 1.7022,
+ "step": 912500
+ },
+ {
+ "epoch": 13.14,
+ "learning_rate": 6.235051589217932e-06,
+ "loss": 1.7011,
+ "step": 913000
+ },
+ {
+ "epoch": 13.15,
+ "learning_rate": 6.183124847465197e-06,
+ "loss": 1.7006,
+ "step": 913500
+ },
+ {
+ "epoch": 13.16,
+ "learning_rate": 6.131198105712461e-06,
+ "loss": 1.6992,
+ "step": 914000
+ },
+ {
+ "epoch": 13.16,
+ "learning_rate": 6.079375217443231e-06,
+ "loss": 1.702,
+ "step": 914500
+ },
+ {
+ "epoch": 13.17,
+ "learning_rate": 6.027448475690496e-06,
+ "loss": 1.6989,
+ "step": 915000
+ },
+ {
+ "epoch": 13.18,
+ "learning_rate": 5.975521733937761e-06,
+ "loss": 1.7008,
+ "step": 915500
+ },
+ {
+ "epoch": 13.18,
+ "learning_rate": 5.923594992185026e-06,
+ "loss": 1.7023,
+ "step": 916000
+ },
+ {
+ "epoch": 13.19,
+ "learning_rate": 5.871668250432291e-06,
+ "loss": 1.7007,
+ "step": 916500
+ },
+ {
+ "epoch": 13.2,
+ "learning_rate": 5.819845362163061e-06,
+ "loss": 1.6995,
+ "step": 917000
+ },
+ {
+ "epoch": 13.21,
+ "learning_rate": 5.767918620410325e-06,
+ "loss": 1.7002,
+ "step": 917500
+ },
+ {
+ "epoch": 13.21,
+ "learning_rate": 5.71599187865759e-06,
+ "loss": 1.7031,
+ "step": 918000
+ },
+ {
+ "epoch": 13.22,
+ "learning_rate": 5.664065136904855e-06,
+ "loss": 1.6987,
+ "step": 918500
+ },
+ {
+ "epoch": 13.23,
+ "learning_rate": 5.612138395152119e-06,
+ "loss": 1.7016,
+ "step": 919000
+ },
+ {
+ "epoch": 13.24,
+ "learning_rate": 5.56031550688289e-06,
+ "loss": 1.6996,
+ "step": 919500
+ },
+ {
+ "epoch": 13.24,
+ "learning_rate": 5.5083887651301545e-06,
+ "loss": 1.7036,
+ "step": 920000
+ },
+ {
+ "epoch": 13.25,
+ "learning_rate": 5.456462023377419e-06,
+ "loss": 1.7015,
+ "step": 920500
+ },
+ {
+ "epoch": 13.26,
+ "learning_rate": 5.404535281624684e-06,
+ "loss": 1.7019,
+ "step": 921000
+ },
+ {
+ "epoch": 13.26,
+ "learning_rate": 5.352712393355455e-06,
+ "loss": 1.7019,
+ "step": 921500
+ },
+ {
+ "epoch": 13.27,
+ "learning_rate": 5.30078565160272e-06,
+ "loss": 1.7024,
+ "step": 922000
+ },
+ {
+ "epoch": 13.28,
+ "learning_rate": 5.248858909849984e-06,
+ "loss": 1.7003,
+ "step": 922500
+ },
+ {
+ "epoch": 13.29,
+ "learning_rate": 5.196932168097249e-06,
+ "loss": 1.6995,
+ "step": 923000
+ },
+ {
+ "epoch": 13.29,
+ "learning_rate": 5.145109279828019e-06,
+ "loss": 1.6998,
+ "step": 923500
+ },
+ {
+ "epoch": 13.3,
+ "learning_rate": 5.093182538075283e-06,
+ "loss": 1.6999,
+ "step": 924000
+ },
+ {
+ "epoch": 13.31,
+ "learning_rate": 5.041255796322548e-06,
+ "loss": 1.6995,
+ "step": 924500
+ },
+ {
+ "epoch": 13.31,
+ "learning_rate": 4.989329054569814e-06,
+ "loss": 1.6965,
+ "step": 925000
+ },
+ {
+ "epoch": 13.32,
+ "learning_rate": 4.9375061663005835e-06,
+ "loss": 1.7033,
+ "step": 925500
+ },
+ {
+ "epoch": 13.33,
+ "learning_rate": 4.885579424547848e-06,
+ "loss": 1.6986,
+ "step": 926000
+ },
+ {
+ "epoch": 13.34,
+ "learning_rate": 4.833652682795113e-06,
+ "loss": 1.6984,
+ "step": 926500
+ },
+ {
+ "epoch": 13.34,
+ "learning_rate": 4.781725941042377e-06,
+ "loss": 1.7015,
+ "step": 927000
+ },
+ {
+ "epoch": 13.35,
+ "learning_rate": 4.729799199289642e-06,
+ "loss": 1.6982,
+ "step": 927500
+ },
+ {
+ "epoch": 13.36,
+ "learning_rate": 4.677976311020413e-06,
+ "loss": 1.7004,
+ "step": 928000
+ },
+ {
+ "epoch": 13.36,
+ "learning_rate": 4.626049569267677e-06,
+ "loss": 1.702,
+ "step": 928500
+ },
+ {
+ "epoch": 13.37,
+ "learning_rate": 4.5741228275149425e-06,
+ "loss": 1.7002,
+ "step": 929000
+ },
+ {
+ "epoch": 13.38,
+ "learning_rate": 4.5221960857622074e-06,
+ "loss": 1.7009,
+ "step": 929500
+ },
+ {
+ "epoch": 13.39,
+ "learning_rate": 4.4702693440094715e-06,
+ "loss": 1.6987,
+ "step": 930000
+ },
+ {
+ "epoch": 13.39,
+ "learning_rate": 4.418446455740242e-06,
+ "loss": 1.7007,
+ "step": 930500
+ },
+ {
+ "epoch": 13.4,
+ "learning_rate": 4.366519713987507e-06,
+ "loss": 1.6964,
+ "step": 931000
+ },
+ {
+ "epoch": 13.41,
+ "learning_rate": 4.314592972234771e-06,
+ "loss": 1.6999,
+ "step": 931500
+ },
+ {
+ "epoch": 13.42,
+ "learning_rate": 4.262666230482036e-06,
+ "loss": 1.7002,
+ "step": 932000
+ },
+ {
+ "epoch": 13.42,
+ "learning_rate": 4.210843342212806e-06,
+ "loss": 1.6982,
+ "step": 932500
+ },
+ {
+ "epoch": 13.43,
+ "learning_rate": 4.158916600460071e-06,
+ "loss": 1.697,
+ "step": 933000
+ },
+ {
+ "epoch": 13.44,
+ "learning_rate": 4.106989858707336e-06,
+ "loss": 1.7032,
+ "step": 933500
+ },
+ {
+ "epoch": 13.44,
+ "learning_rate": 4.055063116954601e-06,
+ "loss": 1.7021,
+ "step": 934000
+ },
+ {
+ "epoch": 13.45,
+ "learning_rate": 4.003136375201865e-06,
+ "loss": 1.7011,
+ "step": 934500
+ },
+ {
+ "epoch": 13.46,
+ "learning_rate": 3.9513134869326356e-06,
+ "loss": 1.7007,
+ "step": 935000
+ },
+ {
+ "epoch": 13.47,
+ "learning_rate": 3.8993867451799005e-06,
+ "loss": 1.6997,
+ "step": 935500
+ },
+ {
+ "epoch": 13.47,
+ "learning_rate": 3.847460003427165e-06,
+ "loss": 1.7,
+ "step": 936000
+ },
+ {
+ "epoch": 13.48,
+ "learning_rate": 3.79553326167443e-06,
+ "loss": 1.7009,
+ "step": 936500
+ },
+ {
+ "epoch": 13.49,
+ "learning_rate": 3.7436065199216947e-06,
+ "loss": 1.698,
+ "step": 937000
+ },
+ {
+ "epoch": 13.49,
+ "learning_rate": 3.6917836316524652e-06,
+ "loss": 1.703,
+ "step": 937500
+ },
+ {
+ "epoch": 13.5,
+ "learning_rate": 3.6398568898997293e-06,
+ "loss": 1.6989,
+ "step": 938000
+ },
+ {
+ "epoch": 13.51,
+ "learning_rate": 3.5879301481469946e-06,
+ "loss": 1.6989,
+ "step": 938500
+ },
+ {
+ "epoch": 13.52,
+ "learning_rate": 3.5360034063942595e-06,
+ "loss": 1.6994,
+ "step": 939000
+ },
+ {
+ "epoch": 13.52,
+ "learning_rate": 3.484180518125029e-06,
+ "loss": 1.7032,
+ "step": 939500
+ },
+ {
+ "epoch": 13.53,
+ "learning_rate": 3.432253776372294e-06,
+ "loss": 1.7012,
+ "step": 940000
+ },
+ {
+ "epoch": 13.54,
+ "learning_rate": 3.380327034619559e-06,
+ "loss": 1.6949,
+ "step": 940500
+ },
+ {
+ "epoch": 13.54,
+ "learning_rate": 3.3284002928668234e-06,
+ "loss": 1.6991,
+ "step": 941000
+ },
+ {
+ "epoch": 13.55,
+ "learning_rate": 3.2764735511140883e-06,
+ "loss": 1.6962,
+ "step": 941500
+ },
+ {
+ "epoch": 13.56,
+ "learning_rate": 3.224650662844859e-06,
+ "loss": 1.6984,
+ "step": 942000
+ },
+ {
+ "epoch": 13.57,
+ "learning_rate": 3.1727239210921233e-06,
+ "loss": 1.6974,
+ "step": 942500
+ },
+ {
+ "epoch": 13.57,
+ "learning_rate": 3.120797179339388e-06,
+ "loss": 1.6989,
+ "step": 943000
+ },
+ {
+ "epoch": 13.58,
+ "learning_rate": 3.0688704375866527e-06,
+ "loss": 1.6986,
+ "step": 943500
+ },
+ {
+ "epoch": 13.59,
+ "learning_rate": 3.017047549317423e-06,
+ "loss": 1.6972,
+ "step": 944000
+ },
+ {
+ "epoch": 13.6,
+ "learning_rate": 2.9651208075646876e-06,
+ "loss": 1.6985,
+ "step": 944500
+ },
+ {
+ "epoch": 13.6,
+ "learning_rate": 2.913194065811953e-06,
+ "loss": 1.6983,
+ "step": 945000
+ },
+ {
+ "epoch": 13.61,
+ "learning_rate": 2.8612673240592174e-06,
+ "loss": 1.7019,
+ "step": 945500
+ },
+ {
+ "epoch": 13.62,
+ "learning_rate": 2.809444435789988e-06,
+ "loss": 1.6978,
+ "step": 946000
+ },
+ {
+ "epoch": 13.62,
+ "learning_rate": 2.7575176940372524e-06,
+ "loss": 1.6995,
+ "step": 946500
+ },
+ {
+ "epoch": 13.63,
+ "learning_rate": 2.705590952284517e-06,
+ "loss": 1.6951,
+ "step": 947000
+ },
+ {
+ "epoch": 13.64,
+ "learning_rate": 2.6536642105317818e-06,
+ "loss": 1.7,
+ "step": 947500
+ },
+ {
+ "epoch": 13.65,
+ "learning_rate": 2.6017374687790467e-06,
+ "loss": 1.6994,
+ "step": 948000
+ },
+ {
+ "epoch": 13.65,
+ "learning_rate": 2.5499145805098167e-06,
+ "loss": 1.6966,
+ "step": 948500
+ },
+ {
+ "epoch": 13.66,
+ "learning_rate": 2.4979878387570816e-06,
+ "loss": 1.6966,
+ "step": 949000
+ },
+ {
+ "epoch": 13.67,
+ "learning_rate": 2.4460610970043465e-06,
+ "loss": 1.697,
+ "step": 949500
+ },
+ {
+ "epoch": 13.67,
+ "learning_rate": 2.394134355251611e-06,
+ "loss": 1.6988,
+ "step": 950000
+ },
+ {
+ "epoch": 13.68,
+ "learning_rate": 2.342207613498876e-06,
+ "loss": 1.6992,
+ "step": 950500
+ },
+ {
+ "epoch": 13.69,
+ "learning_rate": 2.290280871746141e-06,
+ "loss": 1.6961,
+ "step": 951000
+ },
+ {
+ "epoch": 13.7,
+ "learning_rate": 2.238457983476911e-06,
+ "loss": 1.7023,
+ "step": 951500
+ },
+ {
+ "epoch": 13.7,
+ "learning_rate": 2.1865312417241758e-06,
+ "loss": 1.7008,
+ "step": 952000
+ },
+ {
+ "epoch": 13.71,
+ "learning_rate": 2.1346044999714403e-06,
+ "loss": 1.7013,
+ "step": 952500
+ },
+ {
+ "epoch": 13.72,
+ "learning_rate": 2.082677758218705e-06,
+ "loss": 1.6989,
+ "step": 953000
+ },
+ {
+ "epoch": 13.72,
+ "learning_rate": 2.0307510164659696e-06,
+ "loss": 1.6959,
+ "step": 953500
+ },
+ {
+ "epoch": 13.73,
+ "learning_rate": 1.97892812819674e-06,
+ "loss": 1.6983,
+ "step": 954000
+ },
+ {
+ "epoch": 13.74,
+ "learning_rate": 1.9270013864440046e-06,
+ "loss": 1.7009,
+ "step": 954500
+ },
+ {
+ "epoch": 13.75,
+ "learning_rate": 1.8750746446912697e-06,
+ "loss": 1.6965,
+ "step": 955000
+ },
+ {
+ "epoch": 13.75,
+ "learning_rate": 1.8231479029385344e-06,
+ "loss": 1.697,
+ "step": 955500
+ },
+ {
+ "epoch": 13.76,
+ "learning_rate": 1.7713250146693047e-06,
+ "loss": 1.6969,
+ "step": 956000
+ },
+ {
+ "epoch": 13.77,
+ "learning_rate": 1.7193982729165694e-06,
+ "loss": 1.6936,
+ "step": 956500
+ },
+ {
+ "epoch": 13.78,
+ "learning_rate": 1.667471531163834e-06,
+ "loss": 1.7003,
+ "step": 957000
+ },
+ {
+ "epoch": 13.78,
+ "learning_rate": 1.6155447894110987e-06,
+ "loss": 1.6998,
+ "step": 957500
+ },
+ {
+ "epoch": 13.79,
+ "learning_rate": 1.563721901141869e-06,
+ "loss": 1.6969,
+ "step": 958000
+ },
+ {
+ "epoch": 13.8,
+ "learning_rate": 1.511795159389134e-06,
+ "loss": 1.6965,
+ "step": 958500
+ },
+ {
+ "epoch": 13.8,
+ "learning_rate": 1.4598684176363986e-06,
+ "loss": 1.6967,
+ "step": 959000
+ },
+ {
+ "epoch": 13.81,
+ "learning_rate": 1.4079416758836633e-06,
+ "loss": 1.6951,
+ "step": 959500
+ },
+ {
+ "epoch": 13.82,
+ "learning_rate": 1.3561187876144336e-06,
+ "loss": 1.6965,
+ "step": 960000
+ },
+ {
+ "epoch": 13.83,
+ "learning_rate": 1.3041920458616985e-06,
+ "loss": 1.696,
+ "step": 960500
+ },
+ {
+ "epoch": 13.83,
+ "learning_rate": 1.2522653041089632e-06,
+ "loss": 1.6984,
+ "step": 961000
+ },
+ {
+ "epoch": 13.84,
+ "learning_rate": 1.2003385623562278e-06,
+ "loss": 1.6992,
+ "step": 961500
+ },
+ {
+ "epoch": 13.85,
+ "learning_rate": 1.1484118206034927e-06,
+ "loss": 1.6976,
+ "step": 962000
+ },
+ {
+ "epoch": 13.85,
+ "learning_rate": 1.0965889323342628e-06,
+ "loss": 1.6984,
+ "step": 962500
+ },
+ {
+ "epoch": 13.86,
+ "learning_rate": 1.0446621905815277e-06,
+ "loss": 1.6995,
+ "step": 963000
+ },
+ {
+ "epoch": 13.87,
+ "learning_rate": 9.927354488287924e-07,
+ "loss": 1.6993,
+ "step": 963500
+ },
+ {
+ "epoch": 13.88,
+ "learning_rate": 9.408087070760572e-07,
+ "loss": 1.6974,
+ "step": 964000
+ },
+ {
+ "epoch": 13.88,
+ "learning_rate": 8.889858188068274e-07,
+ "loss": 1.6952,
+ "step": 964500
+ },
+ {
+ "epoch": 13.89,
+ "learning_rate": 8.370590770540923e-07,
+ "loss": 1.6941,
+ "step": 965000
+ },
+ {
+ "epoch": 13.9,
+ "learning_rate": 7.851323353013568e-07,
+ "loss": 1.6948,
+ "step": 965500
+ },
+ {
+ "epoch": 13.9,
+ "learning_rate": 7.332055935486216e-07,
+ "loss": 1.6992,
+ "step": 966000
+ },
+ {
+ "epoch": 13.91,
+ "learning_rate": 6.812788517958863e-07,
+ "loss": 1.7009,
+ "step": 966500
+ },
+ {
+ "epoch": 13.92,
+ "learning_rate": 6.294559635266566e-07,
+ "loss": 1.6947,
+ "step": 967000
+ },
+ {
+ "epoch": 13.93,
+ "learning_rate": 5.775292217739214e-07,
+ "loss": 1.7006,
+ "step": 967500
+ },
+ {
+ "epoch": 13.93,
+ "learning_rate": 5.256024800211861e-07,
+ "loss": 1.7017,
+ "step": 968000
+ },
+ {
+ "epoch": 13.94,
+ "learning_rate": 4.736757382684509e-07,
+ "loss": 1.6951,
+ "step": 968500
+ },
+ {
+ "epoch": 13.95,
+ "learning_rate": 4.218528499992211e-07,
+ "loss": 1.6955,
+ "step": 969000
+ },
+ {
+ "epoch": 13.96,
+ "learning_rate": 3.6992610824648584e-07,
+ "loss": 1.6968,
+ "step": 969500
+ },
+ {
+ "epoch": 13.96,
+ "learning_rate": 3.1799936649375064e-07,
+ "loss": 1.6982,
+ "step": 970000
+ },
+ {
+ "epoch": 13.97,
+ "learning_rate": 2.660726247410154e-07,
+ "loss": 1.6923,
+ "step": 970500
+ },
+ {
+ "epoch": 13.98,
+ "learning_rate": 2.1414588298828015e-07,
+ "loss": 1.6925,
+ "step": 971000
+ },
+ {
+ "epoch": 13.98,
+ "learning_rate": 1.6232299471905037e-07,
+ "loss": 1.695,
+ "step": 971500
+ },
+ {
+ "epoch": 13.99,
+ "learning_rate": 1.1039625296631512e-07,
+ "loss": 1.6977,
+ "step": 972000
+ },
+ {
+ "epoch": 14.0,
+ "learning_rate": 5.846951121357988e-08,
+ "loss": 1.7,
+ "step": 972500
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.6690181349948218,
+ "eval_loss": 1.5665007829666138,
+ "eval_runtime": 1297.3439,
+ "eval_samples_per_second": 415.416,
+ "eval_steps_per_second": 25.964,
+ "step": 972622
  }
  ],
  "max_steps": 972622,
  "num_train_epochs": 14,
- "total_flos": 6.837275613018931e+18,
+ "total_flos": 7.363280880004055e+18,
  "trial_name": null,
  "trial_params": null
  }
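
The entries added above are training-log records (one per 500 optimizer steps, plus the end-of-epoch evaluation at step 972622). A minimal Python sketch for inspecting them, assuming the file follows the usual transformers Trainer layout in which trainer_state.json holds a "log_history" list like the one extended in this hunk:

# Minimal sketch: summarize the checkpoint's training log.
# Assumes the standard transformers Trainer trainer_state.json layout
# ("epoch", "global_step", "log_history") shown in the diff above.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f'epoch {state["epoch"]}, global step {state["global_step"]}')
print(f'last training loss: {train_logs[-1]["loss"]} at step {train_logs[-1]["step"]}')
if eval_logs:
    last = eval_logs[-1]
    print(f'latest eval: loss {last["eval_loss"]}, accuracy {last.get("eval_accuracy")}')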
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:612de748233d159437078bb7f4ee519273e837c5c8f59ae0d256c06a48bde03c
+ oid sha256:5f558a9b6175c80a1c75dcb143fa35aa44a75cbdb0c9e7f8f52c48de2931aed8
  size 118243218
runs/Feb22_09-35-52_ubuntu-2004/events.out.tfevents.1677026198.ubuntu-2004.1870487.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e26da7adb61568c20149e3a3b8d6acebeecb52046b416908a25f52f3d232dca1
- size 296897
+ oid sha256:f1625624de4e425f7b80c826a541c861aee58acfb64ef0613b7acf8e2792d039
+ size 319466
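
Since the commit updates the last-checkpoint directory in place, a downstream user could pull exactly this revision and continue training from it. A rough sketch, assuming the checkpoint is a standard Transformers checkpoint; the repo_id below is a hypothetical placeholder, since the commit page does not name the repository:

# Minimal sketch: fetch this commit's last-checkpoint folder and resume from it.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="schnell/example-model",        # hypothetical placeholder
    revision="e5aca6d",                     # the commit shown above
    allow_patterns=["last-checkpoint/*"],   # only the checkpoint files
)

# With a Trainer rebuilt from the original model, data, and TrainingArguments
# (not shown in this commit), training can continue from the downloaded state:
# trainer.train(resume_from_checkpoint=f"{local_dir}/last-checkpoint")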