schnell committed on
Commit
204d033
•
1 Parent(s): eaf38e5

Training in progress, epoch 14

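The binary checkpoint artifacts in this commit (DeepSpeed model/optimizer shards, pytorch_model.bin, RNG states, the TensorBoard event file) are tracked with Git LFS, so for those files the diff only swaps the pointer stub: a version line, an oid sha256 digest, and a byte size. As a rough illustrative sketch (the helper name and paths are hypothetical, not part of this repo), a downloaded object can be checked against such a pointer like this:

import hashlib
from pathlib import Path

def verify_lfs_object(pointer_path: str, blob_path: str) -> bool:
    # Parse the "key value" lines of a Git LFS pointer file (version / oid / size).
    fields = dict(
        line.split(" ", 1)
        for line in Path(pointer_path).read_text().splitlines()
        if line.strip()
    )
    expected_oid = fields["oid"].split(":", 1)[1]   # drop the "sha256:" prefix
    expected_size = int(fields["size"])
    blob = Path(blob_path).read_bytes()
    return len(blob) == expected_size and hashlib.sha256(blob).hexdigest() == expected_oid

# e.g. verify_lfs_object("last-checkpoint/pytorch_model.bin", "/tmp/pytorch_model.bin")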
last-checkpoint/{global_step903149 → global_step972622}/mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c45f51f60dd53a36accb7dff00efcd14005b0ad85e5652ba519d6ed9009e1ab9
+ oid sha256:21b892cfc2606581bb05617d24cadf81a767a4e9b5b0e9df181a2da7422f0295
  size 59134503
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a7dc2692551085198feb77bdf63849fc453c6674466105393d4dc9b779ca5e4e
+ oid sha256:01f9f5653a775f5acfff6af17a4890c21c7108830ae201ce175e1721204f9f9a
  size 118216675
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:08321c98c4c6da61e476a24bee5605d7495845df4ff3462c9586edbf8f1c3137
+ oid sha256:6f035455ecf2d2a36e6ccbb70d10e84090a59d42eafa837fe0b678751a53efc5
  size 118217955
last-checkpoint/{global_step903149 → global_step972622}/zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:871544693e17f413bdf18ae374b96f9a9d666695ff7e4dab6065caa1c236dce4
+ oid sha256:a67282a7ac391f3c4496d15be11ad246270c606175c227b2d26620a681dbec09
  size 118221091
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step903149
+ global_step972622
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2096fe3cee8a81b52b84f4f48f2f179a567c9a1d5dfb3efce794725caec30498
+ oid sha256:2837d9f4f86873de1b0ebc8b7e038d8643cfa3a63d6d33fae61e8a5ac3ad6681
  size 59121639
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a965c79b19c9423619c3017659ec9eade4522d54819e29ff234db7b92eed58a7
+ oid sha256:4baf9e81d7dd32b697103d7f8d9f24586d55681755d7640ad9be4065acc87e20
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ed95eaae8bc143301073d33f539287e879203324a4279d89fb2f9fe3eb919cf2
+ oid sha256:65e7c5f715167d0e98e35b987ffd74b7f8cb9a6a78666d3c50f16549b876aac9
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:63cfc42f43b70b453977c4b1f3d034e1c2eaab41705054ddee26c26cd6a73a91
+ oid sha256:29213f42ad279da924d48eb6a9147e1ebdf480183b188e5e89922d01bc2def28
  size 14503
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 13.0,
- "global_step": 903149,
+ "epoch": 14.0,
+ "global_step": 972622,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -10959,11 +10959,854 @@
  "eval_samples_per_second": 825.676,
  "eval_steps_per_second": 34.404,
  "step": 903149
+ },
+ {
+ "epoch": 13.01,
+ "learning_rate": 7.366016024592505e-06,
+ "loss": 1.7045,
+ "step": 903500
+ },
+ {
+ "epoch": 13.01,
+ "learning_rate": 7.314296989806781e-06,
+ "loss": 1.7035,
+ "step": 904000
+ },
+ {
+ "epoch": 13.02,
+ "learning_rate": 7.2623702480540456e-06,
+ "loss": 1.6998,
+ "step": 904500
+ },
+ {
+ "epoch": 13.03,
+ "learning_rate": 7.210651213268322e-06,
+ "loss": 1.7022,
+ "step": 905000
+ },
+ {
+ "epoch": 13.03,
+ "learning_rate": 7.1587244715155865e-06,
+ "loss": 1.7006,
+ "step": 905500
+ },
+ {
+ "epoch": 13.04,
+ "learning_rate": 7.107005436729862e-06,
+ "loss": 1.7002,
+ "step": 906000
+ },
+ {
+ "epoch": 13.05,
+ "learning_rate": 7.055078694977127e-06,
+ "loss": 1.703,
+ "step": 906500
+ },
+ {
+ "epoch": 13.06,
+ "learning_rate": 7.003359660191403e-06,
+ "loss": 1.7053,
+ "step": 907000
+ },
+ {
+ "epoch": 13.06,
+ "learning_rate": 6.951432918438668e-06,
+ "loss": 1.6992,
+ "step": 907500
+ },
+ {
+ "epoch": 13.07,
+ "learning_rate": 6.899713883652943e-06,
+ "loss": 1.7002,
+ "step": 908000
+ },
+ {
+ "epoch": 13.08,
+ "learning_rate": 6.8477871419002086e-06,
+ "loss": 1.7036,
+ "step": 908500
+ },
+ {
+ "epoch": 13.08,
+ "learning_rate": 6.796068107114484e-06,
+ "loss": 1.6992,
+ "step": 909000
+ },
+ {
+ "epoch": 13.09,
+ "learning_rate": 6.744141365361748e-06,
+ "loss": 1.7025,
+ "step": 909500
+ },
+ {
+ "epoch": 13.1,
+ "learning_rate": 6.692422330576023e-06,
+ "loss": 1.6985,
+ "step": 910000
+ },
+ {
+ "epoch": 13.11,
+ "learning_rate": 6.640495588823288e-06,
+ "loss": 1.7017,
+ "step": 910500
+ },
+ {
+ "epoch": 13.11,
+ "learning_rate": 6.588776554037564e-06,
+ "loss": 1.6991,
+ "step": 911000
+ },
+ {
+ "epoch": 13.12,
+ "learning_rate": 6.536849812284829e-06,
+ "loss": 1.7015,
+ "step": 911500
+ },
+ {
+ "epoch": 13.13,
+ "learning_rate": 6.485130777499104e-06,
+ "loss": 1.703,
+ "step": 912000
+ },
+ {
+ "epoch": 13.13,
+ "learning_rate": 6.433204035746369e-06,
+ "loss": 1.6997,
+ "step": 912500
+ },
+ {
+ "epoch": 13.14,
+ "learning_rate": 6.381485000960645e-06,
+ "loss": 1.7033,
+ "step": 913000
+ },
+ {
+ "epoch": 13.15,
+ "learning_rate": 6.32955825920791e-06,
+ "loss": 1.7016,
+ "step": 913500
+ },
+ {
+ "epoch": 13.16,
+ "learning_rate": 6.277839224422186e-06,
+ "loss": 1.7034,
+ "step": 914000
+ },
+ {
+ "epoch": 13.16,
+ "learning_rate": 6.22591248266945e-06,
+ "loss": 1.7012,
+ "step": 914500
+ },
+ {
+ "epoch": 13.17,
+ "learning_rate": 6.174193447883726e-06,
+ "loss": 1.7021,
+ "step": 915000
+ },
+ {
+ "epoch": 13.18,
+ "learning_rate": 6.12226670613099e-06,
+ "loss": 1.7031,
+ "step": 915500
+ },
+ {
+ "epoch": 13.18,
+ "learning_rate": 6.070547671345266e-06,
+ "loss": 1.6961,
+ "step": 916000
+ },
+ {
+ "epoch": 13.19,
+ "learning_rate": 6.018620929592531e-06,
+ "loss": 1.701,
+ "step": 916500
+ },
+ {
+ "epoch": 13.2,
+ "learning_rate": 5.966901894806807e-06,
+ "loss": 1.7007,
+ "step": 917000
+ },
+ {
+ "epoch": 13.21,
+ "learning_rate": 5.914975153054072e-06,
+ "loss": 1.6968,
+ "step": 917500
+ },
+ {
+ "epoch": 13.21,
+ "learning_rate": 5.863256118268347e-06,
+ "loss": 1.7013,
+ "step": 918000
+ },
+ {
+ "epoch": 13.22,
+ "learning_rate": 5.811329376515611e-06,
+ "loss": 1.7012,
+ "step": 918500
+ },
+ {
+ "epoch": 13.23,
+ "learning_rate": 5.7596103417298875e-06,
+ "loss": 1.706,
+ "step": 919000
+ },
+ {
+ "epoch": 13.24,
+ "learning_rate": 5.707683599977152e-06,
+ "loss": 1.6966,
+ "step": 919500
+ },
+ {
+ "epoch": 13.24,
+ "learning_rate": 5.655964565191428e-06,
+ "loss": 1.7014,
+ "step": 920000
+ },
+ {
+ "epoch": 13.25,
+ "learning_rate": 5.604037823438693e-06,
+ "loss": 1.6998,
+ "step": 920500
+ },
+ {
+ "epoch": 13.26,
+ "learning_rate": 5.5523187886529685e-06,
+ "loss": 1.7011,
+ "step": 921000
+ },
+ {
+ "epoch": 13.26,
+ "learning_rate": 5.5003920469002334e-06,
+ "loss": 1.6975,
+ "step": 921500
+ },
+ {
+ "epoch": 13.27,
+ "learning_rate": 5.4486730121145095e-06,
+ "loss": 1.7014,
+ "step": 922000
+ },
+ {
+ "epoch": 13.28,
+ "learning_rate": 5.3967462703617736e-06,
+ "loss": 1.6976,
+ "step": 922500
+ },
+ {
+ "epoch": 13.29,
+ "learning_rate": 5.34502723557605e-06,
+ "loss": 1.6994,
+ "step": 923000
+ },
+ {
+ "epoch": 13.29,
+ "learning_rate": 5.2931004938233145e-06,
+ "loss": 1.6963,
+ "step": 923500
+ },
+ {
+ "epoch": 13.3,
+ "learning_rate": 5.24138145903759e-06,
+ "loss": 1.7022,
+ "step": 924000
+ },
+ {
+ "epoch": 13.31,
+ "learning_rate": 5.189454717284855e-06,
+ "loss": 1.699,
+ "step": 924500
+ },
+ {
+ "epoch": 13.31,
+ "learning_rate": 5.137735682499131e-06,
+ "loss": 1.6969,
+ "step": 925000
+ },
+ {
+ "epoch": 13.32,
+ "learning_rate": 5.085808940746395e-06,
+ "loss": 1.6995,
+ "step": 925500
+ },
+ {
+ "epoch": 13.33,
+ "learning_rate": 5.034089905960672e-06,
+ "loss": 1.7032,
+ "step": 926000
+ },
+ {
+ "epoch": 13.34,
+ "learning_rate": 4.982163164207936e-06,
+ "loss": 1.7019,
+ "step": 926500
+ },
+ {
+ "epoch": 13.34,
+ "learning_rate": 4.930444129422211e-06,
+ "loss": 1.6999,
+ "step": 927000
+ },
+ {
+ "epoch": 13.35,
+ "learning_rate": 4.878517387669476e-06,
+ "loss": 1.6992,
+ "step": 927500
+ },
+ {
+ "epoch": 13.36,
+ "learning_rate": 4.826798352883752e-06,
+ "loss": 1.6981,
+ "step": 928000
+ },
+ {
+ "epoch": 13.36,
+ "learning_rate": 4.774871611131017e-06,
+ "loss": 1.6997,
+ "step": 928500
+ },
+ {
+ "epoch": 13.37,
+ "learning_rate": 4.723152576345293e-06,
+ "loss": 1.699,
+ "step": 929000
+ },
+ {
+ "epoch": 13.38,
+ "learning_rate": 4.671225834592557e-06,
+ "loss": 1.7004,
+ "step": 929500
+ },
+ {
+ "epoch": 13.39,
+ "learning_rate": 4.619506799806832e-06,
+ "loss": 1.6985,
+ "step": 930000
+ },
+ {
+ "epoch": 13.39,
+ "learning_rate": 4.567580058054097e-06,
+ "loss": 1.7005,
+ "step": 930500
+ },
+ {
+ "epoch": 13.4,
+ "learning_rate": 4.515861023268373e-06,
+ "loss": 1.6992,
+ "step": 931000
+ },
+ {
+ "epoch": 13.41,
+ "learning_rate": 4.463934281515638e-06,
+ "loss": 1.6971,
+ "step": 931500
+ },
+ {
+ "epoch": 13.42,
+ "learning_rate": 4.412215246729914e-06,
+ "loss": 1.6986,
+ "step": 932000
+ },
+ {
+ "epoch": 13.42,
+ "learning_rate": 4.360288504977179e-06,
+ "loss": 1.6984,
+ "step": 932500
+ },
+ {
+ "epoch": 13.43,
+ "learning_rate": 4.308569470191454e-06,
+ "loss": 1.6983,
+ "step": 933000
+ },
+ {
+ "epoch": 13.44,
+ "learning_rate": 4.256642728438719e-06,
+ "loss": 1.7026,
+ "step": 933500
+ },
+ {
+ "epoch": 13.44,
+ "learning_rate": 4.204923693652994e-06,
+ "loss": 1.696,
+ "step": 934000
+ },
+ {
+ "epoch": 13.45,
+ "learning_rate": 4.152996951900259e-06,
+ "loss": 1.7,
+ "step": 934500
+ },
+ {
+ "epoch": 13.46,
+ "learning_rate": 4.101277917114535e-06,
+ "loss": 1.6985,
+ "step": 935000
+ },
+ {
+ "epoch": 13.47,
+ "learning_rate": 4.0493511753618e-06,
+ "loss": 1.6992,
+ "step": 935500
+ },
+ {
+ "epoch": 13.47,
+ "learning_rate": 3.997632140576075e-06,
+ "loss": 1.6999,
+ "step": 936000
+ },
+ {
+ "epoch": 13.48,
+ "learning_rate": 3.94570539882334e-06,
+ "loss": 1.7012,
+ "step": 936500
+ },
+ {
+ "epoch": 13.49,
+ "learning_rate": 3.893986364037616e-06,
+ "loss": 1.6972,
+ "step": 937000
+ },
+ {
+ "epoch": 13.49,
+ "learning_rate": 3.84205962228488e-06,
+ "loss": 1.6928,
+ "step": 937500
+ },
+ {
+ "epoch": 13.5,
+ "learning_rate": 3.790340587499156e-06,
+ "loss": 1.6992,
+ "step": 938000
+ },
+ {
+ "epoch": 13.51,
+ "learning_rate": 3.7384138457464213e-06,
+ "loss": 1.6987,
+ "step": 938500
+ },
+ {
+ "epoch": 13.52,
+ "learning_rate": 3.686694810960697e-06,
+ "loss": 1.6999,
+ "step": 939000
+ },
+ {
+ "epoch": 13.52,
+ "learning_rate": 3.634768069207962e-06,
+ "loss": 1.701,
+ "step": 939500
+ },
+ {
+ "epoch": 13.53,
+ "learning_rate": 3.5830490344222375e-06,
+ "loss": 1.7007,
+ "step": 940000
+ },
+ {
+ "epoch": 13.54,
+ "learning_rate": 3.5311222926695015e-06,
+ "loss": 1.6933,
+ "step": 940500
+ },
+ {
+ "epoch": 13.54,
+ "learning_rate": 3.479403257883778e-06,
+ "loss": 1.6991,
+ "step": 941000
+ },
+ {
+ "epoch": 13.55,
+ "learning_rate": 3.4274765161310425e-06,
+ "loss": 1.698,
+ "step": 941500
+ },
+ {
+ "epoch": 13.56,
+ "learning_rate": 3.375757481345318e-06,
+ "loss": 1.697,
+ "step": 942000
+ },
+ {
+ "epoch": 13.57,
+ "learning_rate": 3.323830739592583e-06,
+ "loss": 1.7021,
+ "step": 942500
+ },
+ {
+ "epoch": 13.57,
+ "learning_rate": 3.2721117048068587e-06,
+ "loss": 1.6991,
+ "step": 943000
+ },
+ {
+ "epoch": 13.58,
+ "learning_rate": 3.2201849630541236e-06,
+ "loss": 1.7001,
+ "step": 943500
+ },
+ {
+ "epoch": 13.59,
+ "learning_rate": 3.1684659282683992e-06,
+ "loss": 1.7013,
+ "step": 944000
+ },
+ {
+ "epoch": 13.6,
+ "learning_rate": 3.116539186515664e-06,
+ "loss": 1.7002,
+ "step": 944500
+ },
+ {
+ "epoch": 13.6,
+ "learning_rate": 3.0648201517299398e-06,
+ "loss": 1.7008,
+ "step": 945000
+ },
+ {
+ "epoch": 13.61,
+ "learning_rate": 3.0128934099772042e-06,
+ "loss": 1.6958,
+ "step": 945500
+ },
+ {
+ "epoch": 13.62,
+ "learning_rate": 2.96117437519148e-06,
+ "loss": 1.6979,
+ "step": 946000
+ },
+ {
+ "epoch": 13.62,
+ "learning_rate": 2.9092476334387448e-06,
+ "loss": 1.6968,
+ "step": 946500
+ },
+ {
+ "epoch": 13.63,
+ "learning_rate": 2.8575285986530204e-06,
+ "loss": 1.6985,
+ "step": 947000
+ },
+ {
+ "epoch": 13.64,
+ "learning_rate": 2.8056018569002853e-06,
+ "loss": 1.6994,
+ "step": 947500
+ },
+ {
+ "epoch": 13.65,
+ "learning_rate": 2.753882822114561e-06,
+ "loss": 1.6951,
+ "step": 948000
+ },
+ {
+ "epoch": 13.65,
+ "learning_rate": 2.701956080361826e-06,
+ "loss": 1.698,
+ "step": 948500
+ },
+ {
+ "epoch": 13.66,
+ "learning_rate": 2.6502370455761015e-06,
+ "loss": 1.7016,
+ "step": 949000
+ },
+ {
+ "epoch": 13.67,
+ "learning_rate": 2.598310303823366e-06,
+ "loss": 1.6996,
+ "step": 949500
+ },
+ {
+ "epoch": 13.67,
+ "learning_rate": 2.5465912690376416e-06,
+ "loss": 1.7003,
+ "step": 950000
+ },
+ {
+ "epoch": 13.68,
+ "learning_rate": 2.494664527284907e-06,
+ "loss": 1.7003,
+ "step": 950500
+ },
+ {
+ "epoch": 13.69,
+ "learning_rate": 2.4429454924991826e-06,
+ "loss": 1.6981,
+ "step": 951000
+ },
+ {
+ "epoch": 13.7,
+ "learning_rate": 2.391018750746447e-06,
+ "loss": 1.6989,
+ "step": 951500
+ },
+ {
+ "epoch": 13.7,
+ "learning_rate": 2.3392997159607227e-06,
+ "loss": 1.6978,
+ "step": 952000
+ },
+ {
+ "epoch": 13.71,
+ "learning_rate": 2.287372974207987e-06,
+ "loss": 1.6977,
+ "step": 952500
+ },
+ {
+ "epoch": 13.72,
+ "learning_rate": 2.2356539394222632e-06,
+ "loss": 1.6967,
+ "step": 953000
+ },
+ {
+ "epoch": 13.72,
+ "learning_rate": 2.183727197669528e-06,
+ "loss": 1.6991,
+ "step": 953500
+ },
+ {
+ "epoch": 13.73,
+ "learning_rate": 2.1320081628838038e-06,
+ "loss": 1.6996,
+ "step": 954000
+ },
+ {
+ "epoch": 13.74,
+ "learning_rate": 2.0800814211310682e-06,
+ "loss": 1.6975,
+ "step": 954500
+ },
+ {
+ "epoch": 13.75,
+ "learning_rate": 2.0283623863453443e-06,
+ "loss": 1.6967,
+ "step": 955000
+ },
+ {
+ "epoch": 13.75,
+ "learning_rate": 1.9764356445926088e-06,
+ "loss": 1.6981,
+ "step": 955500
+ },
+ {
+ "epoch": 13.76,
+ "learning_rate": 1.9247166098068844e-06,
+ "loss": 1.6951,
+ "step": 956000
+ },
+ {
+ "epoch": 13.77,
+ "learning_rate": 1.872789868054149e-06,
+ "loss": 1.6957,
+ "step": 956500
+ },
+ {
+ "epoch": 13.78,
+ "learning_rate": 1.8210708332684252e-06,
+ "loss": 1.6952,
+ "step": 957000
+ },
+ {
+ "epoch": 13.78,
+ "learning_rate": 1.7691440915156898e-06,
+ "loss": 1.6982,
+ "step": 957500
+ },
+ {
+ "epoch": 13.79,
+ "learning_rate": 1.7174250567299655e-06,
+ "loss": 1.6967,
+ "step": 958000
+ },
+ {
+ "epoch": 13.8,
+ "learning_rate": 1.6654983149772302e-06,
+ "loss": 1.6964,
+ "step": 958500
+ },
+ {
+ "epoch": 13.8,
+ "learning_rate": 1.6137792801915058e-06,
+ "loss": 1.7014,
+ "step": 959000
+ },
+ {
+ "epoch": 13.81,
+ "learning_rate": 1.5618525384387705e-06,
+ "loss": 1.6958,
+ "step": 959500
+ },
+ {
+ "epoch": 13.82,
+ "learning_rate": 1.5101335036530463e-06,
+ "loss": 1.6981,
+ "step": 960000
+ },
+ {
+ "epoch": 13.83,
+ "learning_rate": 1.458206761900311e-06,
+ "loss": 1.6965,
+ "step": 960500
+ },
+ {
+ "epoch": 13.83,
+ "learning_rate": 1.4064877271145869e-06,
+ "loss": 1.6983,
+ "step": 961000
+ },
+ {
+ "epoch": 13.84,
+ "learning_rate": 1.3545609853618516e-06,
+ "loss": 1.6996,
+ "step": 961500
+ },
+ {
+ "epoch": 13.85,
+ "learning_rate": 1.3028419505761274e-06,
+ "loss": 1.6969,
+ "step": 962000
+ },
+ {
+ "epoch": 13.85,
+ "learning_rate": 1.250915208823392e-06,
+ "loss": 1.6967,
+ "step": 962500
+ },
+ {
+ "epoch": 13.86,
+ "learning_rate": 1.1991961740376677e-06,
+ "loss": 1.696,
+ "step": 963000
+ },
+ {
+ "epoch": 13.87,
+ "learning_rate": 1.1472694322849324e-06,
+ "loss": 1.6942,
+ "step": 963500
+ },
+ {
+ "epoch": 13.88,
+ "learning_rate": 1.0955503974992083e-06,
+ "loss": 1.6953,
+ "step": 964000
+ },
+ {
+ "epoch": 13.88,
+ "learning_rate": 1.043623655746473e-06,
+ "loss": 1.7009,
+ "step": 964500
+ },
+ {
+ "epoch": 13.89,
+ "learning_rate": 9.919046209607486e-07,
+ "loss": 1.6978,
+ "step": 965000
+ },
+ {
+ "epoch": 13.9,
+ "learning_rate": 9.399778792080134e-07,
+ "loss": 1.6983,
+ "step": 965500
+ },
+ {
+ "epoch": 13.9,
+ "learning_rate": 8.88258844422289e-07,
+ "loss": 1.7025,
+ "step": 966000
+ },
+ {
+ "epoch": 13.91,
+ "learning_rate": 8.363321026695538e-07,
+ "loss": 1.6945,
+ "step": 966500
+ },
+ {
+ "epoch": 13.92,
+ "learning_rate": 7.846130678838295e-07,
+ "loss": 1.6922,
+ "step": 967000
+ },
+ {
+ "epoch": 13.93,
+ "learning_rate": 7.326863261310943e-07,
+ "loss": 1.6949,
+ "step": 967500
+ },
+ {
+ "epoch": 13.93,
+ "learning_rate": 6.8096729134537e-07,
+ "loss": 1.6979,
+ "step": 968000
+ },
+ {
+ "epoch": 13.94,
+ "learning_rate": 6.290405495926348e-07,
+ "loss": 1.695,
+ "step": 968500
+ },
+ {
+ "epoch": 13.95,
+ "learning_rate": 5.773215148069104e-07,
+ "loss": 1.6957,
+ "step": 969000
+ },
+ {
+ "epoch": 13.96,
+ "learning_rate": 5.253947730541751e-07,
+ "loss": 1.6958,
+ "step": 969500
+ },
+ {
+ "epoch": 13.96,
+ "learning_rate": 4.736757382684509e-07,
+ "loss": 1.6954,
+ "step": 970000
+ },
+ {
+ "epoch": 13.97,
+ "learning_rate": 4.217489965157156e-07,
+ "loss": 1.6977,
+ "step": 970500
+ },
+ {
+ "epoch": 13.98,
+ "learning_rate": 3.700299617299913e-07,
+ "loss": 1.6977,
+ "step": 971000
+ },
+ {
+ "epoch": 13.98,
+ "learning_rate": 3.181032199772561e-07,
+ "loss": 1.6974,
+ "step": 971500
+ },
+ {
+ "epoch": 13.99,
+ "learning_rate": 2.663841851915318e-07,
+ "loss": 1.6946,
+ "step": 972000
+ },
+ {
+ "epoch": 14.0,
+ "learning_rate": 2.1445744343879657e-07,
+ "loss": 1.6982,
+ "step": 972500
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.6685586444048556,
+ "eval_loss": 1.5693359375,
+ "eval_runtime": 652.5674,
+ "eval_samples_per_second": 825.872,
+ "eval_steps_per_second": 34.412,
+ "step": 972622
  }
  ],
  "max_steps": 972622,
  "num_train_epochs": 14,
- "total_flos": 6.473347929208783e+18,
+ "total_flos": 6.971418159270068e+18,
  "trial_name": null,
  "trial_params": null
  }
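The block added to trainer_state.json above appends one training log record per 500 optimizer steps from epoch 13.01 up to the epoch-14 evaluation, and updates total_flos to match. As a minimal sketch (assuming the standard Hugging Face Trainer layout, where these records sit under the "log_history" key), the latest eval metrics can be read back like this:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Keep only evaluation records and pick the most recent one.
evals = [rec for rec in state["log_history"] if "eval_loss" in rec]
latest = max(evals, key=lambda rec: rec["step"])
print(latest["step"], latest["eval_loss"], latest["eval_accuracy"])
# With the state shown in this diff: 972622 1.5693359375 0.6685586444048556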
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2096fe3cee8a81b52b84f4f48f2f179a567c9a1d5dfb3efce794725caec30498
+ oid sha256:2837d9f4f86873de1b0ebc8b7e038d8643cfa3a63d6d33fae61e8a5ac3ad6681
  size 59121639
runs/Feb22_11-16-27_user-SYS-5049A-TR/events.out.tfevents.1677032209.user-SYS-5049A-TR.55703.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ccb158c383ae1eaea03391569adbbdae78112733ef482267e5adbf014ed5f3f2
- size 296582
+ oid sha256:d8e35a2e6674dc9c66cbaf301b8bdd152690d11b5f9471e21af120b0ab4b53d1
+ size 319151