error577 committed on
Commit ffd5671 · verified · 1 Parent(s): af2bf9e

Training in progress, step 1540, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f8ca0cba6e6a99c2851249a2e6ed60e10abe6b88020013ba9ee3f0e7f1c70fe
+oid sha256:41429cf5571ce18285a141d961caa3f0f825a72c254ea213d3abcf32d26cf102
 size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c16ff90267c588b5044fb7db4096eea96f708905cc9ffa37cc90fc09b54759b
+oid sha256:944e69c6bc8e57b2bf73b9346c469498e8ae2c8baaba15889acd8b4c7669601e
 size 511723621
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05e8f9f3a20550c9a15ff7fdae2661fabd0e379d66918792caa6d978fd6c6e88
+oid sha256:f4a52571f49e27c7eab31d97001843fe6934ba9e93fcb85a79308d038f084c2a
 size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:570bee8fc5bb0a7fce804ef10812bf516398a83bcb0b67c8617738fc33ea82b8
+oid sha256:45489b655fbbe1ef592059bf1880ab60c4a3129300be9869b9bec372ecbdf27e
 size 1465
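All four binaries above are stored with Git LFS, so each diff only replaces the pointer's `oid sha256:` line (the SHA-256 of the raw file contents) while the recorded `size` stays the same. A minimal sketch for checking a downloaded file against the new pointer follows; the path and expected hash are taken from the adapter_model.safetensors pointer above, and `lfs_oid` is just an illustrative helper name.

```python
import hashlib

# Minimal sketch: recompute the Git LFS oid (SHA-256 of the raw file bytes)
# and compare it against the value recorded in the updated pointer file.
def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "41429cf5571ce18285a141d961caa3f0f825a72c254ea213d3abcf32d26cf102"
assert lfs_oid("last-checkpoint/adapter_model.safetensors") == expected
```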
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
 {
-  "best_global_step": 1480,
-  "best_metric": 1.8547732830047607,
-  "best_model_checkpoint": "miner_id_24/checkpoint-1480",
-  "epoch": 0.08647664561643056,
+  "best_global_step": 1540,
+  "best_metric": 1.8535445928573608,
+  "best_model_checkpoint": "miner_id_24/checkpoint-1540",
+  "epoch": 0.08761449621664676,
   "eval_steps": 20,
-  "global_step": 1520,
+  "global_step": 1540,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11264,6 +11264,154 @@
       "eval_samples_per_second": 2.182,
       "eval_steps_per_second": 1.091,
       "step": 1520
+    },
+    {
+      "epoch": 0.08653353814644137,
+      "grad_norm": 0.43319040536880493,
+      "learning_rate": 0.0001995954582663373,
+      "loss": 1.9505,
+      "step": 1521
+    },
+    {
+      "epoch": 0.08659043067645218,
+      "grad_norm": 0.457925409078598,
+      "learning_rate": 0.00019959492263420728,
+      "loss": 1.7886,
+      "step": 1522
+    },
+    {
+      "epoch": 0.086647323206463,
+      "grad_norm": 0.4349290728569031,
+      "learning_rate": 0.00019959438664843054,
+      "loss": 1.9586,
+      "step": 1523
+    },
+    {
+      "epoch": 0.0867042157364738,
+      "grad_norm": 0.4642198979854584,
+      "learning_rate": 0.0001995938503090089,
+      "loss": 1.7877,
+      "step": 1524
+    },
+    {
+      "epoch": 0.08676110826648462,
+      "grad_norm": 0.4763820171356201,
+      "learning_rate": 0.00019959331361594438,
+      "loss": 1.5194,
+      "step": 1525
+    },
+    {
+      "epoch": 0.08681800079649542,
+      "grad_norm": 0.4941880702972412,
+      "learning_rate": 0.00019959277656923883,
+      "loss": 1.8568,
+      "step": 1526
+    },
+    {
+      "epoch": 0.08687489332650623,
+      "grad_norm": 0.4190940260887146,
+      "learning_rate": 0.00019959223916889412,
+      "loss": 1.7104,
+      "step": 1527
+    },
+    {
+      "epoch": 0.08693178585651704,
+      "grad_norm": 0.5554546117782593,
+      "learning_rate": 0.0001995917014149122,
+      "loss": 1.6767,
+      "step": 1528
+    },
+    {
+      "epoch": 0.08698867838652785,
+      "grad_norm": 0.5352862477302551,
+      "learning_rate": 0.00019959116330729498,
+      "loss": 1.8475,
+      "step": 1529
+    },
+    {
+      "epoch": 0.08704557091653865,
+      "grad_norm": 0.4974481165409088,
+      "learning_rate": 0.00019959062484604438,
+      "loss": 1.8802,
+      "step": 1530
+    },
+    {
+      "epoch": 0.08710246344654947,
+      "grad_norm": 0.5199314951896667,
+      "learning_rate": 0.00019959008603116226,
+      "loss": 1.7377,
+      "step": 1531
+    },
+    {
+      "epoch": 0.08715935597656027,
+      "grad_norm": 0.49977415800094604,
+      "learning_rate": 0.0001995895468626506,
+      "loss": 1.8371,
+      "step": 1532
+    },
+    {
+      "epoch": 0.08721624850657109,
+      "grad_norm": 0.49237269163131714,
+      "learning_rate": 0.0001995890073405113,
+      "loss": 1.954,
+      "step": 1533
+    },
+    {
+      "epoch": 0.0872731410365819,
+      "grad_norm": 0.4401033818721771,
+      "learning_rate": 0.00019958846746474622,
+      "loss": 1.7293,
+      "step": 1534
+    },
+    {
+      "epoch": 0.0873300335665927,
+      "grad_norm": 0.5493918061256409,
+      "learning_rate": 0.00019958792723535733,
+      "loss": 1.7184,
+      "step": 1535
+    },
+    {
+      "epoch": 0.08738692609660352,
+      "grad_norm": 0.4993182420730591,
+      "learning_rate": 0.00019958738665234655,
+      "loss": 1.885,
+      "step": 1536
+    },
+    {
+      "epoch": 0.08744381862661432,
+      "grad_norm": 0.4622989892959595,
+      "learning_rate": 0.00019958684571571574,
+      "loss": 1.6466,
+      "step": 1537
+    },
+    {
+      "epoch": 0.08750071115662514,
+      "grad_norm": 0.5067393779754639,
+      "learning_rate": 0.0001995863044254669,
+      "loss": 1.664,
+      "step": 1538
+    },
+    {
+      "epoch": 0.08755760368663594,
+      "grad_norm": 0.5939873456954956,
+      "learning_rate": 0.00019958576278160191,
+      "loss": 2.0013,
+      "step": 1539
+    },
+    {
+      "epoch": 0.08761449621664676,
+      "grad_norm": 0.48256736993789673,
+      "learning_rate": 0.0001995852207841227,
+      "loss": 1.8087,
+      "step": 1540
+    },
+    {
+      "epoch": 0.08761449621664676,
+      "eval_loss": 1.8535445928573608,
+      "eval_runtime": 129.0453,
+      "eval_samples_per_second": 2.185,
+      "eval_steps_per_second": 1.093,
+      "step": 1540
     }
   ],
   "logging_steps": 1,
@@ -11278,7 +11426,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
+        "early_stopping_patience_counter": 0
      }
     },
     "TrainerControl": {
@@ -11292,7 +11440,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1751194217925837e+18,
+  "total_flos": 2.2037394141845914e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null