prxy5605 commited on
Commit
e4df702
·
verified ·
1 Parent(s): 83ab925

Training in progress, step 74, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28c80fabba16dc0d7e036f5ae552235fc143ef01162865bc26d99027e726421b
3
  size 402688040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:424858a895f2a90f8f28e9d7ed3f3fce59d16625f079ed50fd05a9e3bce873b3
3
  size 402688040
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a53cce6e2bc5ec7677fbb8519558f4d7b2a5243877618148af0fde97cfb70fb
3
  size 204773716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5b712189bca3b983faff78c85f33cda5a85aaae3a5787a7dc718844ca8af34e
3
  size 204773716
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c04b784607ecfebf2f8907cd8551c4fff661eeec6d7725d9cb07ae92a91a882d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b44d4076e0e7faa8598105a6ac2bf1d1cc7a099a32a53df9176c59ce4827def
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfdd42b3078aa2721ae65762bacf3e311ccf189e9060581e232d41dfc610dfff
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7105a509bdfde59e6e4d893f7ae8de5118e3beb17226743671d553509dc9662b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.1070915460586548,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 2.061224489795918,
5
  "eval_steps": 50,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -373,6 +373,174 @@
373
  "eval_samples_per_second": 20.797,
374
  "eval_steps_per_second": 5.447,
375
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  ],
378
  "logging_steps": 1,
@@ -396,12 +564,12 @@
396
  "should_evaluate": false,
397
  "should_log": false,
398
  "should_save": true,
399
- "should_training_stop": false
400
  },
401
  "attributes": {}
402
  }
403
  },
404
- "total_flos": 3.7584000516096e+16,
405
  "train_batch_size": 8,
406
  "trial_name": null,
407
  "trial_params": null
 
1
  {
2
  "best_metric": 1.1070915460586548,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 3.0510204081632653,
5
  "eval_steps": 50,
6
+ "global_step": 74,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
373
  "eval_samples_per_second": 20.797,
374
  "eval_steps_per_second": 5.447,
375
  "step": 50
376
+ },
377
+ {
378
+ "epoch": 2.1020408163265305,
379
+ "grad_norm": 2.2608895301818848,
380
+ "learning_rate": 2.8622245328485907e-05,
381
+ "loss": 3.3243,
382
+ "step": 51
383
+ },
384
+ {
385
+ "epoch": 2.142857142857143,
386
+ "grad_norm": 5.559860706329346,
387
+ "learning_rate": 2.6430163158700115e-05,
388
+ "loss": 3.5716,
389
+ "step": 52
390
+ },
391
+ {
392
+ "epoch": 2.183673469387755,
393
+ "grad_norm": 2.4335508346557617,
394
+ "learning_rate": 2.4294862790338917e-05,
395
+ "loss": 3.1162,
396
+ "step": 53
397
+ },
398
+ {
399
+ "epoch": 2.2244897959183674,
400
+ "grad_norm": 2.707456350326538,
401
+ "learning_rate": 2.2221488349019903e-05,
402
+ "loss": 4.0959,
403
+ "step": 54
404
+ },
405
+ {
406
+ "epoch": 2.2653061224489797,
407
+ "grad_norm": 2.8806371688842773,
408
+ "learning_rate": 2.0215034775378332e-05,
409
+ "loss": 3.0917,
410
+ "step": 55
411
+ },
412
+ {
413
+ "epoch": 2.306122448979592,
414
+ "grad_norm": 2.399998664855957,
415
+ "learning_rate": 1.8280335791817733e-05,
416
+ "loss": 2.7012,
417
+ "step": 56
418
+ },
419
+ {
420
+ "epoch": 2.3469387755102042,
421
+ "grad_norm": 2.528128147125244,
422
+ "learning_rate": 1.6422052257649078e-05,
423
+ "loss": 3.2158,
424
+ "step": 57
425
+ },
426
+ {
427
+ "epoch": 2.387755102040816,
428
+ "grad_norm": 2.2674336433410645,
429
+ "learning_rate": 1.4644660940672627e-05,
430
+ "loss": 3.2289,
431
+ "step": 58
432
+ },
433
+ {
434
+ "epoch": 2.4285714285714284,
435
+ "grad_norm": 2.6586036682128906,
436
+ "learning_rate": 1.2952443732252057e-05,
437
+ "loss": 3.2352,
438
+ "step": 59
439
+ },
440
+ {
441
+ "epoch": 2.4693877551020407,
442
+ "grad_norm": 3.082754373550415,
443
+ "learning_rate": 1.134947733186315e-05,
444
+ "loss": 3.1058,
445
+ "step": 60
446
+ },
447
+ {
448
+ "epoch": 2.510204081632653,
449
+ "grad_norm": 2.3137409687042236,
450
+ "learning_rate": 9.83962342596776e-06,
451
+ "loss": 3.1489,
452
+ "step": 61
453
+ },
454
+ {
455
+ "epoch": 2.5510204081632653,
456
+ "grad_norm": 3.146894693374634,
457
+ "learning_rate": 8.426519384872733e-06,
458
+ "loss": 3.7389,
459
+ "step": 62
460
+ },
461
+ {
462
+ "epoch": 2.5918367346938775,
463
+ "grad_norm": 2.5860869884490967,
464
+ "learning_rate": 7.1135694999864e-06,
465
+ "loss": 2.8921,
466
+ "step": 63
467
+ },
468
+ {
469
+ "epoch": 2.63265306122449,
470
+ "grad_norm": 2.651083469390869,
471
+ "learning_rate": 5.903936782582253e-06,
472
+ "loss": 3.3957,
473
+ "step": 64
474
+ },
475
+ {
476
+ "epoch": 2.673469387755102,
477
+ "grad_norm": 2.846827507019043,
478
+ "learning_rate": 4.800535343827833e-06,
479
+ "loss": 2.759,
480
+ "step": 65
481
+ },
482
+ {
483
+ "epoch": 2.7142857142857144,
484
+ "grad_norm": 3.2200374603271484,
485
+ "learning_rate": 3.8060233744356633e-06,
486
+ "loss": 2.8369,
487
+ "step": 66
488
+ },
489
+ {
490
+ "epoch": 2.7551020408163263,
491
+ "grad_norm": 3.225104808807373,
492
+ "learning_rate": 2.9227967408489653e-06,
493
+ "loss": 3.5187,
494
+ "step": 67
495
+ },
496
+ {
497
+ "epoch": 2.795918367346939,
498
+ "grad_norm": 2.8337438106536865,
499
+ "learning_rate": 2.152983213389559e-06,
500
+ "loss": 2.9697,
501
+ "step": 68
502
+ },
503
+ {
504
+ "epoch": 2.836734693877551,
505
+ "grad_norm": 2.468045949935913,
506
+ "learning_rate": 1.4984373402728014e-06,
507
+ "loss": 2.9315,
508
+ "step": 69
509
+ },
510
+ {
511
+ "epoch": 2.877551020408163,
512
+ "grad_norm": 3.3334028720855713,
513
+ "learning_rate": 9.607359798384785e-07,
514
+ "loss": 3.4555,
515
+ "step": 70
516
+ },
517
+ {
518
+ "epoch": 2.9183673469387754,
519
+ "grad_norm": 3.127939224243164,
520
+ "learning_rate": 5.411745017609493e-07,
521
+ "loss": 3.4373,
522
+ "step": 71
523
+ },
524
+ {
525
+ "epoch": 2.9591836734693877,
526
+ "grad_norm": 3.0887768268585205,
527
+ "learning_rate": 2.407636663901591e-07,
528
+ "loss": 2.9134,
529
+ "step": 72
530
+ },
531
+ {
532
+ "epoch": 3.010204081632653,
533
+ "grad_norm": 2.975400924682617,
534
+ "learning_rate": 6.022718974137975e-08,
535
+ "loss": 3.4059,
536
+ "step": 73
537
+ },
538
+ {
539
+ "epoch": 3.0510204081632653,
540
+ "grad_norm": 2.2996485233306885,
541
+ "learning_rate": 0.0,
542
+ "loss": 3.0027,
543
+ "step": 74
544
  }
545
  ],
546
  "logging_steps": 1,
 
564
  "should_evaluate": false,
565
  "should_log": false,
566
  "should_save": true,
567
+ "should_training_stop": true
568
  },
569
  "attributes": {}
570
  }
571
  },
572
+ "total_flos": 5.562432076382208e+16,
573
  "train_batch_size": 8,
574
  "trial_name": null,
575
  "trial_params": null