Training in progress, step 769, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +495 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c067e9b7f288530ab39326f6e570c05f17a138323d318e18c4f5ef2a3a307d7f
 size 78480072

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf532bff9ad6147dd88328720e03b44878aa6b0931a77cd997d677bdba34e871
 size 78480072

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ebf985949d5355ffd617b0ec9baa62c507546545c51519c7db5be5e381aabb8
 size 40131524

 version https://git-lfs.github.com/spec/v1
+oid sha256:21819e721894f2463b4a3a69c9c1c6f71462d4e01d9fe1b76da32267867a5726
 size 40131524

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5eeb22bcf29c8e6133c89cd9fd221f0894b2376a305e63e6775d3448ca845c80
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:72cb262ec7962a2bec51b38ffd60bf00b445c7c161ad40a55d35f9ba16677aa8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c86598b782c348ab7928decefae2f5953cb6a5fed8d04870c2a2a6ffe78cc4ad
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3bcc6b391be8ff7ed73c430f9b92a688f34c68358531300f1a61ed95351f819a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.4688050746917725,
   "best_model_checkpoint": "miner_id_24/checkpoint-700",
-  "epoch": 0.9103470698203691,
   "eval_steps": 50,
-  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5027,6 +5027,497 @@
       "eval_samples_per_second": 27.933,
       "eval_steps_per_second": 6.986,
       "step": 700
     }
   ],
   "logging_steps": 1,
@@ -5050,12 +5541,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.5096338217854566e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.4688050746917725,
   "best_model_checkpoint": "miner_id_24/checkpoint-700",
+  "epoch": 1.0004064049418842,
   "eval_steps": 50,
+  "global_step": 769,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.933,
       "eval_steps_per_second": 6.986,
       "step": 700
+    },
+    {
+      "epoch": 0.9116475656343981,
+      "grad_norm": 0.7602055668830872,
+      "learning_rate": 3.934912380055289e-06,
+      "loss": 1.4222,
+      "step": 701
+    },
+    {
+      "epoch": 0.9129480614484272,
+      "grad_norm": 0.7412660717964172,
+      "learning_rate": 3.820767937093095e-06,
+      "loss": 1.4024,
+      "step": 702
+    },
+    {
+      "epoch": 0.9142485572624564,
+      "grad_norm": 0.6979288458824158,
+      "learning_rate": 3.7082712652200867e-06,
+      "loss": 1.4652,
+      "step": 703
+    },
+    {
+      "epoch": 0.9155490530764854,
+      "grad_norm": 0.659394383430481,
+      "learning_rate": 3.5974242917625187e-06,
+      "loss": 1.4534,
+      "step": 704
+    },
+    {
+      "epoch": 0.9168495488905145,
+      "grad_norm": 0.7618691325187683,
+      "learning_rate": 3.488228915783631e-06,
+      "loss": 1.3859,
+      "step": 705
+    },
+    {
+      "epoch": 0.9181500447045436,
+      "grad_norm": 0.6013392806053162,
+      "learning_rate": 3.380687008050909e-06,
+      "loss": 1.4909,
+      "step": 706
+    },
+    {
+      "epoch": 0.9194505405185727,
+      "grad_norm": 0.6869235634803772,
+      "learning_rate": 3.2748004110041863e-06,
+      "loss": 1.3593,
+      "step": 707
+    },
+    {
+      "epoch": 0.9207510363326018,
+      "grad_norm": 0.7875847816467285,
+      "learning_rate": 3.1705709387239934e-06,
+      "loss": 1.5395,
+      "step": 708
+    },
+    {
+      "epoch": 0.9220515321466309,
+      "grad_norm": 0.7080655097961426,
+      "learning_rate": 3.068000376900515e-06,
+      "loss": 1.4826,
+      "step": 709
+    },
+    {
+      "epoch": 0.92335202796066,
+      "grad_norm": 0.7241777181625366,
+      "learning_rate": 2.9670904828030033e-06,
+      "loss": 1.4333,
+      "step": 710
+    },
+    {
+      "epoch": 0.9246525237746891,
+      "grad_norm": 0.745314359664917,
+      "learning_rate": 2.8678429852496467e-06,
+      "loss": 1.3511,
+      "step": 711
+    },
+    {
+      "epoch": 0.9259530195887182,
+      "grad_norm": 0.7951449751853943,
+      "learning_rate": 2.770259584577972e-06,
+      "loss": 1.4815,
+      "step": 712
+    },
+    {
+      "epoch": 0.9272535154027473,
+      "grad_norm": 0.6265352964401245,
+      "learning_rate": 2.6743419526157e-06,
+      "loss": 1.511,
+      "step": 713
+    },
+    {
+      "epoch": 0.9285540112167764,
+      "grad_norm": 0.6809277534484863,
+      "learning_rate": 2.580091732652101e-06,
+      "loss": 1.4647,
+      "step": 714
+    },
+    {
+      "epoch": 0.9298545070308055,
+      "grad_norm": 1.0379259586334229,
+      "learning_rate": 2.4875105394098654e-06,
+      "loss": 1.6214,
+      "step": 715
+    },
+    {
+      "epoch": 0.9311550028448345,
+      "grad_norm": 0.8921117782592773,
+      "learning_rate": 2.3965999590174095e-06,
+      "loss": 1.5787,
+      "step": 716
+    },
+    {
+      "epoch": 0.9324554986588637,
+      "grad_norm": 0.8291761875152588,
+      "learning_rate": 2.3073615489817235e-06,
+      "loss": 1.3749,
+      "step": 717
+    },
+    {
+      "epoch": 0.9337559944728928,
+      "grad_norm": 0.734061598777771,
+      "learning_rate": 2.219796838161681e-06,
+      "loss": 1.3417,
+      "step": 718
+    },
+    {
+      "epoch": 0.9350564902869218,
+      "grad_norm": 0.7294670343399048,
+      "learning_rate": 2.1339073267418464e-06,
+      "loss": 1.42,
+      "step": 719
+    },
+    {
+      "epoch": 0.936356986100951,
+      "grad_norm": 0.7059687972068787,
+      "learning_rate": 2.0496944862067656e-06,
+      "loss": 1.5228,
+      "step": 720
+    },
+    {
+      "epoch": 0.9376574819149801,
+      "grad_norm": 0.7029430270195007,
+      "learning_rate": 1.967159759315751e-06,
+      "loss": 1.4259,
+      "step": 721
+    },
+    {
+      "epoch": 0.9389579777290091,
+      "grad_norm": 0.7115610241889954,
+      "learning_rate": 1.8863045600782003e-06,
+      "loss": 1.5353,
+      "step": 722
+    },
+    {
+      "epoch": 0.9402584735430383,
+      "grad_norm": 0.6966084241867065,
+      "learning_rate": 1.8071302737293295e-06,
+      "loss": 1.4112,
+      "step": 723
+    },
+    {
+      "epoch": 0.9415589693570674,
+      "grad_norm": 0.6711810827255249,
+      "learning_rate": 1.7296382567064672e-06,
+      "loss": 1.5192,
+      "step": 724
+    },
+    {
+      "epoch": 0.9428594651710965,
+      "grad_norm": 0.7636929154396057,
+      "learning_rate": 1.6538298366257976e-06,
+      "loss": 1.3406,
+      "step": 725
+    },
+    {
+      "epoch": 0.9441599609851256,
+      "grad_norm": 0.7429101467132568,
+      "learning_rate": 1.57970631225961e-06,
+      "loss": 1.4707,
+      "step": 726
+    },
+    {
+      "epoch": 0.9454604567991547,
+      "grad_norm": 0.7741535902023315,
+      "learning_rate": 1.5072689535141072e-06,
+      "loss": 1.5783,
+      "step": 727
+    },
+    {
+      "epoch": 0.9467609526131838,
+      "grad_norm": 0.7151786684989929,
+      "learning_rate": 1.4365190014075437e-06,
+      "loss": 1.5202,
+      "step": 728
+    },
+    {
+      "epoch": 0.9480614484272128,
+      "grad_norm": 0.793623685836792,
+      "learning_rate": 1.3674576680490659e-06,
+      "loss": 1.4622,
+      "step": 729
+    },
+    {
+      "epoch": 0.949361944241242,
+      "grad_norm": 0.6933899521827698,
+      "learning_rate": 1.3000861366179062e-06,
+      "loss": 1.4376,
+      "step": 730
+    },
+    {
+      "epoch": 0.9506624400552711,
+      "grad_norm": 0.826909065246582,
+      "learning_rate": 1.234405561343066e-06,
+      "loss": 1.6414,
+      "step": 731
+    },
+    {
+      "epoch": 0.9519629358693001,
+      "grad_norm": 0.7606424689292908,
+      "learning_rate": 1.1704170674836313e-06,
+      "loss": 1.474,
+      "step": 732
+    },
+    {
+      "epoch": 0.9532634316833293,
+      "grad_norm": 0.7522351145744324,
+      "learning_rate": 1.1081217513094212e-06,
+      "loss": 1.353,
+      "step": 733
+    },
+    {
+      "epoch": 0.9545639274973584,
+      "grad_norm": 0.6792885661125183,
+      "learning_rate": 1.047520680082248e-06,
+      "loss": 1.4632,
+      "step": 734
+    },
+    {
+      "epoch": 0.9558644233113874,
+      "grad_norm": 0.7526161074638367,
+      "learning_rate": 9.886148920376203e-07,
+      "loss": 1.4683,
+      "step": 735
+    },
+    {
+      "epoch": 0.9571649191254166,
+      "grad_norm": 0.7043469548225403,
+      "learning_rate": 9.314053963669245e-07,
+      "loss": 1.64,
+      "step": 736
+    },
+    {
+      "epoch": 0.9584654149394457,
+      "grad_norm": 0.6877920627593994,
+      "learning_rate": 8.75893173200204e-07,
+      "loss": 1.4917,
+      "step": 737
+    },
+    {
+      "epoch": 0.9597659107534747,
+      "grad_norm": 0.7135722637176514,
+      "learning_rate": 8.220791735892964e-07,
+      "loss": 1.4555,
+      "step": 738
+    },
+    {
+      "epoch": 0.9610664065675039,
+      "grad_norm": 0.7366563677787781,
+      "learning_rate": 7.699643194915784e-07,
+      "loss": 1.4801,
+      "step": 739
+    },
+    {
+      "epoch": 0.962366902381533,
+      "grad_norm": 0.600986123085022,
+      "learning_rate": 7.1954950375418e-07,
+      "loss": 1.4188,
+      "step": 740
+    },
+    {
+      "epoch": 0.963667398195562,
+      "grad_norm": 0.682475745677948,
+      "learning_rate": 6.708355900986396e-07,
+      "loss": 1.4593,
+      "step": 741
+    },
+    {
+      "epoch": 0.9649678940095912,
+      "grad_norm": 0.8102880120277405,
+      "learning_rate": 6.238234131061616e-07,
+      "loss": 1.4449,
+      "step": 742
+    },
+    {
+      "epoch": 0.9662683898236203,
+      "grad_norm": 0.6956253051757812,
+      "learning_rate": 5.785137782032824e-07,
+      "loss": 1.3999,
+      "step": 743
+    },
+    {
+      "epoch": 0.9675688856376493,
+      "grad_norm": 0.7653059363365173,
+      "learning_rate": 5.349074616480931e-07,
+      "loss": 1.5632,
+      "step": 744
+    },
+    {
+      "epoch": 0.9688693814516784,
+      "grad_norm": 0.784920334815979,
+      "learning_rate": 4.93005210516928e-07,
+      "loss": 1.5078,
+      "step": 745
+    },
+    {
+      "epoch": 0.9701698772657076,
+      "grad_norm": 0.8322311043739319,
+      "learning_rate": 4.5280774269154115e-07,
+      "loss": 1.5513,
+      "step": 746
+    },
+    {
+      "epoch": 0.9714703730797366,
+      "grad_norm": 0.8491717576980591,
+      "learning_rate": 4.143157468468717e-07,
+      "loss": 1.4737,
+      "step": 747
+    },
+    {
+      "epoch": 0.9727708688937657,
+      "grad_norm": 0.6789981722831726,
+      "learning_rate": 3.775298824391982e-07,
+      "loss": 1.5004,
+      "step": 748
+    },
+    {
+      "epoch": 0.9740713647077949,
+      "grad_norm": 0.7747517824172974,
+      "learning_rate": 3.424507796948362e-07,
+      "loss": 1.5806,
+      "step": 749
+    },
+    {
+      "epoch": 0.9753718605218239,
+      "grad_norm": 0.8059523105621338,
+      "learning_rate": 3.090790395993692e-07,
+      "loss": 1.4772,
+      "step": 750
+    },
+    {
+      "epoch": 0.9753718605218239,
+      "eval_loss": 1.4685416221618652,
+      "eval_runtime": 92.6813,
+      "eval_samples_per_second": 27.956,
+      "eval_steps_per_second": 6.992,
+      "step": 750
+    },
+    {
+      "epoch": 0.976672356335853,
+      "grad_norm": 0.6508041024208069,
+      "learning_rate": 2.774152338873126e-07,
+      "loss": 1.4536,
+      "step": 751
+    },
+    {
+      "epoch": 0.9779728521498822,
+      "grad_norm": 0.7037675380706787,
+      "learning_rate": 2.474599050323989e-07,
+      "loss": 1.4824,
+      "step": 752
+    },
+    {
+      "epoch": 0.9792733479639112,
+      "grad_norm": 0.7295902967453003,
+      "learning_rate": 2.1921356623816336e-07,
+      "loss": 1.3991,
+      "step": 753
+    },
+    {
+      "epoch": 0.9805738437779403,
+      "grad_norm": 0.6926706433296204,
+      "learning_rate": 1.9267670142926187e-07,
+      "loss": 1.3409,
+      "step": 754
+    },
+    {
+      "epoch": 0.9818743395919695,
+      "grad_norm": 0.8004979491233826,
+      "learning_rate": 1.6784976524312213e-07,
+      "loss": 1.4575,
+      "step": 755
+    },
+    {
+      "epoch": 0.9831748354059985,
+      "grad_norm": 0.6473626494407654,
+      "learning_rate": 1.4473318302216098e-07,
+      "loss": 1.2736,
+      "step": 756
+    },
+    {
+      "epoch": 0.9844753312200276,
+      "grad_norm": 0.7723749876022339,
+      "learning_rate": 1.2332735080651248e-07,
+      "loss": 1.386,
+      "step": 757
+    },
+    {
+      "epoch": 0.9857758270340568,
+      "grad_norm": 0.6859511733055115,
+      "learning_rate": 1.0363263532724432e-07,
+      "loss": 1.4629,
+      "step": 758
+    },
+    {
+      "epoch": 0.9870763228480859,
+      "grad_norm": 0.7101691961288452,
+      "learning_rate": 8.564937400004081e-08,
+      "loss": 1.534,
+      "step": 759
+    },
+    {
+      "epoch": 0.9883768186621149,
+      "grad_norm": 0.7447732090950012,
+      "learning_rate": 6.9377874919474e-08,
+      "loss": 1.3709,
+      "step": 760
+    },
+    {
+      "epoch": 0.989677314476144,
+      "grad_norm": 0.6958511471748352,
+      "learning_rate": 5.4818416853674726e-08,
+      "loss": 1.3528,
+      "step": 761
+    },
+    {
+      "epoch": 0.9909778102901732,
+      "grad_norm": 0.8112965226173401,
+      "learning_rate": 4.1971249239591834e-08,
+      "loss": 1.6432,
+      "step": 762
+    },
+    {
+      "epoch": 0.9922783061042022,
+      "grad_norm": 0.7658011317253113,
+      "learning_rate": 3.0836592178717926e-08,
+      "loss": 1.5357,
+      "step": 763
+    },
+    {
+      "epoch": 0.9935788019182313,
+      "grad_norm": 0.7664233446121216,
+      "learning_rate": 2.141463643328123e-08,
+      "loss": 1.5152,
+      "step": 764
+    },
+    {
+      "epoch": 0.9948792977322605,
+      "grad_norm": 0.6668312549591064,
+      "learning_rate": 1.370554342302599e-08,
+      "loss": 1.4316,
+      "step": 765
+    },
+    {
+      "epoch": 0.9961797935462895,
+      "grad_norm": 0.7010061144828796,
+      "learning_rate": 7.709445222403577e-09,
+      "loss": 1.4363,
+      "step": 766
+    },
+    {
+      "epoch": 0.9974802893603186,
+      "grad_norm": 0.7528097629547119,
+      "learning_rate": 3.4264445583631622e-09,
+      "loss": 1.6157,
+      "step": 767
+    },
+    {
+      "epoch": 0.9987807851743478,
+      "grad_norm": 0.662280261516571,
+      "learning_rate": 8.566148085309423e-10,
+      "loss": 1.3786,
+      "step": 768
+    },
+    {
+      "epoch": 1.0004064049418842,
+      "grad_norm": 0.988018274307251,
+      "learning_rate": 0.0,
+      "loss": 1.7631,
+      "step": 769
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.052677812433715e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null