Training in progress, step 808, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +214 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7629bac28a01650e0c47aae68065dd14a3871a16dc19535a5f47d97ef7f593e3
 size 362303176

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fea8921cf12d4a5d9e2fddeda14297240eb814e6b2f5cb6284bdf0760cf8536
 size 362303176

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:56c5762af4816dd09402e536fddd14c4f886af7c31765384de8fda2510100c78
 size 724761914

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b0a27fa7ceb085a02c901324986360a90328487e76a2815c4e3396f9977dca4
 size 724761914

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f29f1a356a90bc512795986655867fd11582b804d45eacb9816a4ff5d2939220
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:11ddf4fb347b7d3d57d8a71705558f16da3c5a60f302bda9ec16f52d333df642
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18b984273ea2d45b7ffb1d047bb359d93111e41fcad70d16a1b453fd38f72636
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:98c7a0a1cf08c2ee753de1f791f907cd20c65ab05d5d3b10185646939c648d81
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ee86abee0989df8e1c5461d5ecfc6d42b43c8879a30063d7c1ee114f0c589f6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9e16866d142d224011d2a90c46b632f4935f23811dc8dbb420e91a6b2340c0e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.4764267990074442,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -366,6 +366,216 @@
       "eval_samples_per_second": 37.43,
       "eval_steps_per_second": 4.687,
       "step": 500
     }
   ],
   "logging_steps": 10,
@@ -380,12 +590,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.182556306040422e+16,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.0,
   "eval_steps": 500,
+  "global_step": 808,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 37.43,
       "eval_steps_per_second": 4.687,
       "step": 500
+    },
+    {
+      "epoch": 2.5260545905707197,
+      "grad_norm": 1.716925859451294,
+      "learning_rate": 0.0001,
+      "loss": 1.5984,
+      "step": 510
+    },
+    {
+      "epoch": 2.575682382133995,
+      "grad_norm": 1.4898698329925537,
+      "learning_rate": 0.0001,
+      "loss": 1.5929,
+      "step": 520
+    },
+    {
+      "epoch": 2.6253101736972706,
+      "grad_norm": 1.3290361166000366,
+      "learning_rate": 0.0001,
+      "loss": 1.5847,
+      "step": 530
+    },
+    {
+      "epoch": 2.674937965260546,
+      "grad_norm": 1.2288880348205566,
+      "learning_rate": 0.0001,
+      "loss": 1.5859,
+      "step": 540
+    },
+    {
+      "epoch": 2.7245657568238215,
+      "grad_norm": 1.0679349899291992,
+      "learning_rate": 0.0001,
+      "loss": 1.5851,
+      "step": 550
+    },
+    {
+      "epoch": 2.774193548387097,
+      "grad_norm": 1.0576269626617432,
+      "learning_rate": 0.0001,
+      "loss": 1.5777,
+      "step": 560
+    },
+    {
+      "epoch": 2.8238213399503724,
+      "grad_norm": 1.5344107151031494,
+      "learning_rate": 0.0001,
+      "loss": 1.5861,
+      "step": 570
+    },
+    {
+      "epoch": 2.873449131513648,
+      "grad_norm": 1.2966816425323486,
+      "learning_rate": 0.0001,
+      "loss": 1.5907,
+      "step": 580
+    },
+    {
+      "epoch": 2.9230769230769234,
+      "grad_norm": 1.2389014959335327,
+      "learning_rate": 0.0001,
+      "loss": 1.5683,
+      "step": 590
+    },
+    {
+      "epoch": 2.9727047146401984,
+      "grad_norm": 1.6558314561843872,
+      "learning_rate": 0.0001,
+      "loss": 1.5772,
+      "step": 600
+    },
+    {
+      "epoch": 3.0198511166253104,
+      "grad_norm": 1.3844249248504639,
+      "learning_rate": 0.0001,
+      "loss": 1.4848,
+      "step": 610
+    },
+    {
+      "epoch": 3.069478908188586,
+      "grad_norm": 1.4529865980148315,
+      "learning_rate": 0.0001,
+      "loss": 1.5532,
+      "step": 620
+    },
+    {
+      "epoch": 3.119106699751861,
+      "grad_norm": 2.1029598712921143,
+      "learning_rate": 0.0001,
+      "loss": 1.5762,
+      "step": 630
+    },
+    {
+      "epoch": 3.1687344913151363,
+      "grad_norm": 1.028609275817871,
+      "learning_rate": 0.0001,
+      "loss": 1.5452,
+      "step": 640
+    },
+    {
+      "epoch": 3.2183622828784118,
+      "grad_norm": 1.214414358139038,
+      "learning_rate": 0.0001,
+      "loss": 1.5548,
+      "step": 650
+    },
+    {
+      "epoch": 3.267990074441687,
+      "grad_norm": 1.6931719779968262,
+      "learning_rate": 0.0001,
+      "loss": 1.545,
+      "step": 660
+    },
+    {
+      "epoch": 3.3176178660049627,
+      "grad_norm": 1.1534652709960938,
+      "learning_rate": 0.0001,
+      "loss": 1.5385,
+      "step": 670
+    },
+    {
+      "epoch": 3.367245657568238,
+      "grad_norm": 1.2802734375,
+      "learning_rate": 0.0001,
+      "loss": 1.5327,
+      "step": 680
+    },
+    {
+      "epoch": 3.4168734491315136,
+      "grad_norm": 1.7800501585006714,
+      "learning_rate": 0.0001,
+      "loss": 1.5577,
+      "step": 690
+    },
+    {
+      "epoch": 3.466501240694789,
+      "grad_norm": 1.2474421262741089,
+      "learning_rate": 0.0001,
+      "loss": 1.5394,
+      "step": 700
+    },
+    {
+      "epoch": 3.5161290322580645,
+      "grad_norm": 1.0985565185546875,
+      "learning_rate": 0.0001,
+      "loss": 1.557,
+      "step": 710
+    },
+    {
+      "epoch": 3.56575682382134,
+      "grad_norm": 1.0926990509033203,
+      "learning_rate": 0.0001,
+      "loss": 1.566,
+      "step": 720
+    },
+    {
+      "epoch": 3.6153846153846154,
+      "grad_norm": 1.0945656299591064,
+      "learning_rate": 0.0001,
+      "loss": 1.5391,
+      "step": 730
+    },
+    {
+      "epoch": 3.665012406947891,
+      "grad_norm": 1.6432324647903442,
+      "learning_rate": 0.0001,
+      "loss": 1.5572,
+      "step": 740
+    },
+    {
+      "epoch": 3.7146401985111663,
+      "grad_norm": 1.3223881721496582,
+      "learning_rate": 0.0001,
+      "loss": 1.538,
+      "step": 750
+    },
+    {
+      "epoch": 3.764267990074442,
+      "grad_norm": 1.4920518398284912,
+      "learning_rate": 0.0001,
+      "loss": 1.5552,
+      "step": 760
+    },
+    {
+      "epoch": 3.8138957816377173,
+      "grad_norm": 0.9920836687088013,
+      "learning_rate": 0.0001,
+      "loss": 1.5224,
+      "step": 770
+    },
+    {
+      "epoch": 3.8635235732009927,
+      "grad_norm": 1.6817526817321777,
+      "learning_rate": 0.0001,
+      "loss": 1.5647,
+      "step": 780
+    },
+    {
+      "epoch": 3.9131513647642677,
+      "grad_norm": 1.54438054561615,
+      "learning_rate": 0.0001,
+      "loss": 1.5519,
+      "step": 790
+    },
+    {
+      "epoch": 3.962779156327543,
+      "grad_norm": 1.1157947778701782,
+      "learning_rate": 0.0001,
+      "loss": 1.5455,
+      "step": 800
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.36943973711872e+16,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null