Training in progress, step 1000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +123 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5dd64f18f9224c56b5b0b7932ecdd5ccf3c1ea09df9ac454d32b480f4fe38e86
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:11db584e072af7f1b6f514cdaa124b844cedc94c8b97ada1c825241d18c240cc
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4771d40822f2232ea1fc66bbb86799fa46e7f703da704326d615ffa6101825b9
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ed1a4d75b88c9c80e2c24d116905c056e83ed305fde161d122a1ed7add4dcfb
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1b14213d8f993ec42c615c8027597e3bf7668a27fbdb9035698a8baae177c92
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:00c02b4da51e8241d879990840aae408c8cee00a0a61e4693c55f47d7ad7007e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2ec0b50a82217eefa34c5afd3f9b449cdce6ba7ebf5cff729c1fc131f10aad0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7b5bf190dc871967c45091d9f1ab233b2d2ed62baca21fee5dfedb5718ffa5d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.026069615239017005,
   "eval_steps": 334,
-  "global_step": 835,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -612,6 +612,125 @@
       "learning_rate": 1.4201658676502294e-05,
       "loss": 0.7033,
       "step": 830
     }
   ],
   "logging_steps": 10,
@@ -626,12 +745,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.372527049214525e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.031221096094631143,
   "eval_steps": 334,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.4201658676502294e-05,
       "loss": 0.7033,
       "step": 830
+    },
+    {
+      "epoch": 0.02622572071949016,
+      "grad_norm": 1.3929728269577026,
+      "learning_rate": 1.2615062293021507e-05,
+      "loss": 0.852,
+      "step": 840
+    },
+    {
+      "epoch": 0.026537931680436472,
+      "grad_norm": 0.782926619052887,
+      "learning_rate": 1.1116455134507664e-05,
+      "loss": 1.0648,
+      "step": 850
+    },
+    {
+      "epoch": 0.02685014264138278,
+      "grad_norm": 1.1986958980560303,
+      "learning_rate": 9.707346171337894e-06,
+      "loss": 0.9374,
+      "step": 860
+    },
+    {
+      "epoch": 0.027162353602329093,
+      "grad_norm": 1.0474522113800049,
+      "learning_rate": 8.38915425679304e-06,
+      "loss": 0.5091,
+      "step": 870
+    },
+    {
+      "epoch": 0.027474564563275405,
+      "grad_norm": 1.569217562675476,
+      "learning_rate": 7.163206698392744e-06,
+      "loss": 0.6196,
+      "step": 880
+    },
+    {
+      "epoch": 0.027786775524221718,
+      "grad_norm": 2.387129306793213,
+      "learning_rate": 6.030737921409169e-06,
+      "loss": 0.8911,
+      "step": 890
+    },
+    {
+      "epoch": 0.028098986485168027,
+      "grad_norm": 1.0116970539093018,
+      "learning_rate": 4.992888225905468e-06,
+      "loss": 0.893,
+      "step": 900
+    },
+    {
+      "epoch": 0.02841119744611434,
+      "grad_norm": 1.056663155555725,
+      "learning_rate": 4.050702638550275e-06,
+      "loss": 0.9925,
+      "step": 910
+    },
+    {
+      "epoch": 0.02872340840706065,
+      "grad_norm": 1.0909359455108643,
+      "learning_rate": 3.2051298603643753e-06,
+      "loss": 0.634,
+      "step": 920
+    },
+    {
+      "epoch": 0.029035619368006964,
+      "grad_norm": 1.5931735038757324,
+      "learning_rate": 2.4570213114592954e-06,
+      "loss": 0.6653,
+      "step": 930
+    },
+    {
+      "epoch": 0.029347830328953273,
+      "grad_norm": 1.73000967502594,
+      "learning_rate": 1.8071302737293295e-06,
+      "loss": 0.7891,
+      "step": 940
+    },
+    {
+      "epoch": 0.029660041289899585,
+      "grad_norm": 0.5561469197273254,
+      "learning_rate": 1.2561111323605712e-06,
+      "loss": 0.9679,
+      "step": 950
+    },
+    {
+      "epoch": 0.029972252250845897,
+      "grad_norm": 1.1528609991073608,
+      "learning_rate": 8.04518716920466e-07,
+      "loss": 0.9362,
+      "step": 960
+    },
+    {
+      "epoch": 0.030284463211792206,
+      "grad_norm": 0.6554343104362488,
+      "learning_rate": 4.5280774269154115e-07,
+      "loss": 0.3348,
+      "step": 970
+    },
+    {
+      "epoch": 0.03059667417273852,
+      "grad_norm": 1.3619601726531982,
+      "learning_rate": 2.0133235281156736e-07,
+      "loss": 0.5258,
+      "step": 980
+    },
+    {
+      "epoch": 0.03090888513368483,
+      "grad_norm": 1.2936415672302246,
+      "learning_rate": 5.0345761681491746e-08,
+      "loss": 0.9126,
+      "step": 990
+    },
+    {
+      "epoch": 0.031221096094631143,
+      "grad_norm": 1.708081841468811,
+      "learning_rate": 0.0,
+      "loss": 1.0155,
+      "step": 1000
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.641564097334477e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null