afull05 committed
Commit 4eb9d39 · verified · 1 Parent(s): 72fb4cb

Training in progress, step 835, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39d6b881d4987bf9d5bcfe8a064b139932733ad3477682bd61058412eb62fa96
+oid sha256:5dd64f18f9224c56b5b0b7932ecdd5ccf3c1ea09df9ac454d32b480f4fe38e86
 size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2e153fff5053f18208466756773d29461ab60badd4fa692e62339d7396cbe33
+oid sha256:4771d40822f2232ea1fc66bbb86799fa46e7f703da704326d615ffa6101825b9
 size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d9c63bd0232e7980e2988292e37bfbeecbd77efd303e088b8b34aac11fef8b3
+oid sha256:c1b14213d8f993ec42c615c8027597e3bf7668a27fbdb9035698a8baae177c92
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:156aadf840372c36f9ad329437d53efbaab7f6ea8e90a535b104c634fa163d21
+oid sha256:a2ec0b50a82217eefa34c5afd3f9b449cdce6ba7ebf5cff729c1fc131f10aad0
 size 1064
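
The four binary files above are tracked with Git LFS, so each diff only swaps the pointer's oid (the file's SHA-256) and restates its size. As a minimal sketch of how one could confirm that a locally downloaded checkpoint file matches its pointer, using only the standard library (the path, hash, and size are taken from the updated adapter pointer above; the helper name `verify_lfs_pointer` is made up for illustration):

```python
import hashlib
import os

def verify_lfs_pointer(path, expected_oid, expected_size):
    """Check a downloaded file against the oid/size recorded in its Git LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoint files don't need to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Values taken from the updated adapter_model.safetensors pointer above.
ok = verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "5dd64f18f9224c56b5b0b7932ecdd5ccf3c1ea09df9ac454d32b480f4fe38e86",
    671149168,
)
print("adapter_model.safetensors matches its LFS pointer:", ok)
```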
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.020855692191213603,
+  "epoch": 0.026069615239017005,
   "eval_steps": 334,
-  "global_step": 668,
+  "global_step": 835,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -493,6 +493,125 @@
       "eval_samples_per_second": 2.461,
       "eval_steps_per_second": 2.461,
       "step": 668
+    },
+    {
+      "epoch": 0.020918134383402866,
+      "grad_norm": 1.3406473398208618,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 0.4458,
+      "step": 670
+    },
+    {
+      "epoch": 0.021230345344349175,
+      "grad_norm": 1.6482555866241455,
+      "learning_rate": 4.727745323894976e-05,
+      "loss": 0.8605,
+      "step": 680
+    },
+    {
+      "epoch": 0.021542556305295488,
+      "grad_norm": 2.3034768104553223,
+      "learning_rate": 4.4607993613388976e-05,
+      "loss": 0.8842,
+      "step": 690
+    },
+    {
+      "epoch": 0.0218547672662418,
+      "grad_norm": 2.2205710411071777,
+      "learning_rate": 4.19943090428802e-05,
+      "loss": 1.0044,
+      "step": 700
+    },
+    {
+      "epoch": 0.022166978227188112,
+      "grad_norm": 1.0776264667510986,
+      "learning_rate": 3.943903128623335e-05,
+      "loss": 0.9987,
+      "step": 710
+    },
+    {
+      "epoch": 0.02247918918813442,
+      "grad_norm": 0.7974869012832642,
+      "learning_rate": 3.694473329154778e-05,
+      "loss": 0.4341,
+      "step": 720
+    },
+    {
+      "epoch": 0.022791400149080734,
+      "grad_norm": 1.410180926322937,
+      "learning_rate": 3.45139266054715e-05,
+      "loss": 0.7361,
+      "step": 730
+    },
+    {
+      "epoch": 0.023103611110027046,
+      "grad_norm": 1.5520089864730835,
+      "learning_rate": 3.21490588442868e-05,
+      "loss": 0.8486,
+      "step": 740
+    },
+    {
+      "epoch": 0.023415822070973355,
+      "grad_norm": 4.628422260284424,
+      "learning_rate": 2.9852511229367865e-05,
+      "loss": 1.1191,
+      "step": 750
+    },
+    {
+      "epoch": 0.023728033031919667,
+      "grad_norm": 1.1060361862182617,
+      "learning_rate": 2.7626596189492983e-05,
+      "loss": 1.018,
+      "step": 760
+    },
+    {
+      "epoch": 0.02404024399286598,
+      "grad_norm": 1.108621597290039,
+      "learning_rate": 2.5473555032424533e-05,
+      "loss": 0.4265,
+      "step": 770
+    },
+    {
+      "epoch": 0.024352454953812292,
+      "grad_norm": 1.4980049133300781,
+      "learning_rate": 2.339555568810221e-05,
+      "loss": 0.6406,
+      "step": 780
+    },
+    {
+      "epoch": 0.0246646659147586,
+      "grad_norm": 1.4313726425170898,
+      "learning_rate": 2.139469052572127e-05,
+      "loss": 0.9413,
+      "step": 790
+    },
+    {
+      "epoch": 0.024976876875704913,
+      "grad_norm": 1.4710307121276855,
+      "learning_rate": 1.947297424689414e-05,
+      "loss": 1.0135,
+      "step": 800
+    },
+    {
+      "epoch": 0.025289087836651226,
+      "grad_norm": 0.8172721266746521,
+      "learning_rate": 1.763234185701673e-05,
+      "loss": 0.9491,
+      "step": 810
+    },
+    {
+      "epoch": 0.025601298797597538,
+      "grad_norm": 1.1474344730377197,
+      "learning_rate": 1.587464671688187e-05,
+      "loss": 0.5334,
+      "step": 820
+    },
+    {
+      "epoch": 0.025913509758543847,
+      "grad_norm": 1.4124549627304077,
+      "learning_rate": 1.4201658676502294e-05,
+      "loss": 0.7033,
+      "step": 830
     }
   ],
   "logging_steps": 10,
@@ -512,7 +631,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.0997187171477094e+17,
+  "total_flos": 6.372527049214525e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null