Training completed

Browse files

Files changed (4) hide show

README.md +1 -1
all_results.json +6 -6
train_results.json +6 -6
trainer_state.json +14 -42

README.md CHANGED Viewed

@@ -45,7 +45,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.03
-- training_steps: 50
 ### Training results

 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_ratio: 0.03
+- training_steps: 10
 ### Training results

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.091324200913242,
-    "total_flos": 6.970544231337165e+17,
-    "train_loss": 0.934032779932022,
-    "train_runtime": 669.5576,
-    "train_samples_per_second": 11.948,
-    "train_steps_per_second": 0.075
 }

 {
+    "epoch": 0.0182648401826484,
+    "total_flos": 1.394108846267433e+17,
+    "train_loss": 4.435050201416016,
+    "train_runtime": 166.2301,
+    "train_samples_per_second": 9.625,
+    "train_steps_per_second": 0.06
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.091324200913242,
-    "total_flos": 6.970544231337165e+17,
-    "train_loss": 0.934032779932022,
-    "train_runtime": 669.5576,
-    "train_samples_per_second": 11.948,
-    "train_steps_per_second": 0.075
 }

 {
+    "epoch": 0.0182648401826484,
+    "total_flos": 1.394108846267433e+17,
+    "train_loss": 4.435050201416016,
+    "train_runtime": 166.2301,
+    "train_samples_per_second": 9.625,
+    "train_steps_per_second": 0.06
 }

trainer_state.json CHANGED Viewed

@@ -1,63 +1,35 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.091324200913242,
   "eval_steps": 0,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0182648401826484,
-      "grad_norm": 0.20208922028541565,
       "learning_rate": 0.0001,
-      "loss": 4.4712,
       "step": 10
     },
     {
-      "epoch": 0.0365296803652968,
-      "grad_norm": 0.1714371293783188,
-      "learning_rate": 0.0001,
-      "loss": 0.0572,
-      "step": 20
-    },
-    {
-      "epoch": 0.0547945205479452,
-      "grad_norm": 0.16024631261825562,
-      "learning_rate": 0.0001,
-      "loss": 0.0547,
-      "step": 30
-    },
-    {
-      "epoch": 0.0730593607305936,
-      "grad_norm": 0.13002753257751465,
-      "learning_rate": 0.0001,
-      "loss": 0.0471,
-      "step": 40
-    },
-    {
-      "epoch": 0.091324200913242,
-      "grad_norm": 0.09344350546598434,
-      "learning_rate": 0.0001,
-      "loss": 0.04,
-      "step": 50
-    },
-    {
-      "epoch": 0.091324200913242,
-      "step": 50,
-      "total_flos": 6.970544231337165e+17,
-      "train_loss": 0.934032779932022,
-      "train_runtime": 669.5576,
-      "train_samples_per_second": 11.948,
-      "train_steps_per_second": 0.075
     }
   ],
   "logging_steps": 10,
-  "max_steps": 50,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
-  "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -70,7 +42,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.970544231337165e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0182648401826484,
   "eval_steps": 0,
+  "global_step": 10,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0182648401826484,
+      "grad_norm": 0.1791902333498001,
       "learning_rate": 0.0001,
+      "loss": 4.4351,
       "step": 10
     },
     {
+      "epoch": 0.0182648401826484,
+      "step": 10,
+      "total_flos": 1.394108846267433e+17,
+      "train_loss": 4.435050201416016,
+      "train_runtime": 166.2301,
+      "train_samples_per_second": 9.625,
+      "train_steps_per_second": 0.06
     }
   ],
   "logging_steps": 10,
+  "max_steps": 10,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
+  "save_steps": 10,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
       "attributes": {}
     }
   },
+  "total_flos": 1.394108846267433e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null