Add checkpoint -1 post-trained on curated_deepscaler

Files changed (6) hide show

adapter_config.json CHANGED Viewed

@@ -24,13 +24,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "gate_proj",
-    "up_proj",
-    "o_proj",
     "k_proj",
     "q_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "down_proj",
     "k_proj",
     "q_proj",
+    "o_proj",
+    "v_proj",
+    "up_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43c0940b5317f9418ecf7b32409151972ec393349f07dd2b9ef08b6e1b3fd1e5
 size 5838888

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4321cfce03331c3e7c762c3f27c4e0f6dea5be962f1c6ea5aeb8d3baa5e18b2
 size 5838888

all_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-    "total_flos": 86470459392.0,
-    "train_loss": 2.2144429683685303,
-    "train_runtime": 42.2427,
-    "train_samples_per_second": 0.024,
-    "train_steps_per_second": 0.024
 }

 {
+    "total_flos": 0,
+    "train_loss": 0.0,
+    "train_runtime": 45.6213,
+    "train_samples_per_second": 0.022,
+    "train_steps_per_second": 0.022
 }

train_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
-    "total_flos": 86470459392.0,
-    "train_loss": 2.2144429683685303,
-    "train_runtime": 42.2427,
-    "train_samples_per_second": 0.024,
-    "train_steps_per_second": 0.024
 }

 {
+    "total_flos": 0,
+    "train_loss": 0.0,
+    "train_runtime": 45.6213,
+    "train_samples_per_second": 0.022,
+    "train_steps_per_second": 0.022
 }

trainer_state.json CHANGED Viewed

@@ -1,22 +1,21 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 1,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.0,
-      "mean_token_accuracy": 0.6129032373428345,
-      "step": 1,
-      "total_flos": 86470459392.0,
-      "train_loss": 2.2144429683685303,
-      "train_runtime": 42.2427,
-      "train_samples_per_second": 0.024,
-      "train_steps_per_second": 0.024
     }
   ],
   "logging_steps": 500,
@@ -30,13 +29,13 @@
         "should_epoch_stop": false,
         "should_evaluate": false,
         "should_log": false,
-        "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 86470459392.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0,
   "eval_steps": 500,
+  "global_step": 0,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0,
+      "step": 0,
+      "total_flos": 0,
+      "train_loss": 0.0,
+      "train_runtime": 45.6213,
+      "train_samples_per_second": 0.022,
+      "train_steps_per_second": 0.022
     }
   ],
   "logging_steps": 500,
         "should_epoch_stop": false,
         "should_evaluate": false,
         "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11908c41f8b1b9c4f70e7760783a7ae9b8f9737413db1103364a7d8386cb90c2
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:915c2312409d247c3aacb72c3a2385bb8ecc81f5f2159d30beaa0aa717ffd4a3
 size 6840