Add checkpoint -1 post-trained on curated_deepscaler

Files changed (7) hide show

adapter_config.json CHANGED Viewed

@@ -24,13 +24,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "down_proj",
-    "up_proj",
     "k_proj",
-    "v_proj",
     "o_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
     "k_proj",
+    "gate_proj",
     "o_proj",
+    "q_proj",
+    "up_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ae326e349e94e856f5f2de58502b84e3053bef07e27c2f71f2317357b31c8e7
 size 5838888

 version https://git-lfs.github.com/spec/v1
+oid sha256:59591948f8a5adc97ad8214b395f457ebe72fd04954f0e26a74cfef001f093f2
 size 5838888

all_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "total_flos": 0,
     "train_loss": 0.0,
-    "train_runtime": 9995.2935,
-    "train_samples_per_second": 0.01,
     "train_steps_per_second": 0.002
 }

 {
     "total_flos": 0,
     "train_loss": 0.0,
+    "train_runtime": 10851.9382,
+    "train_samples_per_second": 0.009,
     "train_steps_per_second": 0.002
 }

tokenizer_config.json CHANGED Viewed

@@ -1206,7 +1206,7 @@
   "eos_token": "<end_of_utterance>",
   "extra_special_tokens": {},
   "legacy": false,
-  "model_max_length": 8192,
   "pad_token": "<|im_end|>",
   "processor_class": "Idefics3Processor",
   "tokenizer_class": "GPT2Tokenizer",

   "eos_token": "<end_of_utterance>",
   "extra_special_tokens": {},
   "legacy": false,
+  "model_max_length": 16384,
   "pad_token": "<|im_end|>",
   "processor_class": "Idefics3Processor",
   "tokenizer_class": "GPT2Tokenizer",

train_results.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "total_flos": 0,
     "train_loss": 0.0,
-    "train_runtime": 9995.2935,
-    "train_samples_per_second": 0.01,
     "train_steps_per_second": 0.002
 }

 {
     "total_flos": 0,
     "train_loss": 0.0,
+    "train_runtime": 10851.9382,
+    "train_samples_per_second": 0.009,
     "train_steps_per_second": 0.002
 }

trainer_state.json CHANGED Viewed

@@ -13,8 +13,8 @@
       "step": 0,
       "total_flos": 0,
       "train_loss": 0.0,
-      "train_runtime": 9995.2935,
-      "train_samples_per_second": 0.01,
       "train_steps_per_second": 0.002
     }
   ],

       "step": 0,
       "total_flos": 0,
       "train_loss": 0.0,
+      "train_runtime": 10851.9382,
+      "train_samples_per_second": 0.009,
       "train_steps_per_second": 0.002
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:377d7fcf444597f25d679c2c1c7e9f9107437e56579ed16ed0ca3a5e7bbb5a6c
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9f997bd4cbe8bc017809085f990ece8762693b2bf751a586df061b31fd18769
 size 6840