Model save

Files changed:
- README.md (+4, -5)
- all_results.json (+7, -7)
- generation_config.json (+1, -1)
- preprocessor_config.json (+15, -15)
- train_results.json (+7, -7)
- trainer_state.json (+0, -0)
README.md
CHANGED

@@ -1,10 +1,9 @@
 ---
-
+base_model: Qwen/Qwen2-VL-2B-Instruct
 library_name: transformers
 model_name: Qwen2-VL-2B-Instruct-SFT
 tags:
 - generated_from_trainer
-- R1-V
 - trl
 - sft
 licence: license
@@ -12,7 +11,7 @@ licence: license
 
 # Model Card for Qwen2-VL-2B-Instruct-SFT
 
-This model is a fine-tuned version of [
+This model is a fine-tuned version of [Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
@@ -36,9 +35,9 @@ This model was trained with SFT.
 ### Framework versions
 
 - TRL: 0.14.0
-- Transformers: 4.
+- Transformers: 4.52.0.dev0
 - Pytorch: 2.5.1
-- Datasets: 3.
+- Datasets: 3.6.0
 - Tokenizers: 0.21.1
 
 ## Citations
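The card's actual Quick start snippet is not shown in this diff. As a rough illustration only, loading and prompting the fine-tuned checkpoint with transformers could look like the sketch below; the repo id and the prompt are placeholders, not values taken from this commit.

```python
# Minimal sketch, not the card's own Quick start: load the SFT checkpoint and
# run a text-only prompt. The repo id and prompt below are placeholders.
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

model_id = "your-username/Qwen2-VL-2B-Instruct-SFT"  # hypothetical repo id
model = Qwen2VLForConditionalGeneration.from_pretrained(model_id, torch_dtype="auto")
processor = AutoProcessor.from_pretrained(model_id)

messages = [{"role": "user", "content": [{"type": "text", "text": "Describe what SFT means."}]}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[prompt], return_tensors="pt")

output_ids = model.generate(**inputs, max_new_tokens=64)
print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])
```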
all_results.json
CHANGED

@@ -1,9 +1,9 @@
 {
-    "epoch":
-    "total_flos":
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples":
-    "train_samples_per_second":
-    "train_steps_per_second": 0.
+    "epoch": 0.9998197093715069,
+    "total_flos": 4977616761913344.0,
+    "train_loss": 0.6325558101381102,
+    "train_runtime": 63848.9812,
+    "train_samples": 221862,
+    "train_samples_per_second": 3.475,
+    "train_steps_per_second": 0.054
 }
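The new numbers are internally consistent; a quick sanity check using the values copied from all_results.json above:

```python
# Sanity check on the reported metrics from all_results.json.
train_samples = 221862
train_runtime_s = 63848.9812

print(round(train_samples / train_runtime_s, 3))  # 3.475, matches "train_samples_per_second"
print(round(train_runtime_s / 3600, 1))           # ~17.7 hours for roughly one epoch (epoch ~= 0.9998)
```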
generation_config.json
CHANGED

@@ -10,6 +10,6 @@
   "temperature": 0.01,
   "top_k": 1,
   "top_p": 0.001,
-  "transformers_version": "4.
+  "transformers_version": "4.52.0.dev0",
   "use_cache": false
 }
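With top_k set to 1, these defaults are effectively greedy decoding regardless of the temperature and top_p values. They can be overridden per call rather than by editing the file; a hedged sketch with illustrative values, continuing from the loading example above:

```python
# Illustrative only: override the repo's near-greedy generation defaults at call time.
output_ids = model.generate(
    **inputs,
    do_sample=True,    # re-enable sampling; the stored config (top_k=1) is effectively greedy
    temperature=0.7,   # illustrative value, not from this repo
    top_p=0.9,         # illustrative value, not from this repo
    max_new_tokens=128,
)
```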
preprocessor_config.json
CHANGED

@@ -1,29 +1,29 @@
 {
+  "crop_size": {
+    "height": 518,
+    "width": 518
+  },
+  "do_center_crop": true,
   "do_convert_rgb": true,
   "do_normalize": true,
   "do_rescale": true,
   "do_resize": true,
   "image_mean": [
-    0.
-    0.
-    0.
+    0.5307,
+    0.5307,
+    0.5307
   ],
-  "image_processor_type": "
+  "image_processor_type": "BitImageProcessor",
   "image_std": [
-    0.
-    0.
-    0.
+    0.2583,
+    0.2583,
+    0.2583
   ],
-  "
-  "merge_size": 2,
-  "min_pixels": 3136,
-  "patch_size": 14,
+  "merge_size": 1,
   "processor_class": "Qwen2VLProcessor",
   "resample": 3,
   "rescale_factor": 0.00392156862745098,
   "size": {
-    "
-
-  },
-  "temporal_patch_size": 2
+    "shortest_edge": 518
+  }
 }
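This commit swaps the Qwen2-VL image-processor settings (min_pixels, patch_size, temporal_patch_size, merge_size 2) for a BitImageProcessor that resizes to a shortest edge of 518, center-crops to 518×518, and normalizes with the new mean/std, keeping merge_size 1 and the Qwen2VLProcessor class. A sketch of running that preprocessing on its own; the repo id and image path are placeholders:

```python
# Sketch: apply the updated preprocessing by itself. Repo id and image path are
# placeholders; the 518x518 output follows from "size" and "crop_size" above.
from PIL import Image
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("your-username/Qwen2-VL-2B-Instruct-SFT")
image = Image.open("example.jpg").convert("RGB")

batch = image_processor(images=image, return_tensors="pt")
print(batch["pixel_values"].shape)  # expected: torch.Size([1, 3, 518, 518])
```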
train_results.json
CHANGED

@@ -1,9 +1,9 @@
 {
-    "epoch":
-    "total_flos":
-    "train_loss": 0.
-    "train_runtime":
-    "train_samples":
-    "train_samples_per_second":
-    "train_steps_per_second": 0.
+    "epoch": 0.9998197093715069,
+    "total_flos": 4977616761913344.0,
+    "train_loss": 0.6325558101381102,
+    "train_runtime": 63848.9812,
+    "train_samples": 221862,
+    "train_samples_per_second": 3.475,
+    "train_steps_per_second": 0.054
 }
trainer_state.json
CHANGED

The diff for this file is too large to render. See the raw diff.