trl-internal-testing
/

tiny-Qwen2VLForConditionalGeneration

@@ -4,6 +4,7 @@
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
   "hidden_act": "silu",
   "hidden_size": 1536,
@@ -19,7 +20,9 @@
   "rms_norm_eps": 1e-06,
   "rope_scaling": {
     "mrope_section": [
-      2
     ],
     "rope_type": "default",
     "type": "default"
@@ -32,6 +35,7 @@
     ],
     "attention_dropout": 0.0,
     "bos_token_id": 151643,
     "eos_token_id": 151645,
     "hidden_act": "silu",
     "hidden_size": 16,
@@ -39,32 +43,6 @@
     "initializer_range": 0.02,
     "intermediate_size": 8960,
     "layer_types": [
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
-      "full_attention",
       "full_attention",
       "full_attention"
     ],
@@ -85,7 +63,6 @@
     "rope_theta": 1000000.0,
     "sliding_window": null,
     "tie_word_embeddings": true,
-    "torch_dtype": "bfloat16",
     "use_cache": true,
     "use_sliding_window": false,
     "video_token_id": null,
@@ -94,8 +71,7 @@
     "vision_token_id": 151654,
     "vocab_size": 151936
   },
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.56.0.dev0",
   "use_cache": true,
   "use_sliding_window": false,
   "video_token_id": 151656,

   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
+  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "hidden_act": "silu",
   "hidden_size": 1536,
   "rms_norm_eps": 1e-06,
   "rope_scaling": {
     "mrope_section": [
+      16,
+      24,
+      24
     ],
     "rope_type": "default",
     "type": "default"
     ],
     "attention_dropout": 0.0,
     "bos_token_id": 151643,
+    "dtype": "bfloat16",
     "eos_token_id": 151645,
     "hidden_act": "silu",
     "hidden_size": 16,
     "initializer_range": 0.02,
     "intermediate_size": 8960,
     "layer_types": [
       "full_attention",
       "full_attention"
     ],
     "rope_theta": 1000000.0,
     "sliding_window": null,
     "tie_word_embeddings": true,
     "use_cache": true,
     "use_sliding_window": false,
     "video_token_id": null,
     "vision_token_id": 151654,
     "vocab_size": 151936
   },
+  "transformers_version": "4.57.0.dev0",
   "use_cache": true,
   "use_sliding_window": false,
   "video_token_id": 151656,

generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
-  "transformers_version": "4.56.0.dev0"
 }

   "_from_model_config": true,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
+  "transformers_version": "4.57.0.dev0"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f3c8d9619ed6dc2a99ea49a9d1395bdb72e6ae3af865c8b0a59e94b7943eaca
 size 140919824

 version https://git-lfs.github.com/spec/v1
+oid sha256:451d201aa9cf0dee2855900303993465b2102f325189c4e9135f194d78fa2edd
 size 140919824