{
  "architectures": [
    "MiniCPMV"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "openbmb/MiniCPM-V-4--configuration_minicpm.MiniCPMVConfig",
    "AutoModel": "openbmb/MiniCPM-V-4--modeling_minicpmv.MiniCPMV",
    "AutoModelForCausalLM": "openbmb/MiniCPM-V-4--modeling_minicpmv.MiniCPMV"
  },
  "batch_vision_input": true,
  "bos_token_id": 1,
  "drop_vision_last_layer": false,
  "eos_token_id": [
    2,
    73440
  ],
  "head_dim": 32,
  "hidden_act": "silu",
  "hidden_size": 128,
  "image_size": 448,
  "initializer_range": 0.1,
  "intermediate_size": 128,
  "max_position_embeddings": 32768,
  "mlp_bias": false,
  "model_type": "minicpmv",
  "num_attention_heads": 2,
  "num_hidden_layers": 2,
  "num_key_value_heads": 1,
  "pad_token_id": 2,
  "patch_size": 14,
  "pretraining_tp": 1,
  "query_num": 64,
  "rms_norm_eps": 1e-06,
  "rope_scaling": {
    "factor": 1.0,
    "long_factor": [
      0.9977997200264581,
      1.014658295992452,
      1.0349680404997148,
      1.059429246056193,
      1.0888815016813513,
      1.1243301355211495,
      1.166977103606075,
      1.2182568066927284,
      1.2798772354275727,
      1.3538666751582975,
      1.4426259039919596,
      1.5489853358570191,
      1.6762658237220625,
      1.8283407612492941,
      2.0096956085876183,
      2.225478927469756
    ],
    "original_max_position_embeddings": 32768,
    "rope_type": "longrope",
    "short_factor": [
      0.9977997200264581,
      1.014658295992452,
      1.0349680404997148,
      1.059429246056193,
      1.0888815016813513,
      1.1243301355211495,
      1.166977103606075,
      1.2182568066927284,
      1.2798772354275727,
      1.3538666751582975,
      1.4426259039919596,
      1.5489853358570191,
      1.6762658237220625,
      1.8283407612492941,
      2.0096956085876183,
      2.225478927469756
    ]
  },
  "rope_theta": 10000.0,
  "slice_config": {
    "max_slice_nums": 9,
    "model_type": "minicpmv",
    "patch_size": 14,
    "scale_resolution": 448
  },
  "slice_mode": true,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.56.0.dev0",
  "use_cache": true,
  "use_image_id": true,
  "version": 4.0,
  "vision_batch_size": 16,
  "vision_config": {
    "_attn_implementation_autoset": true,
    "attention_dropout": 0.0,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 64,
    "image_size": 980,
    "intermediate_size": 128,
    "layer_norm_eps": 1e-06,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 2,
    "num_channels": 3,
    "num_hidden_layers": 2,
    "patch_size": 14
  },
  "vocab_size": 73448
}