{ "auto_map": { "AutoConfig": "config_molmoe.MolmoConfig", "AutoModelForCausalLM": "modeling_molmoe.MolmoForCausalLM" }, "activation_type": "swiglu", "additional_vocab_size": 128, "alibi": false, "alibi_bias_max": 8.0, "always_start_with_space": true, "architectures": [ "OLMoForCausalLM" ], "attention_dropout": 0.0, "attention_layer_norm": true, "attention_layer_norm_with_affine": true, "attention_type": "sdpa", "attn_logit_softcapping": null, "bias_for_layer_norm": false, "block_group_size": 1, "block_type": "moe", "clip_qkv": null, "crop_mode": "overlap-and-resize-c2", "d_model": 2048, "default_inference_len": 65, "do_random_scale": false, "embedding_dropout": 0.0, "embedding_size": 50304, "final_logit_softcapping": null, "fix_image_input_idx": 2, "float32_attention": true, "gin_bindings": null, "head_dim": null, "image_feature_dropout": 0.0, "image_padding_embed": "pad_and_partial_pad", "image_pooling_2d": "attention-meanq", "image_pooling_h": 2, "image_pooling_w": 2, "image_projector": "mlp", "include_bias": false, "init_cutoff_factor": 3.0, "init_device": "meta", "init_fn": "normal", "init_std": 0.02, "initializer_range": 0.02, "layer_norm_eps": 1e-05, "layer_norm_type": "rms", "layer_norm_with_affine": true, "llm_load_path": null, "loss_token_weighting": "root_subsegments", "low_cpu_fsdp": true, "max_crops": 12, "max_position_embeddings": 32768, "max_sequence_length": 4096, "message_formatting": "role", "mlp_hidden_size": null, "mlp_ratio": 1, "model_type": "molmo", "moe_capacity_factor": 1.25, "moe_dropless": true, "moe_interleave": false, "moe_lbl_in_fp32": false, "moe_log_expert_assignment": false, "moe_loss_weight": 0.0, "moe_mlp_impl": "sparse", "moe_num_experts": 64, "moe_shared_expert": false, "moe_top_k": 8, "moe_zloss_weight": 0.0, "multi_query_attention": null, "n_heads": 16, "n_kv_heads": null, "n_layers": 16, "new_embedding_init_range": 0.02, "norm_after": false, "normalize_input_embeds": false, "overlap_margins": [ 4, 4 ], "pad_to": null, "pad_token_id": 1, "pad_tokenizer": false, "precision": "amp_bf16", "prompt_override": null, "prompt_type": "uber_model", "qkv_bias": false, "query_pre_attn_scalar": 224, "residual_dropout": 0.1, "response_attention_dropout": 0.0, "response_residual_dropout": 0.0, "rope": true, "rope_full_precision": true, "rope_impl": "llama", "rope_theta": 10000.0, "scale_logits": false, "system_prompt_kind": "demo_or_style", "transformers_version": "4.45.0.dev0", "unconditioned": false, "use_cache": true, "use_cls_feature": false, "use_col_tokens": true, "use_position_ids": true, "vision_backbone": { "attention_dropout": 0.0, "fsdp_wrap": false, "image_default_input_size": [ 336, 336 ], "image_dropout_rate": 0.0, "image_emb_dim": 1024, "image_head_dim": 64, "image_mlp_activations": "quick_gelu", "image_mlp_dim": 4096, "image_model_type": "openai", "image_norm_eps": 1e-05, "image_num_heads": 16, "image_num_key_value_heads": 16, "image_num_layers": 23, "image_num_pos": 577, "image_patch_size": 14, "image_pos_patch_size": 14, "initializer_range": 0.02, "residual_dropout": 0.0, "resize_mode": "default" }, "vit_layers": [ -2, -9 ], "vit_load_path": null, "vocab_size": 50280, "weight_tying": false }