MolmoE-1B-0924 / config.json
{
  "auto_map": {
    "AutoConfig": "config_molmoe.MolmoConfig",
    "AutoModelForCausalLM": "modeling_molmoe.MolmoForCausalLM"
  },
  "activation_type": "swiglu",
  "additional_vocab_size": 128,
  "alibi": false,
  "alibi_bias_max": 8.0,
  "always_start_with_space": true,
  "architectures": [
    "OLMoForCausalLM"
  ],
  "attention_dropout": 0.0,
  "attention_layer_norm": true,
  "attention_layer_norm_with_affine": true,
  "attention_type": "sdpa",
  "attn_logit_softcapping": null,
  "bias_for_layer_norm": false,
  "block_group_size": 1,
  "block_type": "moe",
  "clip_qkv": null,
  "crop_mode": "overlap-and-resize-c2",
  "d_model": 2048,
  "default_inference_len": 65,
  "do_random_scale": false,
  "embedding_dropout": 0.0,
  "embedding_size": 50304,
  "final_logit_softcapping": null,
  "fix_image_input_idx": 2,
  "float32_attention": true,
  "gin_bindings": null,
  "head_dim": null,
  "image_feature_dropout": 0.0,
  "image_padding_embed": "pad_and_partial_pad",
  "image_pooling_2d": "attention-meanq",
  "image_pooling_h": 2,
  "image_pooling_w": 2,
  "image_projector": "mlp",
  "include_bias": false,
  "init_cutoff_factor": 3.0,
  "init_device": "meta",
  "init_fn": "normal",
  "init_std": 0.02,
  "initializer_range": 0.02,
  "layer_norm_eps": 1e-05,
  "layer_norm_type": "rms",
  "layer_norm_with_affine": true,
  "llm_load_path": null,
  "loss_token_weighting": "root_subsegments",
  "low_cpu_fsdp": true,
  "max_crops": 12,
  "max_position_embeddings": 32768,
  "max_sequence_length": 4096,
  "message_formatting": "role",
  "mlp_hidden_size": null,
  "mlp_ratio": 1,
  "model_type": "molmo",
  "moe_capacity_factor": 1.25,
  "moe_dropless": true,
  "moe_interleave": false,
  "moe_lbl_in_fp32": false,
  "moe_log_expert_assignment": false,
  "moe_loss_weight": 0.0,
  "moe_mlp_impl": "sparse",
  "moe_num_experts": 64,
  "moe_shared_expert": false,
  "moe_top_k": 8,
  "moe_zloss_weight": 0.0,
  "multi_query_attention": null,
  "n_heads": 16,
  "n_kv_heads": null,
  "n_layers": 16,
  "new_embedding_init_range": 0.02,
  "norm_after": false,
  "normalize_input_embeds": false,
  "overlap_margins": [
    4,
    4
  ],
  "pad_to": null,
  "pad_token_id": 1,
  "pad_tokenizer": false,
  "precision": "amp_bf16",
  "prompt_override": null,
  "prompt_type": "uber_model",
  "qkv_bias": false,
  "query_pre_attn_scalar": 224,
  "residual_dropout": 0.1,
  "response_attention_dropout": 0.0,
  "response_residual_dropout": 0.0,
  "rope": true,
  "rope_full_precision": true,
  "rope_impl": "llama",
  "rope_theta": 10000.0,
  "scale_logits": false,
  "system_prompt_kind": "demo_or_style",
  "tokenizer": {
    "identifier": "allenai/gpt-neox-olmo-dolma-v1_5",
    "olmo_bos_token_id": null,
    "olmo_eos_token_id": null,
    "tokenizer_adds_space": false,
    "tokenizer_dir": null,
    "truncate_direction": "right"
  },
  "transformers_version": "4.45.0.dev0",
  "unconditioned": false,
  "use_cache": true,
  "use_cls_feature": false,
  "use_col_tokens": true,
  "use_position_ids": true,
  "vision_backbone": {
    "attention_dropout": 0.0,
    "fsdp_wrap": false,
    "image_default_input_size": [
      336,
      336
    ],
    "image_dropout_rate": 0.0,
    "image_emb_dim": 1024,
    "image_head_dim": 64,
    "image_mlp_activations": "quick_gelu",
    "image_mlp_dim": 4096,
    "image_model_type": "openai",
    "image_norm_eps": 1e-05,
    "image_num_heads": 16,
    "image_num_key_value_heads": 16,
    "image_num_layers": 23,
    "image_num_pos": 577,
    "image_patch_size": 14,
    "image_pos_patch_size": 14,
    "initializer_range": 0.02,
    "residual_dropout": 0.0,
    "resize_mode": "default"
  },
  "vit_layers": [
    -2,
    -9
  ],
  "vit_load_path": null,
  "vocab_size": 50280,
  "weight_tying": false
}
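
The `auto_map` block routes the `Auto*` classes to custom code shipped alongside this file (`config_molmoe.py` and `modeling_molmoe.py`), so the checkpoint only loads with `trust_remote_code=True`. A minimal loading sketch, assuming the canonical upstream repo id `allenai/MolmoE-1B-0924` (not stated in this file) and a transformers version at least as new as the `"4.45.0.dev0"` recorded above:

```python
# Minimal loading sketch. Assumptions: the repo id "allenai/MolmoE-1B-0924"
# (the canonical upstream; this page may be a mirror) and network access
# to the Hugging Face Hub.
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "allenai/MolmoE-1B-0924"  # assumption, not stated in this config

# "auto_map" resolves AutoConfig to config_molmoe.MolmoConfig.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.model_type)       # "molmo"
print(config.moe_num_experts)  # 64, routed top-8 per token (moe_top_k)

# "auto_map" resolves AutoModelForCausalLM to modeling_molmoe.MolmoForCausalLM.
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)
```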
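
Several fields are mutually constrained, which makes for a quick consistency check. The sketch below is plain arithmetic on the values in this file, not code from `modeling_molmoe.py`; deriving the per-head dimension as `d_model // n_heads` when `"head_dim"` is `null` is an assumption based on the usual transformer convention:

```python
# Consistency checks using only values from this config.

# Text backbone: with "head_dim": null, the per-head dimension is
# presumably derived as d_model // n_heads.
d_model, n_heads = 2048, 16
assert d_model % n_heads == 0          # 2048 / 16 = 128 per head

# Embedding table: 50304 is 50280 padded up to the next multiple of 128,
# a common throughput optimization; "additional_vocab_size" is a
# separate field on top of this.
vocab_size, embedding_size = 50280, 50304
assert embedding_size % 128 == 0 and embedding_size >= vocab_size

# Vision backbone: a 336x336 input at patch size 14 is a 24x24 grid;
# with one extra (CLS) position that is 577, matching "image_num_pos".
side, patch = 336, 14
assert (side // patch) ** 2 + 1 == 577

# MoE: each token selects the top 8 of 64 experts.
num_experts, top_k = 64, 8
assert top_k <= num_experts
```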