|
{ |
|
"add_classification_head": false, |
|
"anyres_pooling_size": 2, |
|
"anyres_vit_max_image_size": null, |
|
"anyres_vit_two_views": false, |
|
"architectures": [ |
|
"HunYuanMoEV1ForCausalLM" |
|
], |
|
"attention_bias": false, |
|
"attention_dropout": 0.1, |
|
"attention_head_dim": 128, |
|
"auto_map": { |
|
"AutoConfig": "configuration_hunyuan.HunYuanConfig", |
|
"AutoModel": "hunyuan.HunYuanModel", |
|
"AutoModelForCausalLM": "hunyuan.HunYuanMoEV1ForCausalLM" |
|
}, |
|
"bos_token_id": 1, |
|
"cla_share_factor": 2, |
|
"class_num": 0, |
|
"dense_list": [ |
|
4096, |
|
0 |
|
], |
|
"eod_token_id": 127967, |
|
"eos_token_id": 127960, |
|
"group_limited_greedy": false, |
|
"hidden_act": "silu", |
|
"hidden_size": 4096, |
|
"im_end_id": 6, |
|
"im_newline_id": 12, |
|
"im_start_id": 5, |
|
"image_token_id": 9, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 3072, |
|
"kv_lora_rank": null, |
|
"mask_init_id": 13, |
|
"max_position_embeddings": 32768, |
|
"mlp_bias": false, |
|
"model_type": "hunyuan", |
|
"moe_drop_tokens": false, |
|
"moe_intermediate_size": [ |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072, |
|
3072 |
|
], |
|
"moe_layer_num_skipped": 0, |
|
"moe_random_routing_dropped_token": false, |
|
"moe_topk": [ |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8 |
|
], |
|
"n_group": null, |
|
"norm_topk_prob": true, |
|
"norm_type": "rms", |
|
"num_attention_heads": 32, |
|
"num_experts": 64, |
|
"num_hidden_layers": 32, |
|
"num_key_value_heads": 8, |
|
"num_media_embeds": 257, |
|
"num_shared_expert": [ |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1 |
|
], |
|
"org_vocab_size": 128167, |
|
"pad_id": 127961, |
|
"pad_token_id": 127961, |
|
"pool_type": "last", |
|
"position_embedding_xdrope": false, |
|
"pretraining_tp": 1, |
|
"q_lora_rank": null, |
|
"qk_nope_head_dim": null, |
|
"qk_rope_head_dim": null, |
|
"quantization_config": { |
|
"bits": 4, |
|
"checkpoint_format": "gptq", |
|
"desc_act": true, |
|
"group_size": 128, |
|
"model_file_base_name": "model", |
|
"quant_method": "gptq", |
|
"static_groups": true, |
|
"sym": true, |
|
"true_sequential": true |
|
}, |
|
"rms_norm_eps": 1e-05, |
|
"rope_scaling": { |
|
"alpha": 1000.0, |
|
"beta_fast": 32, |
|
"beta_slow": 1, |
|
"factor": 1.0, |
|
"mscale": 1.0, |
|
"mscale_all_dim": 1.0, |
|
"type": "dynamic" |
|
}, |
|
"rope_theta": 10000.0, |
|
"routed_scaling_factor": 1.0, |
|
"sep_token_id": 127962, |
|
"skip_cls_token": false, |
|
"text_end_id": 8, |
|
"text_start_id": 7, |
|
"tie_word_embeddings": true, |
|
"topk_group": null, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.41.2", |
|
"use_cache": true, |
|
"use_cla": false, |
|
"use_mixed_mlp_moe": true, |
|
"use_mla": false, |
|
"use_qk_norm": true, |
|
"use_rotary_pos_emb": true, |
|
"v_head_dim": null, |
|
"video_end_id": 11, |
|
"video_start_id": 10, |
|
"vit_add_patchemb_bias": false, |
|
"vit_input_resolution": 224, |
|
"vit_mapping_type": "resampler", |
|
"vit_norm_type": "fused", |
|
"vit_patch": 1, |
|
"vit_path": null, |
|
"vit_remove_prenorm": false, |
|
"vit_token": 64, |
|
"vit_type": null, |
|
"vit_used_rms_norm": false, |
|
"vocab_size": 128167, |
|
"xdrope_section": null |
|
} |
|
|