{
  "add_classification_head": false,
  "architectures": [
    "HunYuanDenseV1ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.1,
  "attention_head_dim": 128,
  "bos_token_id": 1,
  "cla_share_factor": 2,
  "class_num": 0,
  "dense_list": [
    4096,
    0
  ],
  "eod_token_id": 127967,
  "eos_token_id": 127960,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "im_end_id": 5,
  "im_newline_id": 11,
  "im_start_id": 4,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "mask_init_id": 12,
  "max_position_embeddings": 32768,
  "mlp_bias": false,
  "model_type": "hunyuan_v1_dense",
  "norm_type": "rms",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "org_vocab_size": 128167,
  "pad_id": 127961,
  "pad_token_id": 127961,
  "pool_type": "last",
  "pretraining_tp": 1,
  "quantization_config": {
    "bits": 4,
    "checkpoint_format": "gptq",
    "desc_act": true,
    "group_size": 128,
    "quant_method": "gptq",
    "static_groups": true,
    "sym": true,
    "true_sequential": true
  },
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "alpha": 1000.0,
    "beta_fast": 32,
    "beta_slow": 1,
    "factor": 1.0,
    "mscale": 1.0,
    "mscale_all_dim": 1.0,
    "type": "dynamic"
  },
  "rope_theta": 10000.0,
  "sep_token_id": 127962,
  "text_end_id": 7,
  "text_start_id": 6,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.52.4",
  "use_cache": false,
  "use_cla": false,
  "use_qk_norm": true,
  "use_rotary_pos_emb": true,
  "vocab_size": 128167
}