zhanghanxiao committed
Commit ecc6c8f · verified · 1 parent: dc506a5

Upload configuration_bailing_moe_v2.py with huggingface_hub

Files changed (1):
  1. configuration_bailing_moe_v2.py (+2 −4)
configuration_bailing_moe_v2.py CHANGED

@@ -5,8 +5,6 @@ from transformers.configuration_utils import PretrainedConfig
 
 class BailingMoeV2Config(PretrainedConfig):
 
-    model_type = "bailing_moe"
-
     def __init__(
         self,
         vocab_size=157184,
@@ -41,7 +39,7 @@ class BailingMoeV2Config(PretrainedConfig):
         head_dim=128,
         output_router_logits=False,
         use_qk_norm=True,
-        num_mtp_layers=0,
+        num_nextn_predict_layers=0,
         mtp_loss_scaling_factor=0,
         moe_router_enable_expert_bias=True,
         routed_scaling_factor=1.0,
@@ -60,7 +58,7 @@ class BailingMoeV2Config(PretrainedConfig):
         self.embedding_dropout = embedding_dropout
         self.attention_dropout = attention_dropout
         self.output_dropout = output_dropout
-        self.num_mtp_layers = num_mtp_layers
+        self.num_nextn_predict_layers = num_nextn_predict_layers
         self.mtp_loss_scaling_factor = mtp_loss_scaling_factor
         self.initializer_range = initializer_range
         self.max_position_embeddings = max_position_embeddings
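
For context, this commit renames the multi-token-prediction (MTP) depth parameter from num_mtp_layers to num_nextn_predict_layers and drops the class-level model_type = "bailing_moe" attribute. Below is a minimal sketch of constructing the config with the renamed field, assuming configuration_bailing_moe_v2.py is importable from the local path; the values passed are illustrative, not taken from the commit.

    from configuration_bailing_moe_v2 import BailingMoeV2Config

    # Minimal sketch: num_nextn_predict_layers replaces the old
    # num_mtp_layers argument after this commit. The values below are
    # hypothetical; all other fields fall back to the defaults in the
    # signature shown in the diff (e.g. vocab_size=157184, head_dim=128).
    config = BailingMoeV2Config(
        num_nextn_predict_layers=1,   # previously num_mtp_layers
        mtp_loss_scaling_factor=0.1,  # hypothetical non-zero MTP loss weight
    )
    print(config.num_nextn_predict_layers)  # -> 1

Callers that still pass num_mtp_layers would raise a TypeError after this change, so downstream code loading this remote config needs the same rename.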