ZekeWang committed on
Commit
43241fc
1 Parent(s): 286732b

Update Nanbeige Chat Model

Browse files
Files changed (1) hide show
  1. config.json +12 -8
config.json CHANGED
@@ -1,12 +1,12 @@
1
  {
 
2
  "architectures": [
3
- "NanbeigeForCausalLM"
4
  ],
5
- "auto_map": {
6
- "AutoConfig": "configuration_nanbeige.NanbeigeConfig",
7
- "AutoModelForCausalLM": "modeling_nanbeige.NanbeigeForCausalLM"
8
- },
9
  "bos_token_id": 1,
 
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
12
  "hidden_size": 5120,
@@ -14,14 +14,18 @@
14
  "intermediate_size": 13824,
15
  "max_length": 4096,
16
  "max_position_embeddings": 4096,
17
- "model_type": "nanbeige",
18
  "num_attention_heads": 40,
19
  "num_hidden_layers": 48,
 
20
  "pad_token_id": 0,
 
21
  "rms_norm_eps": 1e-05,
 
 
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
- "transformers_version": "4.28.1",
25
  "use_cache": true,
26
- "vocab_size": 59136
27
  }
 
1
  {
2
+ "_name_or_path": "//code/chenzongchao/SFT/DeepSpeed-Chat/SFT_train_v1/nanbeige-16b-cool-cl-sft-0226-bs320/epoch_1step_999/",
3
  "architectures": [
4
+ "LlamaForCausalLM"
5
  ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
 
 
8
  "bos_token_id": 1,
9
+ "dropout": 0.0,
10
  "eos_token_id": 2,
11
  "hidden_act": "silu",
12
  "hidden_size": 5120,
 
14
  "intermediate_size": 13824,
15
  "max_length": 4096,
16
  "max_position_embeddings": 4096,
17
+ "model_type": "llama",
18
  "num_attention_heads": 40,
19
  "num_hidden_layers": 48,
20
+ "num_key_value_heads": 40,
21
  "pad_token_id": 0,
22
+ "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
24
+ "rope_scaling": null,
25
+ "rope_theta": 10000.0,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.35.0",
29
  "use_cache": true,
30
+ "vocab_size": 59392
31
  }