nvedant07 committed
Commit 19cfab2 · verified · 1 Parent(s): ab6170d

Upload config.json with huggingface_hub

Files changed (1)
1. config.json (+21, -1)

config.json CHANGED
@@ -1,4 +1,7 @@
 {
+  "architectures": [
+    "HATForCausalLM"
+  ],
   "auto_map": {
     "AutoConfig": "config.HATArchitectureConfig",
     "AutoModelForCausalLM": "model.HATForCausalLM"
@@ -6,6 +9,9 @@
   "backbone_config": {
     "hidden_size": 4096,
     "intermediate_size": 14336,
+    "is_neox_style": true,
+    "key_query_norm": false,
+    "key_query_norm_per_head": false,
     "max_position_embeddings": 32900,
     "mlp_bias": false,
     "num_attention_heads": 32,
@@ -31,12 +37,17 @@
       "hidden_size": 1024,
       "hidden_size_kv": 4096,
       "hidden_size_q": 1024,
+      "key_query_norm": false,
+      "key_query_norm_per_head": false,
       "num_attention_heads": 8,
       "word_window_size": 1
     },
     "cross_attn_every_layer": true,
     "hidden_size": 1024,
     "intermediate_size": 2816,
+    "is_neox_style": true,
+    "key_query_norm": false,
+    "key_query_norm_per_head": false,
     "max_position_embeddings": 262144,
     "mlp_bias": false,
     "num_attention_heads": 8,
@@ -58,11 +69,16 @@
       "hidden_size": 4096,
       "hidden_size_kv": 1024,
       "hidden_size_q": 4096,
+      "key_query_norm": false,
+      "key_query_norm_per_head": false,
       "num_attention_heads": 32,
       "word_window_size": 1
     },
     "hidden_size": 1024,
     "intermediate_size": 2816,
+    "is_neox_style": true,
+    "key_query_norm": false,
+    "key_query_norm_per_head": false,
     "max_position_embeddings": 262144,
     "mlp_bias": false,
     "num_attention_heads": 8,
@@ -78,9 +94,13 @@
     "use_cache": true,
     "vocab_size": 256
   },
+  "max_position_embeddings": 262144,
+  "max_word_size": 100,
   "model_type": "hierarchical_autoregressive_transformer",
+  "sliding_window": 768,
   "special_token_dict": {
     "<|eot_id|>": 192
   },
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.46.3"
-}
+}
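For context: the "auto_map" entries in this config point transformers at custom classes (config.HATArchitectureConfig, model.HATForCausalLM) shipped inside the repo, so loading requires trust_remote_code=True. Below is a minimal sketch of what that looks like; the repo id is a placeholder, not taken from this commit page.

# Minimal sketch: loading a checkpoint whose config.json carries an
# "auto_map" to custom classes, as in this commit. The repo id is a
# placeholder for illustration only.
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "user/hat-model"  # placeholder; substitute the actual repo id

# trust_remote_code=True lets transformers import config.py / model.py
# from the repo instead of looking for a built-in architecture.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.model_type)  # "hierarchical_autoregressive_transformer"

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # matches the "torch_dtype" added in this commit
)

The newly added top-level "architectures" list serves the same purpose from the other direction: it records which model class the checkpoint was saved with, so tooling can resolve the class without inspecting the weights.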