Pearush committed on
Commit
a647488
1 Parent(s): 9c82147

Upload PhiMoEForCausalLM

Browse files
config.json CHANGED
@@ -18,7 +18,7 @@
18
  "input_jitter_noise": 0.01,
19
  "intermediate_size": 1600,
20
  "lm_head_bias": true,
21
- "max_position_embeddings": 131072,
22
  "model_type": "phimoe",
23
  "num_attention_heads": 32,
24
  "num_experts_per_tok": 2,
@@ -28,148 +28,11 @@
28
  "original_max_position_embeddings": 4096,
29
  "output_router_logits": false,
30
  "rms_norm_eps": 1e-05,
31
- "rope_scaling": {
32
- "long_factor": [
33
- 1.0199999809265137,
34
- 1.0299999713897705,
35
- 1.0399999618530273,
36
- 1.0499999523162842,
37
- 1.0499999523162842,
38
- 1.0499999523162842,
39
- 1.059999942779541,
40
- 1.059999942779541,
41
- 1.059999942779541,
42
- 1.059999942779541,
43
- 1.059999942779541,
44
- 1.059999942779541,
45
- 1.0999999046325684,
46
- 1.1799999475479126,
47
- 1.1799999475479126,
48
- 1.3700000047683716,
49
- 1.4899998903274536,
50
- 2.109999895095825,
51
- 2.8899998664855957,
52
- 3.9499998092651367,
53
- 4.299999713897705,
54
- 6.429999828338623,
55
- 8.09000015258789,
56
- 10.690000534057617,
57
- 12.050000190734863,
58
- 18.229999542236328,
59
- 18.84000015258789,
60
- 19.899999618530273,
61
- 21.420000076293945,
62
- 26.200000762939453,
63
- 34.28000259399414,
64
- 34.590003967285156,
65
- 38.730003356933594,
66
- 40.22000503540039,
67
- 42.54000473022461,
68
- 44.000003814697266,
69
- 47.590003967285156,
70
- 54.750003814697266,
71
- 56.19000244140625,
72
- 57.44000244140625,
73
- 57.4900016784668,
74
- 61.20000076293945,
75
- 61.540000915527344,
76
- 61.75,
77
- 61.779998779296875,
78
- 62.06999969482422,
79
- 63.11000061035156,
80
- 63.43000030517578,
81
- 63.560001373291016,
82
- 63.71000289916992,
83
- 63.92000198364258,
84
- 63.94000244140625,
85
- 63.94000244140625,
86
- 63.96000289916992,
87
- 63.980003356933594,
88
- 64.0300064086914,
89
- 64.0300064086914,
90
- 64.0300064086914,
91
- 64.04000854492188,
92
- 64.10000610351562,
93
- 64.19000244140625,
94
- 64.20999908447266,
95
- 64.75,
96
- 64.95999908447266
97
- ],
98
- "long_mscale": 1.243163121016122,
99
- "original_max_position_embeddings": 4096,
100
- "short_factor": [
101
- 1.0,
102
- 1.0399999618530273,
103
- 1.0399999618530273,
104
- 1.0399999618530273,
105
- 1.0499999523162842,
106
- 1.0499999523162842,
107
- 1.0499999523162842,
108
- 1.0499999523162842,
109
- 1.0499999523162842,
110
- 1.0499999523162842,
111
- 1.0499999523162842,
112
- 1.0499999523162842,
113
- 1.0499999523162842,
114
- 1.0499999523162842,
115
- 1.059999942779541,
116
- 1.059999942779541,
117
- 1.0699999332427979,
118
- 1.0699999332427979,
119
- 1.0699999332427979,
120
- 1.0699999332427979,
121
- 1.1399999856948853,
122
- 1.159999966621399,
123
- 1.159999966621399,
124
- 1.159999966621399,
125
- 1.159999966621399,
126
- 1.1799999475479126,
127
- 1.1999999284744263,
128
- 1.3199999332427979,
129
- 1.3399999141693115,
130
- 1.3499999046325684,
131
- 1.3999998569488525,
132
- 1.4799998998641968,
133
- 1.4999998807907104,
134
- 1.589999794960022,
135
- 1.6499998569488525,
136
- 1.71999990940094,
137
- 1.8999998569488525,
138
- 1.9099998474121094,
139
- 1.9099998474121094,
140
- 1.9899998903274536,
141
- 1.9999998807907104,
142
- 1.9999998807907104,
143
- 2.009999990463257,
144
- 2.009999990463257,
145
- 2.009999990463257,
146
- 2.009999990463257,
147
- 2.009999990463257,
148
- 2.009999990463257,
149
- 2.009999990463257,
150
- 2.009999990463257,
151
- 2.009999990463257,
152
- 2.009999990463257,
153
- 2.009999990463257,
154
- 2.009999990463257,
155
- 2.009999990463257,
156
- 2.009999990463257,
157
- 2.009999990463257,
158
- 2.009999990463257,
159
- 2.009999990463257,
160
- 2.0999999046325684,
161
- 2.319999933242798,
162
- 2.419999837875366,
163
- 2.5899999141693115,
164
- 2.7899999618530273
165
- ],
166
- "short_mscale": 1.243163121016122,
167
- "type": "longrope"
168
- },
169
  "rope_theta": 10000.0,
170
  "router_aux_loss_coef": 0.0,
171
  "router_jitter_noise": 0.01,
172
- "sliding_window": 131072,
173
  "tie_word_embeddings": false,
174
  "torch_dtype": "bfloat16",
175
  "transformers_version": "4.41.2",
 
18
  "input_jitter_noise": 0.01,
19
  "intermediate_size": 1600,
20
  "lm_head_bias": true,
21
+ "max_position_embeddings": 4096,
22
  "model_type": "phimoe",
23
  "num_attention_heads": 32,
24
  "num_experts_per_tok": 2,
 
28
  "original_max_position_embeddings": 4096,
29
  "output_router_logits": false,
30
  "rms_norm_eps": 1e-05,
31
+ "rope_scaling": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "rope_theta": 10000.0,
33
  "router_aux_loss_coef": 0.0,
34
  "router_jitter_noise": 0.01,
35
+ "sliding_window": 4096,
36
  "tie_word_embeddings": false,
37
  "torch_dtype": "bfloat16",
38
  "transformers_version": "4.41.2",
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:743415eaaf652b801f37d2db078fe7f091da413ee0925d69c59fdb469e7a9233
3
  size 4993054664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30b660d64f6e7cb03a07ba372746573e0ae78bed7c194389f5df8dcb1c4abab
3
  size 4993054664
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5e0b7f3d40c5c27d90e6f3d2cc131c8af2a64a15b22242dfc067290bf890812
3
  size 4992566288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf609995b73d0cf6b1e08474190e678194213fc7dc50c18129d144be94931bc
3
  size 4992566288
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dba61a33ce4da53c2c89135de904f887963e18922f9a2751ff35af2e60a95d6
3
  size 4992566480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e389bfe5abe1c58b8dc376ba3dc3d4ee2b23cae3660f510acc77ae362fe7d661
3
  size 4992566480
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6715354c9536a6a27b80b5f1c318e35f1e027d811354b043bcc57715cb4716b3
3
  size 4992566480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d939e1ddab609df4a2f3847e3b28323d6465bcf245547c4a0101546aef778764
3
  size 4992566480
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce2190a34df6c3900debe73fd9d73591ca979d1e41924ea040f055e57cbce3ed
3
  size 3377824152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eece3b0a1ff1445560f73843f8680bc6f5de4ffa585e0b88e226e7983ca296c4
3
  size 3377824152