Upload folder using huggingface_hub
Files changed:
- configuration_codeshell.py +7 -7
- merges.txt +0 -0
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- quantizer.py +15 -0
configuration_codeshell.py
CHANGED

@@ -99,11 +99,11 @@ class CodeShellConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=
-        n_positions=
-        n_embd=
-        n_layer=
-        n_head=
+        vocab_size=70144,
+        n_positions=8192,
+        n_embd=4096,
+        n_layer=42,
+        n_head=32,
         n_inner=None,
         activation_function="gelu_pytorch_tanh",
         resid_pdrop=0.1,
@@ -113,8 +113,8 @@ class CodeShellConfig(PretrainedConfig):
         initializer_range=0.02,
         scale_attn_weights=True,
         use_cache=True,
-        bos_token_id=
-        eos_token_id=
+        bos_token_id=70000,
+        eos_token_id=70000,
         attention_softmax_in_fp32=True,
         scale_attention_softmax_in_fp32=True,
         group_query_attention=True,
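As a rough illustration (not part of the commit), the effect of the new keyword defaults can be checked by instantiating CodeShellConfig directly from the custom code file; values stored in a repo's config.json would still override these defaults, so this is only a sketch of what the signature now implies.

# Sketch only: assumes configuration_codeshell.py is importable from the working directory.
from configuration_codeshell import CodeShellConfig

config = CodeShellConfig()  # no overrides, so the defaults set in this commit apply
assert config.vocab_size == 70144
assert config.n_positions == 8192
assert (config.n_embd, config.n_layer, config.n_head) == (4096, 42, 32)
assert config.bos_token_id == config.eos_token_id == 70000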
merges.txt
ADDED

The diff for this file is too large to render. See the raw diff.
model-00001-of-00002.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:911eb9034a0be6d4dde83bb2957d89bb0dcf3ad153737e6a58e455f0c05c071a
 size 9955659648
model-00002-of-00002.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:19377de31c1b367d59a3470329b2e1b09ab5eda4f4dafbaba03ed8a4095060a3
 size 5420501688
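These two files are Git LFS pointers: only the sha256 oid and byte size change here, while the actual weight shards live in LFS storage. A downloaded shard can be checked against the oid recorded in this commit with a short sketch like the one below; the local file path is an assumption, the expected digest is taken from the diff above.

# Sketch: verify a downloaded shard against the oid in its Git LFS pointer.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks so multi-GB shards fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "911eb9034a0be6d4dde83bb2957d89bb0dcf3ad153737e6a58e455f0c05c071a"
actual = sha256_of("model-00001-of-00002.safetensors")  # assumed local path
assert actual == expected, f"checksum mismatch: {actual}"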
quantizer.py
CHANGED

@@ -1,3 +1,18 @@
+# coding=utf-8
+# Copyright 2023 WisdomShell Inc. All Rights Reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 try:
     import bitsandbytes as bnb
     from bitsandbytes.nn.modules import Params4bit, Int8Params
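The hunk shown here only prepends the Apache 2.0 license header; the file itself is cut off right after the guarded bitsandbytes import. For context, the usual shape of such an optional-dependency guard looks roughly like the sketch below; the flag name and helper are assumptions for illustration, not the actual contents of quantizer.py beyond the lines shown above.

# Sketch of a common optional-import guard; names are illustrative only.
try:
    import bitsandbytes as bnb
    from bitsandbytes.nn.modules import Params4bit, Int8Params
    _BNB_AVAILABLE = True
except ImportError:
    _BNB_AVAILABLE = False

def require_bitsandbytes():
    # Call before entering any 4-bit/8-bit quantization code path.
    if not _BNB_AVAILABLE:
        raise ImportError(
            "bitsandbytes is required for quantization; install it with `pip install bitsandbytes`."
        )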