This model is an int4 model with group_size 128 and symmetric quantization of [deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3), generated by the [intel/auto-round](https://github.com/intel/auto-round) algorithm.
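
For reference, a quantization recipe of this shape can be expressed with the auto-round Python API. This is a minimal sketch, not the exact script used to produce this model: the output directory name and loading options are assumptions, while `bits=4`, `group_size=128`, and `sym=True` mirror the settings stated above.

~~~python
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "deepseek-ai/DeepSeek-V3"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# int4, group_size 128, symmetric quantization, as described above.
autoround = AutoRound(model, tokenizer, bits=4, group_size=128, sym=True)
autoround.quantize()

# "auto_gptq" emits a GPTQ-format checkpoint; the output path is illustrative.
autoround.save_quantized("./DeepSeek-V3-int4-sym-gptq-inc", format="auto_gptq")
~~~
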
**Loading the model in Transformers can be quite slow, especially on CUDA devices (30 minutes to 1 hour). Consider using an alternative serving framework, though note that some frameworks have overflow issues.** We have not tested other frameworks ourselves due to limited CUDA resources.

Please follow the license of the original model.
## How To Use
While we have added a workaround below to address this issue, we cannot guarantee reliable performance of the model.
~~~python
import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Fixed version of transformers' weight-initialization check; see
# https://github.com/huggingface/transformers/pull/35493
def set_initialized_submodules(model, state_dict_keys):
    """
    Sets the `_is_hf_initialized` flag in all submodules of a given model when all its weights are in the loaded state
    dict.
    """
    state_dict_keys = set(state_dict_keys)
    not_initialized_submodules = {}
    for module_name, module in model.named_modules():
        if module_name == "":
            # When checking if the root module is loaded there's no need to prepend module_name.
            module_keys = set(module.state_dict())
        else:
            module_keys = {f"{module_name}.{k}" for k in module.state_dict()}
        if module_keys.issubset(state_dict_keys):
            module._is_hf_initialized = True
        else:
            not_initialized_submodules[module_name] = module
    return not_initialized_submodules

# Monkey-patch the stock implementation so that model loading uses the fixed version.
transformers.modeling_utils.set_initialized_submodules = set_initialized_submodules

quantized_model_dir = "OPEA/DeepSeek-V3-int4-sym-gptq-inc"
~~~
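
The usage snippet is truncated at this point in the card. As a minimal sketch of how it would typically continue with the standard Transformers API (the prompt and generation settings here are illustrative assumptions, not the card's original example), loading and inference would proceed along these lines; note that the patch above must be applied before `from_pretrained` is called:

~~~python
# Hypothetical continuation: load the quantized checkpoint and run a prompt.
model = AutoModelForCausalLM.from_pretrained(
    quantized_model_dir,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)

prompt = "There is a girl who likes adventure,"
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)

outputs = model.generate(**inputs, max_new_tokens=512, do_sample=False)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
~~~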