Merge branch 'main' of https://huggingface.co/OPEA/MiniMax-Text-01-int4-sym-inc-preview into main
README.md
CHANGED
@@ -30,7 +30,7 @@ from auto_round import AutoRoundConfig ##must import for autoround format
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-quantized_model_dir = "/
+quantized_model_dir = "OPEA/MiniMax-Text-01-int4-sym-inc-preview"
 
 tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(quantized_model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16,
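The change above swaps a truncated local path for the published Hub repo id. For reference, here is a self-contained sketch of the load this hunk edits; the `from_pretrained` call is cut off in the visible diff, so the `device_map="auto"` continuation below is an assumption, not part of the commit:

```python
# Minimal sketch of the load step this hunk edits. The from_pretrained
# continuation is cut off in the diff, so device_map="auto" is an assumption.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRoundConfig  # must import for auto-round format

quantized_model_dir = "OPEA/MiniMax-Text-01-int4-sym-inc-preview"

tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    quantized_model_dir,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # assumption: the rest of this call is not shown in the hunk
)
```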
@@ -41,13 +41,13 @@ def forward_hook(module, input, output):
     return torch.clamp(output, -65504, 65504).to(torch.bfloat16)
 
 
-def
+def register_fp16_hooks(model):
     for name, module in model.named_modules():
         if "QuantLinear" in module.__class__.__name__ or isinstance(module, torch.nn.Linear):
             module.register_forward_hook(forward_hook)
 
 
-
+register_fp16_hooks(model)
 tokenizer.pad_token = tokenizer.eos_token
 
 prompts = [
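This hunk gives the previously anonymous hook-registration loop a name, `register_fp16_hooks`, and adds an explicit call after model setup. The pattern itself is fully visible in the diff: clamp every `Linear`/`QuantLinear` output to ±65504, the largest finite float16 value, so intermediate activations cannot overflow to inf during inference. A runnable sketch of the same pattern (the toy `Sequential` model at the end is illustrative only, not from the README):

```python
# Clamping-hook pattern from the hunk above: every Linear / QuantLinear
# output is clamped to the fp16 representable range (|x| <= 65504) and
# cast back to bfloat16, preventing fp16 overflow from producing inf.
import torch

def forward_hook(module, input, output):
    # 65504 is the largest finite float16 value
    return torch.clamp(output, -65504, 65504).to(torch.bfloat16)

def register_fp16_hooks(model):
    for name, module in model.named_modules():
        if "QuantLinear" in module.__class__.__name__ or isinstance(module, torch.nn.Linear):
            module.register_forward_hook(forward_hook)

# Illustrative toy module; in the README this is called on the quantized model.
toy = torch.nn.Sequential(torch.nn.Linear(4, 4))
register_fp16_hooks(toy)
out = toy(torch.randn(2, 4))
```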
@@ -153,7 +153,7 @@ pip3 install git+https://github.com/intel/auto-round.git@bf16_inference
 
 ```python
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_name = "MiniMaxAI/MiniMax-Text-01"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
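The visible lines here load the original `MiniMaxAI/MiniMax-Text-01` checkpoint after installing auto-round from the `bf16_inference` branch; the rest of the recipe is not shown in this hunk. A hedged sketch of how an int4 symmetric AutoRound run might continue from those lines, with `bits`/`group_size`/`sym` inferred from the repo name rather than from the diff, and following auto-round's documented `AutoRound` API:

```python
# Hypothetical continuation of the recipe: quantize the original checkpoint
# to int4 symmetric with auto-round. Parameter values are inferred from the
# repo name (int4-sym), not from the visible diff lines.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "MiniMaxAI/MiniMax-Text-01"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, trust_remote_code=True, torch_dtype=torch.bfloat16
)

# bits/group_size/sym are assumptions matching the "int4-sym" repo name.
autoround = AutoRound(model, tokenizer, bits=4, group_size=128, sym=True)
autoround.quantize()
autoround.save_quantized("./MiniMax-Text-01-int4-sym", format="auto_round")
```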