wenhuach committed on
Commit
3d99e84
·
2 Parent(s): d158feb 0ca4acd

Merge branch 'main' of https://huggingface.co/OPEA/MiniMax-Text-01-int4-sym-inc-preview into main

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -30,7 +30,7 @@ from auto_round import AutoRoundConfig ##must import for autoround format
30
  from transformers import AutoModelForCausalLM, AutoTokenizer
31
  import torch
32
 
33
- quantized_model_dir = "/data3/wenhuach/MiniMax-Text-01-int4-sym-w4g128"
34
 
35
  tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
36
  model = AutoModelForCausalLM.from_pretrained(quantized_model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16,
@@ -41,13 +41,13 @@ def forward_hook(module, input, output):
41
  return torch.clamp(output, -65504, 65504).to(torch.bfloat16)
42
 
43
 
44
- def register_fp16_pre_hooks(model):
45
  for name, module in model.named_modules():
46
  if "QuantLinear" in module.__class__.__name__ or isinstance(module, torch.nn.Linear):
47
  module.register_forward_hook(forward_hook)
48
 
49
 
50
- register_fp16_pre_hooks(model)
51
  tokenizer.pad_token = tokenizer.eos_token
52
 
53
  prompts = [
@@ -153,7 +153,7 @@ pip3 install git+https://github.com/intel/auto-round.git@bf16_inference
153
 
154
  ```python
155
  import torch
156
- from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
157
 
158
  model_name = "MiniMaxAI/MiniMax-Text-01"
159
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
30
  from transformers import AutoModelForCausalLM, AutoTokenizer
31
  import torch
32
 
33
+ quantized_model_dir = "OPEA/MiniMax-Text-01-int4-sym-inc-preview"
34
 
35
  tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
36
  model = AutoModelForCausalLM.from_pretrained(quantized_model_dir, trust_remote_code=True, torch_dtype=torch.bfloat16,
 
41
  return torch.clamp(output, -65504, 65504).to(torch.bfloat16)
42
 
43
 
44
+ def register_fp16_hooks(model):
45
  for name, module in model.named_modules():
46
  if "QuantLinear" in module.__class__.__name__ or isinstance(module, torch.nn.Linear):
47
  module.register_forward_hook(forward_hook)
48
 
49
 
50
+ register_fp16_hooks(model)
51
  tokenizer.pad_token = tokenizer.eos_token
52
 
53
  prompts = [
 
153
 
154
  ```python
155
  import torch
156
+ from transformers import AutoModelForCausalLM, AutoTokenizer
157
 
158
  model_name = "MiniMaxAI/MiniMax-Text-01"
159
  tokenizer = AutoTokenizer.from_pretrained(model_name)