Update README.md to include GPTQModel usage.
README.md
CHANGED
@@ -69,3 +69,31 @@ model = AutoGPTQForCausalLM.from_quantized(pretrained_model_dir, device="cuda:0"
pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
print(pipeline("auto-gptq is")[0]["generated_text"])
```

### Run the model with GPTQModel

GPTQModel package: https://github.com/ModelCloud/GPTQModel

Install the pinned release:

```
pip install -v gptqmodel=="1.8.0" --no-build-isolation
```

Then load the quantized checkpoint and generate:

```
from gptqmodel import GPTQModel

model_id = 'iproskurina/Mistral-7B-v0.3-GPTQ-4bit-g128'
model = GPTQModel.load(model_id)
result = model.generate("Uncovering deep insights")[0]  # generated token ids
print(model.tokenizer.decode(result))  # decoded text
```
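For reference, the checkpoint should also load through plain Transformers once a GPTQ backend is available. A minimal sketch, assuming `optimum` plus a GPTQ kernel package (such as `gptqmodel` above) are installed so the quantization config stored in the repo is picked up; the prompt and `max_new_tokens` value are illustrative:

```
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = 'iproskurina/Mistral-7B-v0.3-GPTQ-4bit-g128'
tokenizer = AutoTokenizer.from_pretrained(model_id)
# The GPTQ quantization config shipped with the repo is applied automatically.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

inputs = tokenizer("Uncovering deep insights", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```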