Update README.md to include GPTQModel usage.
README.md
CHANGED
@@ -67,3 +67,31 @@ model = AutoGPTQForCausalLM.from_quantized(pretrained_model_dir, device="cuda:0")
 pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
 print(pipeline("auto-gptq is")[0]["generated_text"])
 ```
+
+### Run the model with GPTQModel
+
+GPTQModel package: https://github.com/ModelCloud/GPTQModel
+
+```
+pip install -v gptqmodel=="1.8.0" --no-build-isolation
+```
+
+```
+from gptqmodel import GPTQModel
+
+model_id = 'iproskurina/bloom-560m-GPTQ-4bit-g128'
+model = GPTQModel.load(model_id)
+result = model.generate("Uncovering deep insights")[0]  # generated token IDs
+print(model.tokenizer.decode(result))  # decoded string output
+```
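
For reference, `generate` in the added snippet returns token IDs, which are then decoded with the model's own tokenizer. A minimal sketch of the same flow with an explicit generation length, assuming GPTQModel forwards standard Hugging Face generation kwargs such as `max_new_tokens` (an assumption; adjust to the installed version's API):

```
from gptqmodel import GPTQModel

model = GPTQModel.load('iproskurina/bloom-560m-GPTQ-4bit-g128')

# Assumption: generate() accepts a prompt string and forwards HF kwargs.
tokens = model.generate("Uncovering deep insights", max_new_tokens=64)[0]
print(model.tokenizer.decode(tokens, skip_special_tokens=True))
```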
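The context lines at the top of the hunk show only the tail of the README's existing AutoGPTQ example. A minimal sketch of the full snippet, assuming the tokenizer is loaded from the same repo (the imports and the tokenizer line are assumptions, since the hunk starts mid-example):

```
from transformers import AutoTokenizer, TextGenerationPipeline
from auto_gptq import AutoGPTQForCausalLM

# Assumed: pretrained_model_dir points at the quantized checkpoint.
pretrained_model_dir = 'iproskurina/bloom-560m-GPTQ-4bit-g128'
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir)
model = AutoGPTQForCausalLM.from_quantized(pretrained_model_dir, device="cuda:0")

pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
print(pipeline("auto-gptq is")[0]["generated_text"])
```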