iproskurina committed on
Commit
850ca74
·
verified ·
1 Parent(s): 1636531

Update README.md to include GPTQModel usage.

Browse files
Files changed (1) hide show
  1. README.md +28 -0
README.md CHANGED
@@ -67,3 +67,31 @@ model = AutoGPTQForCausalLM.from_quantized(pretrained_model_dir, device="cuda:0"
67
  pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
68
  print(pipeline("auto-gptq is")[0]["generated_text"])
69
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
68
  print(pipeline("auto-gptq is")[0]["generated_text"])
69
  ```
70
+
71
+ ### Run the model with GPTQModel
72
+
73
+ GPTQModel package: https://github.com/ModelCloud/GPTQModel
74
+
75
+ ```
76
+ pip install -v gptqmodel=="1.8.0" --no-build-isolation
77
+ from gptqmodel import GPTQModel
78
+
79
+ model_id = 'iproskurina/bloom-560m-GPTQ-4bit-g128'
80
+ model = GPTQModel.load(model_id)
81
+ result = model.generate("Uncovering deep insights")[0] # tokens
82
+ print(model.tokenizer.decode(result)) # string output
83
+ ```
84
+
85
+ ### Run the model with GPTQModel
86
+
87
+ GPTQModel package: https://github.com/ModelCloud/GPTQModel
88
+
89
+ ```
90
+ pip install -v gptqmodel=="1.8.0" --no-build-isolation
91
+ from gptqmodel import GPTQModel
92
+
93
+ model_id = 'iproskurina/bloom-560m-GPTQ-4bit-g128'
94
+ model = GPTQModel.load(model_id)
95
+ result = model.generate("Uncovering deep insights")[0] # tokens
96
+ print(model.tokenizer.decode(result)) # string output
97
+ ```