Update README.md
Browse files
README.md
CHANGED
|
@@ -181,12 +181,14 @@ quantized_model = model
|
|
| 181 |
linear_config = AWQConfig(base_config, step="prepare_for_loading")
|
| 182 |
quant_config = get_quant_config(linear_config)
|
| 183 |
quantized_model.config.quantization_config = TorchAoConfig(quant_config)
|
|
|
|
| 184 |
# Push to hub
|
| 185 |
USER_ID = "YOUR_USER_ID"
|
| 186 |
MODEL_NAME = model_id.split("/")[-1]
|
| 187 |
save_to = f"{USER_ID}/{MODEL_NAME}-AWQ-INT4"
|
| 188 |
quantized_model.push_to_hub(save_to, safe_serialization=False)
|
| 189 |
tokenizer.push_to_hub(save_to)
|
|
|
|
| 190 |
# Manual Testing
|
| 191 |
quantized_model = AutoModelForCausalLM.from_pretrained(
|
| 192 |
save_to,
|
|
|
|
| 181 |
linear_config = AWQConfig(base_config, step="prepare_for_loading")
|
| 182 |
quant_config = get_quant_config(linear_config)
|
| 183 |
quantized_model.config.quantization_config = TorchAoConfig(quant_config)
|
| 184 |
+
|
| 185 |
# Push to hub
|
| 186 |
USER_ID = "YOUR_USER_ID"
|
| 187 |
MODEL_NAME = model_id.split("/")[-1]
|
| 188 |
save_to = f"{USER_ID}/{MODEL_NAME}-AWQ-INT4"
|
| 189 |
quantized_model.push_to_hub(save_to, safe_serialization=False)
|
| 190 |
tokenizer.push_to_hub(save_to)
|
| 191 |
+
|
| 192 |
# Manual Testing
|
| 193 |
quantized_model = AutoModelForCausalLM.from_pretrained(
|
| 194 |
save_to,
|