from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRoundConfig  # required so transformers can load the auto-round format

quantized_model_path = "Siddharth63/Qwen3-4B-Base-4bit-Autoround-GPTQ-sym"

# Load the quantized weights and tokenizer from the Hub
model = AutoModelForCausalLM.from_pretrained(quantized_model_path,
                                             device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)

# Tokenize a prompt, generate up to 50 new tokens, and decode the result
text = "There is a girl who likes adventure,"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
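For reference, a checkpoint like this one can be produced with the auto-round library and exported in a GPTQ-compatible format. The sketch below is a minimal, hedged example of that recipe: the base checkpoint (Qwen/Qwen3-4B-Base), group size, and calibration defaults are assumptions for illustration, not the exact settings used to build this model.

from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

base_model_path = "Qwen/Qwen3-4B-Base"  # assumed base checkpoint
model = AutoModelForCausalLM.from_pretrained(base_model_path, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# 4-bit symmetric quantization; group_size=128 is the library default,
# not a confirmed setting for this repo
autoround = AutoRound(model, tokenizer, bits=4, group_size=128, sym=True)
autoround.quantize()

# Export in the GPTQ-compatible format, matching the "-GPTQ-sym" naming
autoround.save_quantized("./Qwen3-4B-Base-4bit-Autoround-GPTQ-sym",
                         format="auto_gptq")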
Model details: 876M params (Safetensors) · tensor types: I32, BF16, FP16