---
license: apache-2.0
---

```bash
pip install --upgrade auto-round transformers
```

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from auto_round import AutoRoundConfig  # must import to enable the auto-round format

quantized_model_path = "Siddharth63/Qwen3-8B-Base-2bits-AutoRound-GPTQ-sym"

quantization_config = AutoRoundConfig(backend="auto")
model = AutoModelForCausalLM.from_pretrained(
    quantized_model_path,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)

text = "Atherosclerosis"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))
```
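
For reference, the snippet below is a minimal sketch of how a checkpoint in this format (2-bit, symmetric, GPTQ-style packing, as the repository name suggests) could be produced with auto-round. The base model name, group size, and calibration settings are assumptions and may differ from what was actually used for this upload.

```python
# Sketch of a 2-bit symmetric GPTQ-format quantization with auto-round.
# Assumptions: base model "Qwen/Qwen3-8B-Base", group_size=128, default calibration data.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

base_model = "Qwen/Qwen3-8B-Base"  # assumed base checkpoint
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(base_model)

# bits/sym follow the repository name; group_size is an assumed default
autoround = AutoRound(model, tokenizer, bits=2, group_size=128, sym=True)
autoround.quantize()

# Export with GPTQ-compatible packing so the checkpoint loads as shown above
autoround.save_quantized(
    "Qwen3-8B-Base-2bits-AutoRound-GPTQ-sym", format="auto_gptq", inplace=True
)
```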