## install AutoAWQ from source, then make sure transformers is up to date
git clone https://github.com/casper-hansen/AutoAWQ.git   # latest source as of 2025-05-01
cd AutoAWQ                                               # go into the AutoAWQ folder
pip install -e .
pip install --upgrade transformers
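
A quick sanity check can confirm the install before loading the model. This is a minimal sketch; the "autoawq" distribution name and the GPU check are assumptions added here, not part of the original instructions.

from importlib.metadata import version
import torch

print("autoawq:", version("autoawq"))                 # fails if the editable install did not register
print("CUDA available:", torch.cuda.is_available())   # the AWQ kernels expect a CUDA GPU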

## FOR STREAMING
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer, TextStreamer
from awq.utils.utils import get_best_device

device      = get_best_device()
quant_path  = "Siddharth63/Qwen3-14B-base-AWQ"        # path or HF repo for the AWQ checkpoint

# ---------- load model & tokenizer ----------
model      = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer  = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)
streamer   = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# ---------- tokenise & generate ----------
input_ids = tokenizer("Atherosclerosis is", return_tensors="pt"
).input_ids.to(device)

_ = model.generate(
    input_ids,
    streamer=streamer,
    max_new_tokens=512,   # number of new tokens to generate (not the full context window)
    use_cache=True
)
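
The call above uses greedy decoding by default. Sampling parameters can be forwarded to `model.generate` just like with a regular transformers model; the values below are illustrative assumptions, not recommended settings for this checkpoint.

# ---------- optional: sampled generation (illustrative settings) ----------
_ = model.generate(
    input_ids,
    streamer=streamer,
    max_new_tokens=512,
    do_sample=True,       # switch from greedy decoding to sampling
    temperature=0.7,      # assumed value for illustration
    top_p=0.9,            # nucleus sampling cutoff, also assumed
    use_cache=True
)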


## FOR NON-STREAMING
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
from awq.utils.utils import get_best_device

device      = get_best_device()
quant_path  = "Siddharth63/Qwen3-14B-base-AWQ"        # path or HF repo for the AWQ checkpoint

# ---------- load model & tokenizer ----------
model      = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer  = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)

input_ids = tokenizer(
    "Atherosclerosis is",
    return_tensors="pt"
).input_ids.to(device)

# ---------- generate (blocking) ----------
output_ids = model.generate(
    input_ids,
    max_new_tokens=100,          # or max_length / temperature / etc.
    use_cache=True               # default; speeds up incremental decoding
)

response = tokenizer.decode(
    output_ids[0],
    skip_special_tokens=True,    # drop special tokens from the decoded text
)

print("\n=== Model reply ===\n", response)
Safetensors · Model size: 3.32B params · Tensor types: I32, BF16, FP16