---
library_name: transformers
license: apache-2.0
license_link: https://huggingface.co/Qwen/Qwen3-32B/blob/main/LICENSE
pipeline_tag: text-generation
base_model: Qwen/Qwen3-32B
---

# creation

```python
from transformers import AutoModelForCausalLM
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import QuantizationModifier
from accelerate import infer_auto_device_map, init_empty_weights

model_id = "Qwen/Qwen3-32B"
# Output directory is derived from the repo name, e.g. "Qwen3-32B.w4a16".
model_out = model_id.split("/")[1] + ".w4a16"

# Instantiate the model under init_empty_weights() only to obtain the module
# names that infer_auto_device_map() reports; the instance is dropped right
# after the map has been computed.
with init_empty_weights():
    dummy_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="bfloat16")
    device_map = infer_auto_device_map(
        dummy_model,
        no_split_module_classes=dummy_model._no_split_modules,
    )
    del dummy_model

# Keep the inferred module names but place every one of them on the CPU.
device_map = {module_name: "cpu" for module_name in device_map}

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map=device_map,
    torch_dtype="bfloat16",
)

# Apply the "W4A16" scheme to all Linear modules, leaving lm_head untouched.
# NOTE(review): dampening_frac looks like a GPTQ-style option; confirm that
# QuantizationModifier accepts (and actually uses) it in the installed
# llmcompressor version.
recipe = QuantizationModifier(
    targets="Linear",
    scheme="W4A16",
    ignore=["lm_head"],
    dampening_frac=0.1,
)

# Run the recipe once over the model and write the result to model_out.
oneshot(model=model, recipe=recipe, output_dir=model_out)
```