Update bitsandbytes examples
#33
by
mdouglas
HF staff
- opened
Works on my machine. Tested with the following script:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
# Smoke test: load the model with 4-bit bitsandbytes quantization and
# generate a short completion from a single-turn chat prompt.
model_id = "google/gemma-2-27b-it"

# 4-bit weights; bfloat16 is used as the bitsandbytes compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)

# Render the conversation to a prompt string first (tokenize=False), with
# the generation prompt appended.
messages = [
    {"role": "user", "content": "Write me a poem about Machine Learning."},
]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Encode with add_special_tokens=False so the tokenizer adds no special
# tokens on top of the already-rendered template text.
prompt_ids = tokenizer.encode(
    prompt,
    add_special_tokens=False,
    return_tensors="pt",
)

# Move the prompt ids to the model's device and generate 20 new tokens.
generated = model.generate(
    input_ids=prompt_ids.to(model.device),
    max_new_tokens=20,
)
print(tokenizer.decode(generated[0]))