import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained("PATH_TO_MODEL", device_map="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("PATH_TO_MODEL", trust_remote_code=True)

# For PEFT models (if adapters are not merged into the base model)
# from peft import PeftModel
# model = PeftModel.from_pretrained(model, "PATH_TO_MODEL")

# Prepare input
system_prompt = "You are embodying the following persona: [PERSONA]"
user_prompt = "Survey Question: [QUESTION]\n\nPlease provide your honest and detailed response to this question."

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt}
]

# Apply the chat template and tokenize
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

# Generate response
with torch.no_grad():
    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )

# Decode only the newly generated tokens (outputs[0] also contains the echoed prompt)
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(response)