from load_model import load_model

# Load the model and tokenizer once at import time so every call to
# generate_response() reuses the same (expensive-to-create) objects.
model, tokenizer = load_model()


def generate_response(prompt: str) -> str:
    """Generate a text completion for *prompt* with the loaded model.

    Args:
        prompt: Raw input text to encode and feed to the model.

    Returns:
        The decoded generation for the first (only) sequence in the batch,
        with special tokens (BOS/EOS/pad) stripped.
    """
    # return_tensors="pt" yields a dict of PyTorch tensors, typically
    # {"input_ids": ..., "attention_mask": ...}.
    inputs = tokenizer(prompt, return_tensors="pt")
    # Forward the full encoding rather than only input_ids: dropping the
    # attention_mask can silently change results (HF emits a warning when
    # it has to infer the mask, e.g. with padding present).
    outputs = model.generate(**inputs)
    # generate() returns a batch of token-id sequences; decode the single
    # sequence we requested.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)