from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Model name and 4-bit quantization configuration
model_name = "ruslanmv/Medical-Llama3-8B"
device_map = "auto"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
    use_cache=False,
    device_map=device_map,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Set a ChatML-style chat template (Jinja2, as expected by apply_chat_template)
chat_template = (
    "{% for message in messages %}"
    "<|im_start|>{{ message['role'] }}\n{{ message['content'] }}<|im_end|>\n"
    "{% endfor %}"
    "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
)
tokenizer.chat_template = chat_template

# Define the askme function
def askme(question):
    sys_message = """You are an AI Medical Assistant trained on a vast dataset of health information.
Please be thorough and provide an informative answer.
If you don't know the answer to a specific medical inquiry, advise seeking professional help."""
    # Structure messages for the chat
    messages = [
        {"role": "system", "content": sys_message},
        {"role": "user", "content": question},
    ]
    # Apply the chat template and tokenize the prompt
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Generate the response
    outputs = model.generate(**inputs, max_new_tokens=100, use_cache=True)
    # Extract the assistant's answer and strip any leftover chat markers
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
    answer = response_text.split("<|im_start|>assistant")[-1]
    answer = answer.replace("<|im_end|>", "").replace(tokenizer.eos_token, "").strip()
    return answer

# Example usage
question = """I'm a 35-year-old male and for the past few months, I've been experiencing fatigue,
increased sensitivity to cold, and dry, itchy skin. Could these symptoms be related to hypothyroidism?
If so, what steps should I take to get a proper diagnosis and discuss treatment options?"""
print(askme(question))