import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# --- Configuration ---
# Option A: Use a public model directly available on the HF Hub
MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"  # Example: use a smaller, faster model for the demo

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Option: load with device_map for potential multi-GPU or CPU offload:
    # model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
    # If device_map causes issues on basic HF infra, load to CPU (slower) or a single GPU if available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading model to: {device}")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,  # Use bfloat16 on GPU if possible
    ).to(device)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=model.device)  # Use model.device
    print(f"Pipeline created on device: {pipe.device}")
    model_loaded = True
except Exception as e:
    print(f"Error loading model {MODEL_NAME}: {e}")
    model_loaded = False  # The chat function below reports the failure to the user

# Option B: Load a custom demo model (if you prepared and uploaded one)
# MODEL_NAME = "YOUR_HF_USERNAME/your-custom-demo-model"
# ... loading logic ...

# --- Synthetic/Public Conference Data (Context for RAG) ---
# Keep this concise to stay within the demo prompt limit
CONFERENCE_CONTEXT = """
**Conference:** 2024 TianSuan AI National Annual Conference - "Intelligent Computing the Future, Charting a New Chapter Together"
**Date:** November 15-16, 2024
**Location:** Hangzhou Future Sci-Tech City International Conference Center (Virtual Location for Demo)
**Keynote Speaker (Day 1 AM):** Dr. Evelyn Reed (CEO, TianSuan AI), Topic: "Year in Review & Future Strategy"
**Tech Talk (Day 1 PM):** Dr. Kenji Tanaka (CTO, TianSuan AI), Topic: "Advances in Generative AI at TianSuan"
**Gala Dinner:** November 15th Evening, Grand Ballroom
**Check-in:** Starts 8:00 AM, Nov 15th, via AI Assistant App (Face Recognition or QR)
**Gift:** Digital coupon delivered via AI Assistant App after conference conclusion.
**WiFi:** Network: TianSuanGuest, Password: AIConf2024
**Emergency Contact:** Available via the 'Security' section in the AI Assistant App.
"""

# --- Chat Function ---
def ask_ai_assistant(query, chat_history):
    if not model_loaded:
        chat_history.append((query, "Sorry, the AI model is currently unavailable. Please try again later."))
        return "", chat_history

    # Simple RAG: inject the conference context via the system message.
    # The Qwen chat format expects a list of role/content messages.
    messages = [
        {"role": "system", "content": f"You are a helpful AI assistant for the TianSuan AI conference. Use only the provided context to answer questions. Context: {CONFERENCE_CONTEXT}"},
        {"role": "user", "content": query}
    ]

    # Note: recent 'transformers' text-generation pipelines accept this chat format directly.
    # An alternative is to call model.generate with tokenizer.apply_chat_template,
    # as in the commented-out sketch below.
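    # A minimal sketch of that alternative (kept commented out so the pipeline call
    # below remains the active path). It assumes the default chat template of
    # MODEL_NAME and reuses the generation settings from this demo:
    #
    # input_ids = tokenizer.apply_chat_template(
    #     messages, add_generation_prompt=True, return_tensors="pt"
    # ).to(model.device)
    # with torch.no_grad():
    #     output_ids = model.generate(
    #         input_ids,
    #         max_new_tokens=150,
    #         do_sample=True,
    #         temperature=0.7,
    #         top_p=0.9,
    #     )
    # # Decode only the newly generated tokens, i.e. everything after the prompt
    # response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)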
    try:
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|im_end|>")  # End-of-turn token for Qwen chat models
        ]
        # The pipeline applies the model's chat template internally when given a list of messages.
        outputs = pipe(
            messages,
            max_new_tokens=150,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )[0]  # Adjust generation parameters as needed

        # Extract the generated part. This depends on the pipeline's output format:
        # for chat input, 'generated_text' is usually the full message list including the reply.
        response_data = outputs['generated_text']

        # Find the actual response part (needs careful parsing based on the model output).
        # This is a common difference between pipelines and direct model.generate.
        if isinstance(response_data, list):  # Chat-style output: a list of role/content messages
            # Look for the assistant's last message
            for msg in reversed(response_data):
                if msg['role'] == 'assistant':
                    response = msg['content']
                    break
            else:  # No assistant message found (shouldn't happen with a chat model)
                response = "Sorry, I couldn't generate a response based on the format."
        elif isinstance(response_data, str):  # Plain string output (unlikely for chat input)
            response = response_data.strip()
        else:
            response = str(response_data)  # Fallback

    except Exception as e:
        print(f"Error during generation: {e}")
        response = f"Sorry, an error occurred while generating the response: {e}"

    chat_history.append((query, response))
    return "", chat_history  # Clear the input box, update the history

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # TianSuan AI Conference Assistant - Demo
        Ask questions about the **2024 TianSuan AI National Annual Conference** (based on limited demo data).
        *This is a conceptual demonstration using public AI models.*
        [Visit GitHub Repo for Full Concept](https://github.com/YOUR_GITHUB_USERNAME/tian-suan-ai-conference-assistant-showcase)
        """
    )
    chatbot = gr.Chatbot(label="AI Assistant Chat", height=500)
    msg = gr.Textbox(label="Your Question", placeholder="e.g., When is the Gala Dinner?")
    clear = gr.Button("Clear Chat")

    msg.submit(ask_ai_assistant, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(debug=True)  # debug=True for local testing