import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import os

# --- Configuration ---
# Option A: Use a public model directly available on HF Hub
MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat" # Example: Use a smaller, faster model for demo
try:
    # Try loading with device_map for potential multi-GPU or CPU offload
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
    # If device_map causes issues on basic HF infra, load to CPU (slower) or single GPU if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading model to: {device}")
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16 if device=="cuda" else torch.float32).to(device) # Use bfloat16 on GPU if possible

    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=model.device) # Use model.device
    print(f"Pipeline created on device: {pipe.device}")
    model_loaded = True
except Exception as e:
    print(f"Error loading model {MODEL_NAME}: {e}")
    model_loaded = False
    # model_loaded stays False; the chat handler below surfaces an error message to the user

# Option B: Load a custom demo model (if you prepared and uploaded one)
# MODEL_NAME = "YOUR_HF_USERNAME/your-custom-demo-model"
# ... loading logic ...

# --- Synthetic/Public Conference Data (Context for RAG) ---
# Keep this concise for the demo prompt limit
CONFERENCE_CONTEXT = """
**Conference:** 2024 TianSuan AI National Annual Conference - "Intelligent Computing the Future, Charting a New Chapter Together"
**Date:** November 15-16, 2024
**Location:** Hangzhou Future Sci-Tech City International Conference Center (Virtual Location for Demo)
**Keynote Speaker (Day 1 AM):** Dr. Evelyn Reed (CEO, TianSuan AI), Topic: "Year in Review & Future Strategy"
**Tech Talk (Day 1 PM):** Dr. Kenji Tanaka (CTO, TianSuan AI), Topic: "Advances in Generative AI at TianSuan"
**Gala Dinner:** November 15th Evening, Grand Ballroom
**Check-in:** Starts 8:00 AM, Nov 15th, via AI Assistant App (Face Recognition or QR)
**Gift:** Digital coupon delivered via AI Assistant App after conference conclusion.
**WiFi:** Network: TianSuanGuest, Password: AIConf2024
**Emergency Contact:** Available via the 'Security' section in the AI Assistant App.
"""

# --- Chat Function ---
def ask_ai_assistant(query, chat_history):
    if not model_loaded:
        # The submit handler expects (textbox_value, chat_history), so return both values here as well
        chat_history.append((query, "Sorry, the AI model is currently unavailable. Please try again later."))
        return "", chat_history

    # Simple RAG: the conference context is injected via the system message below,
    # so no separate prompt template is needed and the model's chat template stays intact.

    # Use the pipeline for generation - Qwen Chat format requires specific message structure
    messages = [
        {"role": "system", "content": f"You are a helpful AI assistant for the TianSuan AI conference. Use only the provided context to answer questions. Context: {CONFERENCE_CONTEXT}"},
        {"role": "user", "content": query}
    ]
    # Note: recent transformers releases let the text-generation pipeline accept a list of chat
    # messages directly and apply the model's chat template itself. For finer control over decoding,
    # model.generate with tokenizer.apply_chat_template also works (see the commented sketch below).
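    # Hedged sketch of that direct-generate alternative (not executed here); it assumes the same
    # `tokenizer` and `model` objects loaded at startup and the same `messages` list built above:
    #
    #     input_ids = tokenizer.apply_chat_template(
    #         messages, add_generation_prompt=True, return_tensors="pt"
    #     ).to(model.device)
    #     generated = model.generate(
    #         input_ids, max_new_tokens=150, do_sample=True, temperature=0.7, top_p=0.9
    #     )
    #     response = tokenizer.decode(generated[0][input_ids.shape[-1]:], skip_special_tokens=True)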
    try:
        # Qwen chat models end an assistant turn with <|im_end|>; include it alongside the EOS token
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|im_end|>")
        ]
        # The chat-aware pipeline applies the model's chat template internally,
        # so the messages list is passed directly (no manual apply_chat_template needed here)
        outputs = pipe(messages, max_new_tokens=150, eos_token_id=terminators, do_sample=True, temperature=0.7, top_p=0.9)[0]  # Adjust generation parameters as needed
        # Extract the generated part. This depends heavily on the specific model's output format.
        # For Qwen chat pipeline, it might be in outputs['generated_text'] which could be a list or dict
        response_data = outputs['generated_text']

        # Find the actual response part (needs careful parsing based on model output)
        # This is a common challenge with pipelines vs direct model.generate
        if isinstance(response_data, list):  # Chat-style output: a list of message dicts
            # Look for the assistant's last message
            for msg in reversed(response_data):
                if msg['role'] == 'assistant':
                    response = msg['content']
                    break
            else:  # No assistant message found (unexpected when the pipeline appends a generated turn)
                response = "Sorry, I couldn't generate a response based on the format."
        elif isinstance(response_data, str):  # Plain string output
            # Some pipelines echo the prompt for string inputs; further splitting may be needed here
            response = response_data.strip()
        else:
            response = str(response_data)  # Fallback

    except Exception as e:
        print(f"Error during generation: {e}")
        response = f"Sorry, an error occurred while generating the response: {e}"


    chat_history.append((query, response))
    return "", chat_history # Clear input box, update history

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # TianSuan AI Conference Assistant - Demo
        Ask questions about the **2024 TianSuan AI National Annual Conference** (based on limited demo data).
        *This is a conceptual demonstration using public AI models.*
        [Visit GitHub Repo for Full Concept](https://github.com/YOUR_GITHUB_USERNAME/tian-suan-ai-conference-assistant-showcase) <!-- Replace with your actual GitHub link -->
        """
    )
    chatbot = gr.Chatbot(label="AI Assistant Chat", height=500)
    msg = gr.Textbox(label="Your Question", placeholder="e.g., When is the Gala Dinner?")
    clear = gr.Button("Clear Chat")

    msg.submit(ask_ai_assistant, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(debug=True) # Debug=True for local testing