import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import os

# --- Configuration ---
# Option A: use a public model that is directly available on the HF Hub
MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"  # Example: a smaller, faster chat model for the demo

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Alternative: load with device_map for multi-GPU or CPU offload:
    # model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
    # If device_map causes issues on basic HF infra, load to CPU (slower) or a single GPU if available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading model to: {device}")
    # Use bfloat16 on GPU when available; fall back to float32 on CPU.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
    ).to(device)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=model.device)
    print(f"Pipeline created on device: {pipe.device}")
    model_loaded = True
except Exception as e:
    print(f"Error loading model {MODEL_NAME}: {e}")
    model_loaded = False
    # Fallback or error-message handling is needed here.
# Option B: Load a custom demo model (if you prepared and uploaded one)
# MODEL_NAME = "YOUR_HF_USERNAME/your-custom-demo-model"
# ... loading logic ...
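# A minimal sketch of that Option B loading path (it mirrors Option A; the repo name above is a
# placeholder, not a real model):
#
#     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
#     model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
#     pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)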
# --- Synthetic/Public Conference Data (Context for RAG) ---
# Keep this concise for the demo prompt limit
CONFERENCE_CONTEXT = """
**Conference:** 2024 TianSuan AI National Annual Conference - "Intelligent Computing the Future, Charting a New Chapter Together"
**Date:** November 15-16, 2024
**Location:** Hangzhou Future Sci-Tech City International Conference Center (Virtual Location for Demo)
**Keynote Speaker (Day 1 AM):** Dr. Evelyn Reed (CEO, TianSuan AI), Topic: "Year in Review & Future Strategy"
**Tech Talk (Day 1 PM):** Dr. Kenji Tanaka (CTO, TianSuan AI), Topic: "Advances in Generative AI at TianSuan"
**Gala Dinner:** November 15th Evening, Grand Ballroom
**Check-in:** Starts 8:00 AM, Nov 15th, via AI Assistant App (Face Recognition or QR)
**Gift:** Digital coupon delivered via AI Assistant App after conference conclusion.
**WiFi:** Network: TianSuanGuest, Password: AIConf2024
**Emergency Contact:** Available via the 'Security' section in the AI Assistant App.
"""

# --- Chat Function ---
def ask_ai_assistant(query, chat_history):
    if not model_loaded:
        # Return the same (textbox, chatbot) pair that msg.submit expects as outputs.
        chat_history.append((query, "Sorry, the AI model is currently unavailable. Please try again later."))
        return "", chat_history
    # Simple RAG: inject the conference context into the prompt. Qwen chat models expect the
    # structured message format, so the context goes into the system message rather than a raw
    # "Context + Question + Answer:" prompt string.
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful AI assistant for the TianSuan AI conference. "
                "Use only the provided context to answer questions. "
                f"Context: {CONFERENCE_CONTEXT}"
            ),
        },
        {"role": "user", "content": query},
    ]
    # Note: recent transformers pipelines accept this chat format directly; an alternative is to
    # call model.generate with tokenizer.apply_chat_template (see the commented sketch below).
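    # A minimal sketch of that direct-generate alternative (not executed here; it assumes the same
    # sampling settings and Qwen's "<|im_end|>" end-of-turn token as the stop token):
    #
    #     input_ids = tokenizer.apply_chat_template(
    #         messages, add_generation_prompt=True, return_tensors="pt"
    #     ).to(model.device)
    #     output_ids = model.generate(
    #         input_ids,
    #         max_new_tokens=150,
    #         do_sample=True,
    #         temperature=0.7,
    #         top_p=0.9,
    #         eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|im_end|>")],
    #         pad_token_id=tokenizer.eos_token_id,
    #     )
    #     # Decode only the newly generated tokens, i.e. everything after the prompt.
    #     response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)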
    try:
        # Stop generation at the tokenizer's EOS token or at Qwen's end-of-turn token <|im_end|>.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|im_end|>"),
        ]
        # The text-generation pipeline applies the chat template itself when given a message list.
        outputs = pipe(
            messages,
            max_new_tokens=150,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )[0]
        # Extract the generated part. The exact output format depends on the model and pipeline
        # version, which is a common difference between pipelines and direct model.generate calls.
        response_data = outputs["generated_text"]
        if isinstance(response_data, list):
            # Chat-format output: take the assistant's last message.
            for msg in reversed(response_data):
                if msg["role"] == "assistant":
                    response = msg["content"]
                    break
            else:  # No assistant message found (shouldn't happen with chat-format input)
                response = "Sorry, I couldn't generate a response based on the format."
        elif isinstance(response_data, str):
            # Plain-string output from older pipeline versions: use the text as-is.
            response = response_data.strip()
        else:
            response = str(response_data)  # Fallback
    except Exception as e:
        print(f"Error during generation: {e}")
        response = f"Sorry, an error occurred while generating the response: {e}"

    chat_history.append((query, response))
    return "", chat_history  # Clear the input box, update the chat history

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# TianSuan AI Conference Assistant - Demo
Ask questions about the **2024 TianSuan AI National Annual Conference** (based on limited demo data).
*This is a conceptual demonstration using public AI models.*
[Visit GitHub Repo for Full Concept](https://github.com/YOUR_GITHUB_USERNAME/tian-suan-ai-conference-assistant-showcase) <!-- Replace with your actual GitHub link -->
"""
    )
    chatbot = gr.Chatbot(label="AI Assistant Chat", height=500)
    msg = gr.Textbox(label="Your Question", placeholder="e.g., When is the Gala Dinner?")
    clear = gr.Button("Clear Chat")

    msg.submit(ask_ai_assistant, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(debug=True)  # debug=True for local testing
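
# To try the demo locally (assuming the dependencies are installed):
#   pip install gradio transformers torch
#   python app.py
# On a Gradio-SDK Hugging Face Space, app.py is picked up and launched automatically.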