import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import os

# --- Configuration ---
# Option A: use a public model that is directly available on the HF Hub
MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"  # Example: a smaller, faster chat model for the demo

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Alternative: load with device_map for multi-GPU or CPU offload:
    # model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
    # If device_map causes issues on basic HF infra, load to CPU (slower) or a single GPU if available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading model to: {device}")
    # Use bfloat16 on GPU when available; fall back to float32 on CPU.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
    ).to(device)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=model.device)
    print(f"Pipeline created on device: {pipe.device}")
    model_loaded = True
except Exception as e:
    print(f"Error loading model {MODEL_NAME}: {e}")
    model_loaded = False
    # Fallback or error-message handling is needed here.
# Option B: Load a custom demo model (if you prepared and uploaded one)
# MODEL_NAME = "YOUR_HF_USERNAME/your-custom-demo-model"
# ... loading logic ...
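# A minimal sketch of that Option B loading path (it mirrors Option A; the repo name above is a
# placeholder, not a real model):
#
#     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
#     model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
#     pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)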
# --- Synthetic/Public Conference Data (Context for RAG) ---
# Keep this concise for the demo prompt limit
CONFERENCE_CONTEXT = """
**Conference:** 2024 TianSuan AI National Annual Conference - "Intelligent Computing the Future, Charting a New Chapter Together"
**Date:** November 15-16, 2024
**Location:** Hangzhou Future Sci-Tech City International Conference Center (Virtual Location for Demo)
**Keynote Speaker (Day 1 AM):** Dr. Evelyn Reed (CEO, TianSuan AI), Topic: "Year in Review & Future Strategy"
**Tech Talk (Day 1 PM):** Dr. Kenji Tanaka (CTO, TianSuan AI), Topic: "Advances in Generative AI at TianSuan"
**Gala Dinner:** November 15th Evening, Grand Ballroom
**Check-in:** Starts 8:00 AM, Nov 15th, via AI Assistant App (Face Recognition or QR)
**Gift:** Digital coupon delivered via AI Assistant App after conference conclusion.
**WiFi:** Network: TianSuanGuest, Password: AIConf2024
**Emergency Contact:** Available via the 'Security' section in the AI Assistant App.
"""

# --- Chat Function ---
def ask_ai_assistant(query, chat_history):
    if not model_loaded:
        # Return the same (textbox, chatbot) pair that msg.submit expects as outputs.
        chat_history.append((query, "Sorry, the AI model is currently unavailable. Please try again later."))
        return "", chat_history
    # Simple RAG: inject the conference context into the prompt. Qwen chat models expect the
    # structured message format, so the context goes into the system message rather than a raw
    # "Context + Question + Answer:" prompt string.
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful AI assistant for the TianSuan AI conference. "
                "Use only the provided context to answer questions. "
                f"Context: {CONFERENCE_CONTEXT}"
            ),
        },
        {"role": "user", "content": query},
    ]
    # Note: recent transformers pipelines accept this chat format directly; an alternative is to
    # call model.generate with tokenizer.apply_chat_template (see the commented sketch below).
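    # A minimal sketch of that direct-generate alternative (not executed here; it assumes the same
    # sampling settings and Qwen's "<|im_end|>" end-of-turn token as the stop token):
    #
    #     input_ids = tokenizer.apply_chat_template(
    #         messages, add_generation_prompt=True, return_tensors="pt"
    #     ).to(model.device)
    #     output_ids = model.generate(
    #         input_ids,
    #         max_new_tokens=150,
    #         do_sample=True,
    #         temperature=0.7,
    #         top_p=0.9,
    #         eos_token_id=[tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|im_end|>")],
    #         pad_token_id=tokenizer.eos_token_id,
    #     )
    #     # Decode only the newly generated tokens, i.e. everything after the prompt.
    #     response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)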
    try:
        # Stop generation at the tokenizer's EOS token or at Qwen's end-of-turn token <|im_end|>.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|im_end|>"),
        ]
        # The text-generation pipeline applies the chat template itself when given a message list.
        outputs = pipe(
            messages,
            max_new_tokens=150,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )[0]
        # Extract the generated part. The exact output format depends on the model and pipeline
        # version, which is a common difference between pipelines and direct model.generate calls.
        response_data = outputs["generated_text"]
        if isinstance(response_data, list):
            # Chat-format output: take the assistant's last message.
            for msg in reversed(response_data):
                if msg["role"] == "assistant":
                    response = msg["content"]
                    break
            else:  # No assistant message found (shouldn't happen with chat-format input)
                response = "Sorry, I couldn't generate a response based on the format."
        elif isinstance(response_data, str):
            # Plain-string output from older pipeline versions: use the text as-is.
            response = response_data.strip()
        else:
            response = str(response_data)  # Fallback
    except Exception as e:
        print(f"Error during generation: {e}")
        response = f"Sorry, an error occurred while generating the response: {e}"

    chat_history.append((query, response))
    return "", chat_history  # Clear the input box, update the chat history

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# TianSuan AI Conference Assistant - Demo
Ask questions about the **2024 TianSuan AI National Annual Conference** (based on limited demo data).
*This is a conceptual demonstration using public AI models.*
[Visit GitHub Repo for Full Concept](https://github.com/YOUR_GITHUB_USERNAME/tian-suan-ai-conference-assistant-showcase) <!-- Replace with your actual GitHub link -->
"""
    )
    chatbot = gr.Chatbot(label="AI Assistant Chat", height=500)
    msg = gr.Textbox(label="Your Question", placeholder="e.g., When is the Gala Dinner?")
    clear = gr.Button("Clear Chat")

    msg.submit(ask_ai_assistant, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(debug=True)  # debug=True for local testing
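
# To try the demo locally (assuming the dependencies are installed):
#   pip install gradio transformers torch
#   python app.py
# On a Gradio-SDK Hugging Face Space, app.py is picked up and launched automatically.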