import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

# --- Configuration ---
# Option A: Use a public model directly available on the HF Hub
MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"  # Example: use a smaller, faster model for the demo

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Option: load with device_map for potential multi-GPU or CPU offload:
    # model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="auto")
    # If device_map causes issues on basic HF infra, load to CPU (slower) or a single GPU if available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading model to: {device}")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,  # Use bfloat16 on GPU if possible
    ).to(device)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=model.device)  # Use model.device
    print(f"Pipeline created on device: {pipe.device}")
    model_loaded = True
except Exception as e:
    print(f"Error loading model {MODEL_NAME}: {e}")
    model_loaded = False  # The chat function below reports the failure to the user

# Option B: Load a custom demo model (if you prepared and uploaded one)
# MODEL_NAME = "YOUR_HF_USERNAME/your-custom-demo-model"
# ... loading logic ...

# --- Synthetic/Public Conference Data (Context for RAG) ---
# Keep this concise to stay within the demo prompt limit
CONFERENCE_CONTEXT = """
**Conference:** 2024 TianSuan AI National Annual Conference - "Intelligent Computing the Future, Charting a New Chapter Together"
**Date:** November 15-16, 2024
**Location:** Hangzhou Future Sci-Tech City International Conference Center (Virtual Location for Demo)
**Keynote Speaker (Day 1 AM):** Dr. Evelyn Reed (CEO, TianSuan AI), Topic: "Year in Review & Future Strategy"
**Tech Talk (Day 1 PM):** Dr. Kenji Tanaka (CTO, TianSuan AI), Topic: "Advances in Generative AI at TianSuan"
**Gala Dinner:** November 15th Evening, Grand Ballroom
**Check-in:** Starts 8:00 AM, Nov 15th, via AI Assistant App (Face Recognition or QR)
**Gift:** Digital coupon delivered via AI Assistant App after conference conclusion.
**WiFi:** Network: TianSuanGuest, Password: AIConf2024
**Emergency Contact:** Available via the 'Security' section in the AI Assistant App.
"""

# --- Chat Function ---
def ask_ai_assistant(query, chat_history):
    if not model_loaded:
        chat_history.append((query, "Sorry, the AI model is currently unavailable. Please try again later."))
        return "", chat_history

    # Simple RAG: inject the conference context via the system message.
    # The Qwen chat format expects a list of role/content messages.
    messages = [
        {"role": "system", "content": f"You are a helpful AI assistant for the TianSuan AI conference. Use only the provided context to answer questions. Context: {CONFERENCE_CONTEXT}"},
        {"role": "user", "content": query}
    ]

    # Note: recent 'transformers' text-generation pipelines accept this chat format directly.
    # An alternative is to call model.generate with tokenizer.apply_chat_template,
    # as in the commented-out sketch below.
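    # A minimal sketch of that alternative (kept commented out so the pipeline call
    # below remains the active path). It assumes the default chat template of
    # MODEL_NAME and reuses the generation settings from this demo:
    #
    # input_ids = tokenizer.apply_chat_template(
    #     messages, add_generation_prompt=True, return_tensors="pt"
    # ).to(model.device)
    # with torch.no_grad():
    #     output_ids = model.generate(
    #         input_ids,
    #         max_new_tokens=150,
    #         do_sample=True,
    #         temperature=0.7,
    #         top_p=0.9,
    #     )
    # # Decode only the newly generated tokens, i.e. everything after the prompt
    # response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)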
    try:
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|im_end|>")  # End-of-turn token for Qwen chat models
        ]
        # The pipeline applies the model's chat template internally when given a list of messages.
        outputs = pipe(
            messages,
            max_new_tokens=150,
            eos_token_id=terminators,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )[0]  # Adjust generation parameters as needed

        # Extract the generated part. This depends on the pipeline's output format:
        # for chat input, 'generated_text' is usually the full message list including the reply.
        response_data = outputs['generated_text']

        # Find the actual response part (needs careful parsing based on the model output).
        # This is a common difference between pipelines and direct model.generate.
        if isinstance(response_data, list):  # Chat-style output: a list of role/content messages
            # Look for the assistant's last message
            for msg in reversed(response_data):
                if msg['role'] == 'assistant':
                    response = msg['content']
                    break
            else:  # No assistant message found (shouldn't happen with a chat model)
                response = "Sorry, I couldn't generate a response based on the format."
        elif isinstance(response_data, str):  # Plain string output (unlikely for chat input)
            response = response_data.strip()
        else:
            response = str(response_data)  # Fallback

    except Exception as e:
        print(f"Error during generation: {e}")
        response = f"Sorry, an error occurred while generating the response: {e}"

    chat_history.append((query, response))
    return "", chat_history  # Clear the input box, update the history

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # TianSuan AI Conference Assistant - Demo
        Ask questions about the **2024 TianSuan AI National Annual Conference** (based on limited demo data).
        *This is a conceptual demonstration using public AI models.*
        [Visit GitHub Repo for Full Concept](https://github.com/YOUR_GITHUB_USERNAME/tian-suan-ai-conference-assistant-showcase)
        """
    )
    chatbot = gr.Chatbot(label="AI Assistant Chat", height=500)
    msg = gr.Textbox(label="Your Question", placeholder="e.g., When is the Gala Dinner?")
    clear = gr.Button("Clear Chat")

    msg.submit(ask_ai_assistant, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(debug=True)  # debug=True for local testing