# 🤖 AI Chat Assistant

Powered by the Gemma 3 270M model and Hugging Face Transformers.js

⚡ Fast Inference · 🔒 100% Browser-Based · 🌐 No Server Required

## 💡 How It Works

This application runs entirely in your browser using WebAssembly and Pyodide. The AI model is downloaded and executed directly in the browser, so your messages never leave your machine, and once the model is cached it can run offline.
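Concretely, the Python code talks to Transformers.js through the `transformers_js` bridge provided by the `transformers_js_py` package. Here is a minimal sketch of the load-and-generate pattern, trimmed down from the full app code at the bottom of this page:

```python
# Minimal sketch (same pattern as the full app below): Python running under
# Pyodide imports Transformers.js and drives it via awaited proxy calls.
from transformers_js import import_transformers_js

transformers = await import_transformers_js()  # load the Transformers.js library
pipeline = transformers.pipeline

# Text-generation pipeline backed by an ONNX export of Gemma 3 270M,
# executed in-browser by ONNX Runtime on WebAssembly.
generator = await pipeline(
    "text-generation",
    "onnx-community/gemma-3-270m-it-ONNX",
    {"dtype": "fp32"},
)

output = await generator("Hello!", {"max_new_tokens": 32})
```

Top-level `await` works here because Gradio-Lite runs the script in Pyodide's asynchronous context, which is also why the full app below can await the pipeline at module scope.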

## 🚀 Technologies Used

Gradio Lite · Hugging Face · ONNX Runtime · WebAssembly

## 📝 Getting Started

Simply type your message in the chat interface below. The AI will process your request locally and generate a response using the Gemma model.
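Under the hood, each turn is packed into role-tagged messages before being handed to the model. The sketch below shows the shape, mirroring what `chat_with_ai` in the code further down builds (the example contents are illustrative):

```python
# Role-tagged chat format the app assembles for the model: a system prompt,
# then alternating user/assistant turns from the history, then the new message.
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "What is machine learning?"},
]
```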

⏳ **First load:** expect a 10-30 second wait while the model is downloaded and initialized.

Below is the full application source. With Gradio-Lite, the `transformers_js_py` package is declared in the app's requirements:

```python
import gradio as gr
from transformers_js import import_transformers_js
import asyncio

# Import transformers.js
transformers = await import_transformers_js()
pipeline = transformers.pipeline
TextStreamer = transformers.TextStreamer

# Initialize the model pipeline
print("Loading model... This may take a moment.")
generator = await pipeline(
    "text-generation",
    "onnx-community/gemma-3-270m-it-ONNX",
    {
        "dtype": "fp32",
    }
)

# Store conversation history
conversation_history = []


async def chat_with_ai(message, history):
    """Process user message and generate AI response"""
    if not message:
        return history

    # Format the conversation for the model
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant. Provide clear, concise, and informative responses."},
    ]

    # Add conversation history
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    try:
        # Generate response
        output = await generator(messages, {
            "max_new_tokens": 256,
            "temperature": 0.7,
            "do_sample": True,
            "top_p": 0.9,
        })

        # Extract the assistant's response
        response = output[0]["generated_text"][-1]["content"]

        # Update history
        history.append([message, response])
        return history
    except Exception as e:
        error_msg = f"Error generating response: {str(e)}"
        history.append([message, error_msg])
        return history


def clear_chat():
    """Clear the conversation history"""
    return []


# Create the Gradio interface
with gr.Blocks(title="AI Chat Assistant") as demo:
    gr.Markdown(
        """
        # 🤖 AI Chat Assistant
        Chat with an AI powered by the Gemma model running entirely in your browser!
        """
    )

    chatbot = gr.Chatbot(
        height=400,
        placeholder="Start chatting with the AI assistant...",
        bubble_full_width=False,
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here and press Enter...",
            label="Your Message",
            lines=2,
            scale=4
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

    gr.Examples(
        examples=[
            "What is machine learning?",
            "Write a short poem about technology",
            "Explain quantum computing in simple terms",
            "What are the benefits of renewable energy?",
            "How does the internet work?",
        ],
        inputs=msg,
        label="Example Questions"
    )

    # Set up event handlers
    msg.submit(chat_with_ai, [msg, chatbot], chatbot)
    msg.submit(lambda: "", None, msg)
    submit_btn.click(chat_with_ai, [msg, chatbot], chatbot)
    submit_btn.click(lambda: "", None, msg)
    clear_btn.click(clear_chat, None, chatbot)

    gr.Markdown(
        """
        ---
        **Note:** This model runs entirely in your browser. No data is sent to any server.
        Initial loading may take some time as the model is downloaded and initialized.
        """
    )

demo.launch()
```
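For chat-style input, the pipeline appears to return the whole conversation with the model's reply appended as the last message, which is why the code reads `output[0]["generated_text"][-1]["content"]`. A sketch of the assumed shape:

```python
# Assumed return shape for chat input (inferred from the extraction logic
# in the app above, not an exhaustive description of the Transformers.js API):
output = [
    {
        "generated_text": [
            {"role": "system", "content": "You are a helpful AI assistant. ..."},
            {"role": "user", "content": "What is machine learning?"},
            {"role": "assistant", "content": "Machine learning is ..."},  # new reply
        ]
    }
]
response = output[0]["generated_text"][-1]["content"]
```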