# app.py - Simplified for Hugging Face Spaces
# ---------------------------------------------------------------
# This version uses the high-level `pipeline` from transformers
# for a much simpler and cleaner implementation.
# ---------------------------------------------------------------
import os
import torch
import gradio as gr
from transformers import pipeline
# ── Configuration ──────────────────────────────────────────────────────────────
# Set the model repository ID
MODEL_ID = "Reubencf/gemma3-goan-finetuned"
HF_TOKEN = os.getenv("HF_TOKEN") # Optional: for private models
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TITLE = "🌴 Gemma Goan Q&A Bot"
DESCRIPTION = (
    "This is a simple Gradio chat interface for the Gemma model fine-tuned on a Goan Q&A dataset.\n"
    "Ask about Goa, Konkani culture, or general topics!"
)
# ── Load Model Pipeline ─────────────────────────────────────────────────────
# We load the model and tokenizer into a pipeline object.
# This is done only once when the app starts.
# `device_map="auto"` ensures the model is placed on a GPU if available.
print(f"[Init] Loading model pipeline: {MODEL_ID} on {DEVICE}...")
try:
    pipe = pipeline(
        "text-generation",
        model=MODEL_ID,
        torch_dtype=torch.bfloat16,  # Use bfloat16 for better performance
        device_map="auto",
        token=HF_TOKEN,
    )
    MODEL_LOADED = True
    print("[Init] Model pipeline loaded successfully.")
except Exception as e:
    MODEL_LOADED = False
    DESCRIPTION = f"❌ Model failed to load: {e}"
    print(f"[Fatal] Could not load model: {e}")
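# Note: bfloat16 targets GPUs (and recent CPUs). On a CPU-only Space,
# torch_dtype=torch.float32 may be the safer choice; this is a suggested
# tweak, not part of the original configuration.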
# ── Generation Function ──────────────────────────────────────────────────────
def generate_response(message, history):
    """
    Called once for each user message.

    Formats the user's message and the conversation history for the
    model, then returns the model's response.
    """
    if not MODEL_LOADED:
        return "⚠️ Model is not available. Please check the Space logs for errors."
    # Format the conversation history into the structure the model expects:
    # a list of dictionaries with "role" and "content" keys.
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        if assistant_msg:
            conversation.append({"role": "assistant", "content": assistant_msg})
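    # Note: this assumes Gradio's classic "tuples" history format
    # (a list of [user, assistant] pairs). Newer Gradio versions can
    # instead pass a list of {"role": ..., "content": ...} dicts
    # (ChatInterface type="messages"), in which case the loop above
    # would need adjusting.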
    # Add the current user's message
    conversation.append({"role": "user", "content": message})
    # Use the pipeline's tokenizer to apply the chat template, which
    # correctly formats the input for the conversational model.
    prompt = pipe.tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
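    # For Gemma-style chat templates, the templated prompt looks roughly
    # like the sketch below; the exact control tokens come from the
    # model's tokenizer config, so treat this as illustrative only:
    #   <start_of_turn>user
    #   What is bebinca?<end_of_turn>
    #   <start_of_turn>model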
    # Generate the response using the pipeline
    outputs = pipe(
        prompt,
        max_new_tokens=256,  # cap the reply length; the library default (~20 tokens) truncates answers
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
    # The pipeline output includes the entire conversation history (prompt),
    # so extract only the newly generated text from the assistant.
    response = outputs[0]["generated_text"]
    # Slice the response to get only the new part
    new_response = response[len(prompt):].strip()
    return new_response
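    # Alternative: passing return_full_text=False in the pipeline call
    # makes it return only the newly generated text, which would remove
    # the need for the manual slice above.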
# ── UI ────────────────────────────────────────────────────────────────────────
# Define some example questions to display in the UI
examples = [
    "What is bebinca?",
    "Tell me about the history of Feni.",
    "Suggest a good, quiet beach in South Goa.",
    "Describe Goan fish curry.",
]
# Create the Gradio ChatInterface
demo = gr.ChatInterface(
    fn=generate_response,
    title=TITLE,
    description=DESCRIPTION,
    examples=examples,
    theme="soft",
)
# ── Launch ────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    print("🚀 Starting Gradio app...")
    demo.launch()
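    # Optional: demo.queue().launch() would enable Gradio's request
    # queue, which handles concurrent visitors more gracefully
    # (a suggested tweak, not part of the original app).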