mirxakamran893's picture
Update app.py
be128ec verified
raw
history blame
3.03 kB
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import gradio as gr
import os
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import requests
import nest_asyncio
import uvicorn
# OpenRouter settings: the API key is read from the environment, the model id is fixed.
MODEL = "qwen/qwq-32b:free"
API_KEY = os.getenv("OPENROUTER_API_KEY")

# RAG assets: the passage texts, the FAISS index whose result ids are used to
# look passages up in `texts` (see get_context), and the query encoder.
with open("texts.json", encoding="utf-8") as f:
    texts = json.loads(f.read())
index = faiss.read_index("faiss_index.bin")
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# FastAPI application; the /chat route and the Gradio UI attach to it below.
app = FastAPI()
# Context retriever with a score threshold
def get_context(query, top_k=5, threshold=0.3):
    """Return the stored passages that best match *query*, joined by newlines.

    The query is embedded with ``embed_model`` and searched against the FAISS
    ``index``; every hit whose score is below ``threshold`` is dropped.

    Args:
        query: Text to look up. Blank or non-string input yields "".
        top_k: Maximum number of neighbours requested from the index.
        threshold: Minimum score a hit must reach to be kept.

    Returns:
        The kept passages joined by newlines, or "" when nothing qualifies.
    """
    # Nothing sensible to retrieve for an empty query or a non-positive k.
    if top_k <= 0 or not isinstance(query, str) or not query.strip():
        return ""

    # FAISS expects float32 input.
    query_vec = np.asarray(embed_model.encode([query]), dtype="float32")
    scores, ids = index.search(query_vec, top_k)

    # NOTE(review): keeping hits with ``score >= threshold`` treats larger
    # scores as better, which matches an inner-product/cosine index. For an L2
    # index smaller is better - confirm the metric faiss_index.bin was built with.
    n_texts = len(texts)
    passages = [
        texts[i]
        for i, score in zip(ids[0], scores[0])
        # FAISS pads missing neighbours with id -1; without this guard
        # texts[-1] would silently return the last passage.
        if 0 <= i < n_texts and score >= threshold
    ]
    return "\n".join(passages)
# Chat function (RAG-aware, with a fallback when no context is found)
_OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
_FALLBACK_REPLY = "❌ Sorry, I can't answer that."
# (connect, read) timeouts in seconds so a stalled upstream cannot hang a worker.
_REQUEST_TIMEOUT = (10, 60)


def _history_to_messages(history):
    """Convert chat history into a list of role/content message dicts.

    Accepts both ``(user, assistant)`` pairs and ``{"role", "content"}`` dicts;
    entries that are malformed or have non-string content are skipped.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            role, content = turn.get("role"), turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str):
                converted.append({"role": role, "content": content})
            continue
        try:
            user, assistant = turn
        except (TypeError, ValueError):
            continue
        if isinstance(user, str):
            converted.append({"role": "user", "content": user})
        if isinstance(assistant, str):
            converted.append({"role": "assistant", "content": assistant})
    return converted


def chat_fn(message, history):
    """Answer *message* using retrieved context and the OpenRouter chat API.

    Args:
        message: The user's latest message.
        history: Earlier turns, either ``(user, assistant)`` pairs or
            ``{"role": ..., "content": ...}`` dicts; may be empty or None.

    Returns:
        The model's reply, the fallback reply when the message is blank or no
        context passes the retrieval threshold, or an "Error: ..." string when
        the request fails.
    """
    if not isinstance(message, str) or not message.strip():
        return _FALLBACK_REPLY

    context = get_context(message)
    if not context.strip():
        return _FALLBACK_REPLY

    if not API_KEY:
        # Fail with a clear message instead of sending "Bearer None" upstream.
        return "❌ Error: OPENROUTER_API_KEY is not set."

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    messages = [
        {
            "role": "system",
            "content": (
                "You are a concise and helpful assistant. Use only the context below to answer. "
                "Respond clearly and briefly. Avoid unnecessary details. If the context doesn't contain the answer, reply: "
                "'Sorry, I can't answer that.'\n\nContext:\n" + context
            ),
        },
        *_history_to_messages(history),
        {"role": "user", "content": message},
    ]
    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": 200,  # Optional: limit response length
    }

    try:
        response = requests.post(
            _OPENROUTER_URL,
            headers=headers,
            json=payload,
            timeout=_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        reply = response.json()["choices"][0]["message"]["content"]
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Transport/HTTP failures and unexpected response shapes are reported
        # to the user rather than raised into the UI or API layer.
        reply = f"❌ Error: {e}"
    return reply
# FastAPI POST endpoint
@app.post("/chat")
async def chat_api(request: Request):
    """Handle POST /chat: read a JSON body and reply with chat_fn's answer.

    Expected body: ``{"message": <str>, "history": <list, optional>}``.

    Returns:
        ``{"response": <reply>}`` on success, or a 400 response with an
        ``error`` field when the body is not valid JSON, is not an object,
        lacks a non-empty string ``message``, or has a non-list ``history``.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        body = await request.json()
    except ValueError:
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be valid JSON."},
        )
    if not isinstance(body, dict):
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be a JSON object."},
        )

    message = body.get("message")
    if not isinstance(message, str) or not message.strip():
        return JSONResponse(
            status_code=400,
            content={"error": "'message' must be a non-empty string."},
        )

    history = body.get("history") or []
    if not isinstance(history, list):
        return JSONResponse(
            status_code=400,
            content={"error": "'history' must be a list."},
        )

    # chat_fn is synchronous (it calls requests.post), so run it in a worker
    # thread instead of blocking the event loop.
    response = await run_in_threadpool(chat_fn, message, history)
    return JSONResponse(content={"response": response})
# Gradio chat UI driven by the same chat_fn the /chat endpoint uses
demo = gr.ChatInterface(
    chat_fn,
    theme="soft",
    description="Chat with AI MODEL trained by Mirxa Kamran",
    title="CODEX MIRXA KAMRAN",
)

# Serve the Gradio UI from the root path of the FastAPI app
app = gr.mount_gradio_app(app=app, blocks=demo, path="/")

# Local development entry point
if __name__ == "__main__":
    # NOTE(review): nest_asyncio is normally only needed when an event loop is
    # already running (e.g. inside a notebook) - confirm it is required here.
    nest_asyncio.apply()
    uvicorn.run(app, port=7860, host="0.0.0.0")