RAG-wine / app.py
Illia56's picture
Rename main.py to app.py
b8f87da verified
raw
history blame
2.79 kB
import pandas as pd
import gradio as gr
from langchain import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from rapidfuzz import process, fuzz
import json
from dotenv import load_dotenv
load_dotenv()
import os
# Load the merged wine dataset from its bz2-compressed CSV.
df = pd.read_csv(
    'merged_output_compressed.bz2',
    compression='bz2',
    low_memory=False,
)

# Whitespace-trimmed wine names, plus a reverse lookup from each trimmed name
# to its row position (for duplicate names the last occurrence wins).
wine_names_arr = df['name_of_wine'].str.strip().values
name_to_index = dict(zip(wine_names_arr, range(len(wine_names_arr))))

# Prompt asking the model for just the wine's native name in Latin characters.
prompt = ChatPromptTemplate.from_template(
    "Provide only the native name of the wine in Latin characters from this user query: {query}"
)

# Chat model and the LLMChain that runs the prompt above against it.
model = ChatOpenAI(model="gpt-4o-mini")
name_extraction_chain = LLMChain(llm=model, prompt=prompt)
def _wine_lookup_failure(message):
    """Build the failure payload shared by every unsuccessful lookup path."""
    return {"name": message, "rating": None, "rank": -1}


def _parse_rank(raw_text, limit):
    """Return the 1-based rank found in the model's JSON reply, or -1.

    -1 is returned when the reply is not valid JSON, is not a JSON object,
    has no usable 'rank' value, or the rank falls outside 1..limit.
    """
    # The model may wrap its answer in a Markdown code fence; drop the fence.
    cleaned = raw_text.strip().replace('```json', '').replace('```', '')
    try:
        payload = json.loads(cleaned)
    except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
        return -1
    if not isinstance(payload, dict):
        return -1
    try:
        rank = int(payload.get('rank', -1))
    except (TypeError, ValueError, OverflowError):
        return -1
    # Reject 0 and negatives too: they would index from the end of the list.
    return rank if 1 <= rank <= limit else -1


def find_wine_info_fast(query, secret_phrase):
    """Find the wine in the dataset that best matches a free-text query.

    Steps: validate the secret phrase, ask the LLM chain for the wine name,
    fuzzy-match that name against the known wine names (top 5), ask the LLM
    which candidate is the best match, then return that row's name and rating.

    Args:
        query: Free-text user query describing a wine.
        secret_phrase: Phrase that must equal the ``SECRET`` environment
            variable for the lookup to run.

    Returns:
        A dict with keys ``name``, ``rating`` and ``rank``. On success ``rank``
        is the 1-based position of the chosen wine among the fuzzy matches.
        On any failure ``rating`` is ``None``, ``rank`` is ``-1`` and ``name``
        holds a short explanation.
    """
    import hmac  # local import: only needed for the constant-time comparison

    # Validate the secret phrase. An unset SECRET must never validate (the
    # plain `!=` check let a None phrase through when SECRET was missing), and
    # the comparison is constant-time to avoid leaking the secret via timing.
    expected_secret = os.getenv("SECRET")
    if (
        expected_secret is None
        or not isinstance(secret_phrase, str)
        or not hmac.compare_digest(
            secret_phrase.encode("utf-8"), expected_secret.encode("utf-8")
        )
    ):
        return _wine_lookup_failure("Invalid secret phrase")

    # Nothing to search for: skip the (billable) LLM round-trips entirely.
    if not isinstance(query, str) or not query.strip():
        return _wine_lookup_failure("No matching wine found")

    extracted = name_extraction_chain.invoke({"query": query})
    if 'text' not in extracted:
        return _wine_lookup_failure("Error in extraction")
    query_clean = extracted['text'].strip()

    matches = process.extract(
        query_clean,
        wine_names_arr,
        scorer=fuzz.token_sort_ratio,
        limit=5
    )
    # The first element of each match is the candidate wine name.
    top_wines = [match[0] for match in matches]
    if not top_wines:
        return _wine_lookup_failure("No matching wine found")

    # The prompt text is kept flush-left so the string sent to the model
    # carries no extra leading indentation.
    verification_prompt = f"""
Given the user query "{query}" and the top 5 wine matches {top_wines}, determine the rank (1 to 5) of the best matching wine. If the best match is not within the top 5, return -1. Please respond in JSON format with a key 'rank'.
"""
    # NOTE(review): this reuses name_extraction_chain, so the verification
    # text is embedded in the name-extraction prompt template. A dedicated
    # verification chain would be cleaner - confirm before changing, since it
    # may alter the model's answers.
    verification = name_extraction_chain.invoke({"query": verification_prompt})
    rank = _parse_rank(verification.get('text', ''), len(top_wines))
    if rank == -1:
        return _wine_lookup_failure("No matching wine found")

    matched_wine = top_wines[rank - 1]
    # Resolve the row position before touching the frame; a missing name must
    # not fall through to iloc[-1], which would fetch the last row.
    row_index = name_to_index.get(matched_wine)
    if row_index is None:
        return _wine_lookup_failure("No matching wine found")

    result = df.iloc[row_index]
    rating = result['rating']
    # NaN is not valid JSON; report a missing rating as None, like the
    # failure paths do.
    if pd.isna(rating):
        rating = None

    return {
        "name": result['name_of_wine'],
        "rating": rating,
        # Include the rank so the success payload has the same keys as the
        # failure payloads.
        "rank": rank,
    }
# Gradio UI: two text inputs (query and secret phrase) feeding the lookup
# function, with the resulting dict shown as JSON.
query_input = gr.Textbox(label="Enter Wine Query")
secret_input = gr.Textbox(label="Enter Secret Phrase")
result_view = gr.JSON(label="Wine Information")

iface = gr.Interface(
    fn=find_wine_info_fast,
    inputs=[query_input, secret_input],
    outputs=result_view,
    title="Wine Finder",
    description="Enter a query to find information about a wine.",
)

# Start the web server as soon as the script runs.
iface.launch()