"""Gradio app that resolves a free-form wine query to a row of the wine dataset.

The flow is: an LLM extracts the wine name from the query, RapidFuzz picks the
five closest dataset names, and a second LLM call chooses which of those five
(if any) is the wine the user meant.
"""

import hmac
import json
import os

import gradio as gr
import pandas as pd
from dotenv import load_dotenv
from langchain import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from rapidfuzz import fuzz, process

load_dotenv()

# Load the data
df = pd.read_csv('merged_output_compressed.bz2', compression='bz2', low_memory=False)

# Whitespace-stripped wine names, plus a name -> row position map.
# When a name occurs more than once, the map keeps the last position.
wine_names_arr = df['name_of_wine'].str.strip().values
name_to_index = {name: idx for idx, name in enumerate(wine_names_arr)}

# Prompt that asks for only the wine's native name in Latin characters
prompt = ChatPromptTemplate.from_template(
    "Provide only the native name of the wine in Latin characters from this user query: {query}"
)

# Prompt for the second step: choose the best candidate among the fuzzy matches.
# It gets its own template so the JSON instruction is not wrapped inside the
# name-extraction instruction above.
verification_prompt = ChatPromptTemplate.from_template(
    'Given the user query "{query}" and the top 5 wine matches {top_wines}, '
    "determine the rank (1 to 5) of the best matching wine. "
    "If the best match is not within the top 5, return -1. "
    "Please respond in JSON format with a key 'rank'."
)

model = ChatOpenAI(model="gpt-4o-mini")

# Create processing chains using LLMChain
name_extraction_chain = LLMChain(prompt=prompt, llm=model)
rank_verification_chain = LLMChain(prompt=verification_prompt, llm=model)


def _failure(message):
    """Build the payload returned whenever no wine can be reported."""
    return {"name": message, "rating": None, "rank": -1}


def _parse_rank(raw_text, candidate_count):
    """Extract a valid 1-based rank from the model's reply.

    Args:
        raw_text: The model's reply, expected to be a JSON object with a
            'rank' key, optionally wrapped in a Markdown code fence.
        candidate_count: Number of candidates the rank may refer to.

    Returns:
        The rank as an int in ``1..candidate_count``, or -1 when the reply is
        not valid JSON, is not an object, or holds a missing/out-of-range rank.
    """
    cleaned = raw_text.strip().replace('```json', '').replace('```', '')
    try:
        payload = json.loads(cleaned)
    except json.JSONDecodeError:
        return -1
    if not isinstance(payload, dict):
        return -1

    rank = payload.get('rank', -1)
    if isinstance(rank, str):
        # Models sometimes quote the number; accept "3" as 3.
        try:
            rank = int(rank.strip())
        except ValueError:
            return -1
    # bool is an int subclass, so exclude it explicitly.
    if isinstance(rank, bool) or not isinstance(rank, int):
        return -1
    # Ranks of 0 or below would index the candidate list from the end.
    if not 1 <= rank <= candidate_count:
        return -1
    return rank


def find_wine_info_fast(query, secret_phrase):
    """Look up the wine that best matches a free-form user query.

    Args:
        query: Free-form text describing the wine.
        secret_phrase: Must equal the ``SECRET`` environment variable.

    Returns:
        On success, a dict with the dataset's ``name`` and ``rating`` for the
        chosen wine and the ``rank`` (1-based) it had among the fuzzy matches.
        On any failure, a dict whose ``name`` holds a message, ``rating`` is
        None and ``rank`` is -1.
    """
    # Validate the secret phrase. An unset SECRET always rejects, and the
    # comparison is constant-time.
    expected_secret = os.getenv("SECRET")
    if expected_secret is None or not hmac.compare_digest(
        (secret_phrase or "").encode("utf-8"), expected_secret.encode("utf-8")
    ):
        return _failure("Invalid secret phrase")

    extracted = name_extraction_chain.invoke({"query": query})
    if 'text' not in extracted:
        return _failure("Error in extraction")
    query_clean = extracted['text'].strip()

    matches = process.extract(
        query_clean,
        wine_names_arr,
        scorer=fuzz.token_sort_ratio,
        limit=5,
    )
    top_wines = [match[0] for match in matches]
    if not top_wines:
        # Nothing to rank, so skip the second model call.
        return _failure("No matching wine found")

    verification = rank_verification_chain.invoke(
        {"query": query, "top_wines": str(top_wines)}
    )
    rank = _parse_rank(verification.get('text', ''), len(top_wines))
    if rank == -1:
        return _failure("No matching wine found")

    matched_wine = top_wines[rank - 1]
    row_position = name_to_index.get(matched_wine)
    if row_position is None:
        return _failure("No matching wine found")

    result = df.iloc[row_position]
    return {
        "name": result['name_of_wine'],
        "rating": result['rating'],
        "rank": rank,
    }


# Gradio interface
iface = gr.Interface(
    fn=find_wine_info_fast,
    inputs=[
        gr.Textbox(label="Enter Wine Query"),
        gr.Textbox(label="Enter Secret Phrase"),
    ],
    outputs=gr.JSON(label="Wine Information"),
    title="Wine Finder",
    description="Enter a query to find information about a wine.",
)

iface.launch()