File size: 2,786 Bytes
35e1d56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
import gradio as gr
from langchain import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from rapidfuzz import process, fuzz
import json
from dotenv import load_dotenv
load_dotenv()
import os

# Load the data: a bz2-compressed CSV of wines; low_memory=False forces a
# single-pass read so pandas infers one consistent dtype per column.
df = pd.read_csv('merged_output_compressed.bz2', compression='bz2', low_memory=False)

# Create normalized names array and index map.
# wine_names_arr feeds rapidfuzz; name_to_index maps a matched name back to
# its positional row in df. NOTE(review): if two rows share a stripped name,
# later rows overwrite earlier ones in this dict — confirm names are unique.
wine_names_arr = df['name_of_wine'].str.strip().values
name_to_index = {name: idx for idx, name in enumerate(wine_names_arr)}

# Set up prompt to extract only the English name of the wine.
# {query} is the single template variable filled at invoke time.
prompt = ChatPromptTemplate.from_template(
    "Provide only the native name of the wine in Latin characters from this user query: {query}"
)
model = ChatOpenAI(model="gpt-4o-mini")

# Create processing chain using LLMChain.
# NOTE(review): LLMChain is deprecated in modern LangChain, but it is kept
# because find_wine_info_fast relies on its dict result shape ({'text': ...});
# migrating to `prompt | model` would change that contract.
name_extraction_chain = LLMChain(prompt=prompt, llm=model)

def find_wine_info_fast(query, secret_phrase):
    """Resolve a free-text wine query to a known wine and its rating.

    Pipeline: (1) gate on a shared secret, (2) ask the LLM to extract the
    wine name from the query, (3) fuzzy-match that name against the dataset,
    (4) ask the LLM to rank the top-5 candidates and return the winner.

    Args:
        query: Free-text user query mentioning a wine.
        secret_phrase: Must equal the SECRET environment variable.

    Returns:
        dict with keys 'name', 'rating', 'rank'. On any failure 'name'
        carries an explanatory message, 'rating' is None and 'rank' is -1.
    """
    # Gate before doing any (paid) LLM work.
    if secret_phrase != os.getenv("SECRET"):
        return {"name": "Invalid secret phrase", "rating": None, "rank": -1}

    extracted = name_extraction_chain.invoke({"query": query})

    if 'text' not in extracted:
        return {"name": "Error in extraction", "rating": None, "rank": -1}

    query_clean = extracted['text'].strip()

    # Fuzzy-match the extracted name against every known wine name.
    matches = process.extract(
        query_clean,
        wine_names_arr,
        scorer=fuzz.token_sort_ratio,
        limit=5
    )

    top_wines = [match[0] for match in matches]

    verification_prompt = f"""
    Given the user query "{query}" and the top 5 wine matches {top_wines}, determine the rank (1 to 5) of the best matching wine. If the best match is not within the top 5, return -1. Please respond in JSON format with a key 'rank'.
    """

    verification = name_extraction_chain.invoke({"query": verification_prompt})

    # LLM output is untrusted: strip markdown code fences, then guard the
    # parse — the original crashed on malformed JSON or a non-numeric rank.
    try:
        rank_data = json.loads(
            verification['text'].strip().replace('```json', '').replace('```', '')
        )
        rank = int(rank_data.get('rank', -1))
    except (KeyError, TypeError, ValueError, json.JSONDecodeError):
        return {"name": "No matching wine found", "rating": None, "rank": -1}

    # BUG FIX: the original only rejected rank == -1 or rank > len(top_wines),
    # so a model answer of 0 (or any negative other than -1) indexed
    # top_wines[rank - 1] from the end and returned the wrong wine.
    if not 1 <= rank <= len(top_wines):
        return {"name": "No matching wine found", "rating": None, "rank": -1}

    matched_wine = top_wines[rank - 1]

    # BUG FIX: validate the index BEFORE using it — the original did
    # df.iloc[name_to_index.get(..., -1)] first, silently fetching the
    # LAST row of the frame on a miss before discarding it.
    idx = name_to_index.get(matched_wine, -1)
    if idx == -1:
        return {"name": "No matching wine found", "rating": None, "rank": -1}
    result = df.iloc[idx]

    return {
        "name": result['name_of_wine'],
        "rating": result['rating'],
        # FIX: include 'rank' on success too, so the JSON payload schema is
        # consistent with every error return above.
        "rank": rank,
    }

# Gradio interface
iface = gr.Interface(
    fn=find_wine_info_fast,
    inputs=[gr.Textbox(label="Enter Wine Query"), gr.Textbox(label="Enter Secret Phrase")],
    outputs=gr.JSON(label="Wine Information"),
    title="Wine Finder",
    description="Enter a query to find information about a wine."
)

iface.launch()