Illia56 commited on
Commit
35e1d56
·
verified ·
1 Parent(s): 8e964af

Upload 3 files

Browse files
Files changed (3) hide show
  1. main.py +81 -0
  2. merged_output_compressed.bz2 +3 -0
  3. requirements.txt +6 -0
main.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# --- Imports: stdlib first, then third-party (PEP 8 grouping).
# Fix: the original buried `import os` after load_dotenv() mid-file.
import json
import os

import pandas as pd
import gradio as gr
from langchain import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from rapidfuzz import process, fuzz
from dotenv import load_dotenv

# Pull OPENAI_API_KEY / SECRET from a local .env before any client is built.
load_dotenv()

# Load the wine dataset shipped alongside this script (bz2-compressed CSV).
df = pd.read_csv('merged_output_compressed.bz2', compression='bz2', low_memory=False)

# Whitespace-normalized wine names plus a name -> row-index map for O(1)
# retrieval after fuzzy matching.
# NOTE(review): duplicate names keep only the last row's index — assumes
# 'name_of_wine' is effectively unique; confirm against the dataset.
wine_names_arr = df['name_of_wine'].str.strip().values
name_to_index = {name: idx for idx, name in enumerate(wine_names_arr)}

# Prompt that asks the model to return only the wine's name in Latin script.
prompt = ChatPromptTemplate.from_template(
    "Provide only the native name of the wine in Latin characters from this user query: {query}"
)
model = ChatOpenAI(model="gpt-4o-mini")

# Single chain reused for both name extraction and rank verification.
name_extraction_chain = LLMChain(prompt=prompt, llm=model)
def find_wine_info_fast(query, secret_phrase):
    """Resolve a free-form user query to a wine's name and rating.

    Pipeline: validate the shared secret, have the LLM extract a wine name,
    fuzzy-match it against the dataset, then ask the LLM to pick the best of
    the top-5 candidates.

    Args:
        query: Free-form user text describing a wine.
        secret_phrase: Must equal the SECRET environment variable.

    Returns:
        dict: {'name': ..., 'rating': ...} on success, or an error dict of
        the form {'name': <message>, 'rating': None, 'rank': -1}.
    """
    # Gate access behind a shared secret from the environment.
    if secret_phrase != os.getenv("SECRET"):
        return {"name": "Invalid secret phrase", "rating": None, "rank": -1}

    extracted = name_extraction_chain.invoke({"query": query})

    if 'text' not in extracted:
        return {"name": "Error in extraction", "rating": None, "rank": -1}

    query_clean = extracted['text'].strip()

    # Top-5 fuzzy candidates over the full name list.
    matches = process.extract(
        query_clean,
        wine_names_arr,
        scorer=fuzz.token_sort_ratio,
        limit=5
    )

    top_wines = [match[0] for match in matches]

    verification_prompt = f"""
    Given the user query "{query}" and the top 5 wine matches {top_wines}, determine the rank (1 to 5) of the best matching wine. If the best match is not within the top 5, return -1. Please respond in JSON format with a key 'rank'.
    """

    verification = name_extraction_chain.invoke({"query": verification_prompt})

    # The model may wrap its JSON in markdown fences; strip them first.
    raw = verification['text'].strip().replace('```json', '').replace('```', '')
    try:
        rank = json.loads(raw).get('rank', -1)
    except (json.JSONDecodeError, AttributeError):
        # Fix: the original crashed with an unhandled exception whenever the
        # model returned malformed or non-object JSON.
        return {"name": "Error parsing verification response", "rating": None, "rank": -1}

    # Fix: also reject rank values below 1 — the original guard let rank == 0
    # through, and top_wines[0 - 1] silently picked the LAST candidate.
    if not isinstance(rank, int) or rank < 1 or rank > len(top_wines):
        return {"name": "No matching wine found", "rating": None, "rank": -1}

    matched_wine = top_wines[rank - 1]

    # Fix: validate the index BEFORE df.iloc — the original indexed with -1
    # (i.e. the last row) first and only checked for the miss afterwards.
    # Also avoids the duplicated dict lookup.
    idx = name_to_index.get(matched_wine, -1)
    if idx == -1:
        return {"name": "No matching wine found", "rating": None, "rank": -1}

    result = df.iloc[idx]
    return {
        "name": result['name_of_wine'],
        "rating": result['rating'],
    }
# Gradio UI: two text inputs (wine query + shared secret) feeding
# find_wine_info_fast, with the resulting dict rendered as JSON.
query_box = gr.Textbox(label="Enter Wine Query")
secret_box = gr.Textbox(label="Enter Secret Phrase")
result_view = gr.JSON(label="Wine Information")

iface = gr.Interface(
    fn=find_wine_info_fast,
    inputs=[query_box, secret_box],
    outputs=result_view,
    title="Wine Finder",
    description="Enter a query to find information about a wine."
)

iface.launch()
merged_output_compressed.bz2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a90ef9849993c7700354f126702ddb0212bf3277e7f2e7f073b81f21dd91097
3
+ size 9128822
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-openai
3
+ rapidfuzz
4
+ gradio
5
+ pandas
6
+ python-dotenv