Upload 3 files
- main.py +81 -0
- merged_output_compressed.bz2 +3 -0
- requirements.txt +6 -0
main.py
ADDED
@@ -0,0 +1,81 @@
+import json
+import os
+import pandas as pd
+import gradio as gr
+from dotenv import load_dotenv
+from langchain.chains import LLMChain
+from langchain.prompts import ChatPromptTemplate
+from langchain_openai import ChatOpenAI
+from rapidfuzz import process, fuzz
+load_dotenv()  # load OPENAI_API_KEY and SECRET from a local .env file
+
+# Load the data
+df = pd.read_csv('merged_output_compressed.bz2', compression='bz2', low_memory=False)
+
+# Create normalized names array and index map
+wine_names_arr = df['name_of_wine'].str.strip().values
+name_to_index = {name: idx for idx, name in enumerate(wine_names_arr)}
+
+# Set up prompt to extract only the wine name (in Latin characters) from the user query
+prompt = ChatPromptTemplate.from_template(
+    "Provide only the native name of the wine in Latin characters from this user query: {query}"
+)
+model = ChatOpenAI(model="gpt-4o-mini")
+
+# Create processing chain using LLMChain
+name_extraction_chain = LLMChain(prompt=prompt, llm=model)
+
+def find_wine_info_fast(query, secret_phrase):
+    # Validate the secret phrase
+    if secret_phrase != os.getenv("SECRET"):
+        return {"name": "Invalid secret phrase", "rating": None, "rank": -1}
+
+    extracted = name_extraction_chain.invoke({"query": query})
+
+    if 'text' not in extracted:
+        return {"name": "Error in extraction", "rating": None, "rank": -1}
+
+    query_clean = extracted['text'].strip()
+
+    matches = process.extract(
+        query_clean,
+        wine_names_arr,
+        scorer=fuzz.token_sort_ratio,
+        limit=5
+    )
+
+    top_wines = [match[0] for match in matches]
+
+    verification_prompt = f"""
+    Given the user query "{query}" and the top 5 wine matches {top_wines}, determine the rank (1 to 5) of the best matching wine. If the best match is not within the top 5, return -1. Please respond in JSON format with a key 'rank'.
+    """
+
+    verification = name_extraction_chain.invoke({"query": verification_prompt})
+
+    rank_data = json.loads(verification['text'].strip().replace('```json', '').replace('```', ''))
+    rank = rank_data.get('rank', -1)
+
+    if rank == -1 or rank > len(top_wines):
+        return {"name": "No matching wine found", "rating": None, "rank": -1}
+
+    matched_wine = top_wines[rank - 1]
+    idx = name_to_index.get(matched_wine, -1)  # row index of the match; -1 if the name is missing
+    if idx == -1:
+        return {"name": "No matching wine found", "rating": None, "rank": -1}
+    result = df.iloc[idx]
+
+    return {
+        "name": result['name_of_wine'],
+        "rating": result['rating'],
+    }
+
+# Gradio interface
+iface = gr.Interface(
+    fn=find_wine_info_fast,
+    inputs=[gr.Textbox(label="Enter Wine Query"), gr.Textbox(label="Enter Secret Phrase")],
+    outputs=gr.JSON(label="Wine Information"),
+    title="Wine Finder",
+    description="Enter a query to find information about a wine."
+)
+
+iface.launch()
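The pipeline in main.py is: one LLM call extracts a clean wine name from the free-form query, rapidfuzz narrows it to the five closest names in the dataset, and a second LLM call picks the best of those five. A minimal, self-contained sketch of the rapidfuzz step (the sample names below are invented; the real app builds the candidate list from the bundled CSV):

from rapidfuzz import process, fuzz

# Invented sample names; main.py loads the real list from merged_output_compressed.bz2.
wine_names = ["Opus One 2018", "Chateau Margaux 2015", "Penfolds Grange 2016"]

# token_sort_ratio ignores word order, so "2018 opus one" scores the same as "opus one 2018".
matches = process.extract("opus one", wine_names, scorer=fuzz.token_sort_ratio, limit=5)

# Each match is a (name, score, index) tuple, ordered best-first.
for name, score, idx in matches:
    print(f"{name}: {score:.1f}")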
merged_output_compressed.bz2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a90ef9849993c7700354f126702ddb0212bf3277e7f2e7f073b81f21dd91097
+size 9128822
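(The file above is a Git LFS pointer rather than the dataset itself; it records only the content hash and a size of roughly 9 MB. Cloning the repo with git-lfs installed fetches the actual merged_output_compressed.bz2 that main.py reads.)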
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+langchain
+langchain-openai
+rapidfuzz
+gradio
+pandas
+python-dotenv
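To run the app locally (assuming an OPENAI_API_KEY for the gpt-4o-mini calls and the SECRET phrase are defined in a .env file next to main.py): install the dependencies with pip install -r requirements.txt, then start it with python main.py and open the local Gradio URL it prints.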