Illia56 commited on
Commit
35e1d56
·
verified ·
1 Parent(s): 8e964af

Upload 3 files

Browse files
Files changed (3) hide show
  1. main.py +81 -0
  2. merged_output_compressed.bz2 +3 -0
  3. requirements.txt +6 -0
main.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# --- Imports: stdlib first, then third-party (PEP 8 grouping).
# Fix: the original buried `import os` after load_dotenv() mid-file.
import json
import os

import pandas as pd
import gradio as gr
from langchain import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from rapidfuzz import process, fuzz
from dotenv import load_dotenv

# Pull OPENAI_API_KEY / SECRET from a local .env before any client is built.
load_dotenv()

# Load the wine dataset shipped alongside this script (bz2-compressed CSV).
df = pd.read_csv('merged_output_compressed.bz2', compression='bz2', low_memory=False)

# Whitespace-normalized wine names plus a name -> row-index map for O(1)
# retrieval after fuzzy matching.
# NOTE(review): duplicate names keep only the last row's index — assumes
# 'name_of_wine' is effectively unique; confirm against the dataset.
wine_names_arr = df['name_of_wine'].str.strip().values
name_to_index = {name: idx for idx, name in enumerate(wine_names_arr)}

# Prompt that asks the model to return only the wine's name in Latin script.
prompt = ChatPromptTemplate.from_template(
    "Provide only the native name of the wine in Latin characters from this user query: {query}"
)
model = ChatOpenAI(model="gpt-4o-mini")

# Single chain reused for both name extraction and rank verification.
name_extraction_chain = LLMChain(prompt=prompt, llm=model)
def find_wine_info_fast(query, secret_phrase):
    """Resolve a free-form user query to a wine's name and rating.

    Pipeline: validate the shared secret, have the LLM extract a wine name,
    fuzzy-match it against the dataset, then ask the LLM to pick the best of
    the top-5 candidates.

    Args:
        query: Free-form user text describing a wine.
        secret_phrase: Must equal the SECRET environment variable.

    Returns:
        dict: {'name': ..., 'rating': ...} on success, or an error dict of
        the form {'name': <message>, 'rating': None, 'rank': -1}.
    """
    # Gate access behind a shared secret from the environment.
    if secret_phrase != os.getenv("SECRET"):
        return {"name": "Invalid secret phrase", "rating": None, "rank": -1}

    extracted = name_extraction_chain.invoke({"query": query})

    if 'text' not in extracted:
        return {"name": "Error in extraction", "rating": None, "rank": -1}

    query_clean = extracted['text'].strip()

    # Top-5 fuzzy candidates over the full name list.
    matches = process.extract(
        query_clean,
        wine_names_arr,
        scorer=fuzz.token_sort_ratio,
        limit=5
    )

    top_wines = [match[0] for match in matches]

    verification_prompt = f"""
    Given the user query "{query}" and the top 5 wine matches {top_wines}, determine the rank (1 to 5) of the best matching wine. If the best match is not within the top 5, return -1. Please respond in JSON format with a key 'rank'.
    """

    verification = name_extraction_chain.invoke({"query": verification_prompt})

    # The model may wrap its JSON in markdown fences; strip them first.
    raw = verification['text'].strip().replace('```json', '').replace('```', '')
    try:
        rank = json.loads(raw).get('rank', -1)
    except (json.JSONDecodeError, AttributeError):
        # Fix: the original crashed with an unhandled exception whenever the
        # model returned malformed or non-object JSON.
        return {"name": "Error parsing verification response", "rating": None, "rank": -1}

    # Fix: also reject rank values below 1 — the original guard let rank == 0
    # through, and top_wines[0 - 1] silently picked the LAST candidate.
    if not isinstance(rank, int) or rank < 1 or rank > len(top_wines):
        return {"name": "No matching wine found", "rating": None, "rank": -1}

    matched_wine = top_wines[rank - 1]

    # Fix: validate the index BEFORE df.iloc — the original indexed with -1
    # (i.e. the last row) first and only checked for the miss afterwards.
    # Also avoids the duplicated dict lookup.
    idx = name_to_index.get(matched_wine, -1)
    if idx == -1:
        return {"name": "No matching wine found", "rating": None, "rank": -1}

    result = df.iloc[idx]
    return {
        "name": result['name_of_wine'],
        "rating": result['rating'],
    }
# Gradio UI: two text inputs (wine query + shared secret) feeding
# find_wine_info_fast, with the resulting dict rendered as JSON.
query_box = gr.Textbox(label="Enter Wine Query")
secret_box = gr.Textbox(label="Enter Secret Phrase")
result_view = gr.JSON(label="Wine Information")

iface = gr.Interface(
    fn=find_wine_info_fast,
    inputs=[query_box, secret_box],
    outputs=result_view,
    title="Wine Finder",
    description="Enter a query to find information about a wine."
)

iface.launch()
merged_output_compressed.bz2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a90ef9849993c7700354f126702ddb0212bf3277e7f2e7f073b81f21dd91097
3
+ size 9128822
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-openai
3
+ rapidfuzz
4
+ gradio
5
+ pandas
6
+ python-dotenv