Spaces:

Illia56
/

RAG-wine

Runtime error

App Files Files Community

RAG-wine / app.py

Illia56

Rename main.py to app.py

b8f87da verified 3 months ago

raw

history blame contribute delete

2.79 kB

	import hmac
	import json
	import os

	import gradio as gr
	import pandas as pd
	from dotenv import load_dotenv
	from langchain import LLMChain
	from langchain.prompts import ChatPromptTemplate
	from langchain_openai import ChatOpenAI
	from rapidfuzz import process, fuzz

	load_dotenv()

	# Read the bz2-compressed CSV into a DataFrame.
	df = pd.read_csv(
	    'merged_output_compressed.bz2',
	    compression='bz2',
	    low_memory=False,
	)

	# Whitespace-stripped wine names, plus a name -> row-position lookup.
	# When the same name occurs more than once, the last position wins.
	name_column = df['name_of_wine']
	wine_names_arr = name_column.str.strip().values
	name_to_index = dict(zip(wine_names_arr, range(len(wine_names_arr))))

	# Prompt asking the model for only the wine's native name in Latin characters.
	prompt = ChatPromptTemplate.from_template(
	    "Provide only the native name of the wine in Latin characters from this user query: {query}"
	)
	model = ChatOpenAI(model="gpt-4o-mini")

	# Chain that fills the prompt above with a query and sends it to the model.
	name_extraction_chain = LLMChain(llm=model, prompt=prompt)

	def _no_match():
	    """Return the payload used whenever no wine can be selected."""
	    return {"name": "No matching wine found", "rating": None, "rank": -1}


	def _parse_rank(raw_text, n_candidates):
	    """Extract a 1-based rank from the model's reply, or -1 if unusable.

	    The reply is expected to contain a JSON object with a 'rank' key, but it
	    may be wrapped in Markdown code fences or surrounding prose, so only the
	    span between the first '{' and the last '}' is parsed.

	    Args:
	        raw_text: Text returned by the model.
	        n_candidates: Number of candidates offered; valid ranks are
	            1..n_candidates.

	    Returns:
	        The rank as an int in [1, n_candidates], otherwise -1.
	    """
	    start = raw_text.find("{")
	    end = raw_text.rfind("}")
	    if start == -1 or end <= start:
	        return -1
	    try:
	        payload = json.loads(raw_text[start:end + 1])
	        rank = int(payload.get("rank", -1))
	    except (json.JSONDecodeError, TypeError, ValueError, OverflowError):
	        return -1
	    # Rejecting 0 and negatives matters: they would otherwise index the
	    # candidate list from the end and silently pick the wrong wine.
	    return rank if 1 <= rank <= n_candidates else -1


	def find_wine_info_fast(query, secret_phrase):
	    """Look up the wine that best matches a free-text query.

	    Steps: validate the secret phrase, ask the LLM chain for the wine name,
	    fuzzy-match that name against ``wine_names_arr`` (top 5), ask the model
	    which candidate fits the original query, then return that row's data.

	    Args:
	        query: Free-text user query describing a wine.
	        secret_phrase: Phrase that must equal the ``SECRET`` environment
	            variable.

	    Returns:
	        A dict with keys ``name``, ``rating`` and ``rank``. On any failure
	        ``rating`` is ``None`` and ``rank`` is ``-1``, with ``name`` holding
	        a short status message.
	    """
	    # Fail closed: an unset SECRET or a non-string phrase never authenticates
	    # (previously None == None slipped through). compare_digest avoids
	    # leaking the secret through comparison timing.
	    expected_secret = os.getenv("SECRET")
	    if (
	        expected_secret is None
	        or not isinstance(secret_phrase, str)
	        or not hmac.compare_digest(
	            secret_phrase.encode("utf-8"), expected_secret.encode("utf-8")
	        )
	    ):
	        return {"name": "Invalid secret phrase", "rating": None, "rank": -1}

	    extracted = name_extraction_chain.invoke({"query": query})
	    if 'text' not in extracted:
	        return {"name": "Error in extraction", "rating": None, "rank": -1}
	    query_clean = extracted['text'].strip()

	    matches = process.extract(
	        query_clean,
	        wine_names_arr,
	        scorer=fuzz.token_sort_ratio,
	        limit=5,
	    )
	    top_wines = [match[0] for match in matches]

	    verification_prompt = (
	        f'Given the user query "{query}" and the top 5 wine matches {top_wines}, '
	        "determine the rank (1 to 5) of the best matching wine. "
	        "If the best match is not within the top 5, return -1. "
	        "Please respond in JSON format with a key 'rank'."
	    )
	    # Send the ranking request straight to the model. Routing it through
	    # name_extraction_chain would wrap it in the name-extraction template,
	    # which contradicts the request for a JSON rank.
	    verification = model.invoke(verification_prompt)
	    rank = _parse_rank(str(verification.content), len(top_wines))
	    if rank == -1:
	        return _no_match()

	    matched_wine = top_wines[rank - 1]
	    # Resolve the row position before touching the DataFrame, so a missing
	    # name can never fall through to iloc[-1] (the last row).
	    row_index = name_to_index.get(matched_wine)
	    if row_index is None:
	        return _no_match()
	    result = df.iloc[row_index]

	    # Hand back plain Python values: unwrap numpy scalars and map a missing
	    # rating to None so the payload serialises as valid JSON.
	    rating = result['rating']
	    if hasattr(rating, "item"):
	        rating = rating.item()
	    if pd.isna(rating):
	        rating = None

	    return {
	        "name": result['name_of_wine'],
	        "rating": rating,
	        "rank": rank,
	    }

	# Gradio interface: two text inputs (wine query and secret phrase) are passed
	# to find_wine_info_fast, and the returned dict is rendered as JSON.
	iface = gr.Interface(
	    fn=find_wine_info_fast,
	    inputs=[
	        gr.Textbox(label="Enter Wine Query"),
	        # Mask the secret so it is not shown in clear text while typing.
	        gr.Textbox(label="Enter Secret Phrase", type="password"),
	    ],
	    outputs=gr.JSON(label="Wine Information"),
	    title="Wine Finder",
	    description="Enter a query to find information about a wine."
	)

	iface.launch()