Spaces:

dnzblgn
/

RAG_for_customer_reviews

Sleeping

App Files Files Community

RAG_for_customer_reviews / app.py

dnzblgn

Update app.py

9d8f95d verified 3 months ago

raw

history blame contribute delete

9.48 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.vectorstores import FAISS
	from langchain.chains import ConversationalRetrievalChain
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain.memory import ConversationBufferMemory
	from langchain_community.llms import HuggingFaceEndpoint
	import os
	import time

	# Load the three classifiers used by analyze_reviews. For each one the
	# tokenizer and the model are fetched with the same pretrained identifier.
	def _load_classifier(repo_id):
	    """Return the ``(tokenizer, model)`` pair loaded from *repo_id*."""
	    tokenizer = AutoTokenizer.from_pretrained(repo_id)
	    model = AutoModelForSequenceClassification.from_pretrained(repo_id)
	    return tokenizer, model


	sentiment_tokenizer, sentiment_model = _load_classifier("dnzblgn/Sentiment-Analysis-Customer-Reviews")

	sarcasm_tokenizer, sarcasm_model = _load_classifier("dnzblgn/Sarcasm-Detection-Customer-Reviews")

	doc_tokenizer, doc_model = _load_classifier("dnzblgn/Customer-Reviews-Classification")

	# Category names listed in class-index order: position i is class index i.
	_CATEGORY_NAMES = (
	    "shipping_and_delivery",
	    "customer_service",
	    "price_and_value",
	    "quality_and_performance",
	    "use_and_design",
	    "other",
	)

	# Category name -> class index, and the inverse lookup used to turn the
	# category model's argmax back into a name.
	label_mapping = {name: index for index, name in enumerate(_CATEGORY_NAMES)}
	reverse_label_mapping = dict(enumerate(_CATEGORY_NAMES))

	def analyze_reviews(reviews):
	    """Tally positive/negative sentiment overall and per review category.

	    Every review is run through the sentiment model (class 0 is counted as
	    positive). A review judged positive is additionally run through the
	    sarcasm model and re-labelled negative when that model predicts class 1.
	    The category model then selects which category bucket the review is
	    counted under.

	    Args:
	        reviews: Iterable of review texts.

	    Returns:
	        A dict with an ``"overall"`` entry and a ``"categories"`` entry that
	        has one key per name in ``label_mapping``; each leaf holds
	        ``"positive"`` and ``"negative"`` counts.
	    """

	    def predict_class(tokenizer, model, text):
	        # Encode a single text (truncated to 512 tokens) and return the
	        # index of the highest-scoring class.
	        encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
	        with torch.no_grad():
	            logits = model(**encoded).logits
	        return torch.argmax(logits, dim=-1).item()

	    per_category = {}
	    for label in label_mapping:
	        per_category[label] = {"positive": 0, "negative": 0}
	    tallies = {
	        "overall": {"positive": 0, "negative": 0},
	        "categories": per_category,
	    }

	    for text in reviews:
	        is_positive = predict_class(sentiment_tokenizer, sentiment_model, text) == 0

	        # Only reviews that look positive are checked for sarcasm; a
	        # sarcastic "positive" review is counted as negative.
	        if is_positive and predict_class(sarcasm_tokenizer, sarcasm_model, text) == 1:
	            is_positive = False
	        verdict = "positive" if is_positive else "negative"

	        bucket = reverse_label_mapping[predict_class(doc_tokenizer, doc_model, text)]

	        tallies["overall"][verdict] += 1
	        tallies["categories"][bucket][verdict] += 1

	    return tallies

	def generate_analysis_document(analysis):
	    """Render the sentiment tallies as a plain-text report.

	    The report starts with the overall positive/negative counts and
	    percentages, followed by one section per category. Each section is
	    terminated by a literal ``--END--`` line.

	    Args:
	        analysis: Dict with an ``"overall"`` entry and a ``"categories"``
	            entry (category name -> counts); every counts dict has
	            ``"positive"`` and ``"negative"`` keys.

	    Returns:
	        The report as a single newline-joined string. Percentages are
	        reported as 0% whenever the corresponding total is zero, both for
	        the overall figures and for each category.
	    """

	    def percent(count, total):
	        # An empty tally reports 0% instead of raising ZeroDivisionError.
	        return (count / total) * 100 if total > 0 else 0

	    overall_positive = analysis["overall"]["positive"]
	    overall_negative = analysis["overall"]["negative"]
	    total_reviews = overall_positive + overall_negative

	    doc = [
	        "Overall Sentiment Analysis:",
	        f"Positive Feedback: {overall_positive} comments ({percent(overall_positive, total_reviews):.0f}%)",
	        f"Negative Feedback: {overall_negative} comments ({percent(overall_negative, total_reviews):.0f}%)",
	        "--END--",
	        "Category-Specific Analysis:",
	        "--END--",
	    ]

	    for category, feedback in analysis["categories"].items():
	        positive = feedback["positive"]
	        negative = feedback["negative"]
	        total_category = positive + negative
	        doc.extend([
	            f"{category.capitalize()}:",
	            f"- Positive Feedback: {positive} comments ({percent(positive, total_category):.0f}%)",
	            f"- Negative Feedback: {negative} comments ({percent(negative, total_category):.0f}%)",
	            "--END--",
	        ])

	    return "\n".join(doc)

	def write_analysis_to_file(analysis_document):
	    """Save the report text to ``processed_analysis.txt`` and return that path.

	    The file is created in the current working directory and any existing
	    content is overwritten.

	    Args:
	        analysis_document: Report text to store.

	    Returns:
	        The relative path of the file that was written.
	    """
	    output_path = "processed_analysis.txt"
	    with open(output_path, mode="w") as report_file:
	        report_file.write(analysis_document)
	    return output_path

	def read_processed_file():
	    """Return the full text of ``processed_analysis.txt``.

	    The file is looked up in the current working directory; a missing file
	    raises the usual ``open`` error.
	    """
	    report_path = "processed_analysis.txt"
	    with open(report_path) as report_file:
	        contents = report_file.read()
	    return contents

	def create_db_from_analysis(analysis_document):
	    """Build a FAISS vector store from the analysis report text.

	    The text is split into chunks of at most 1024 characters with a
	    64-character overlap, and the chunks are embedded with a default
	    ``HuggingFaceEmbeddings`` instance.

	    Args:
	        analysis_document: Report text to index.

	    Returns:
	        The FAISS vector store holding the embedded chunks.
	    """
	    chunker = RecursiveCharacterTextSplitter(chunk_overlap=64, chunk_size=1024)
	    chunks = chunker.create_documents([analysis_document])
	    return FAISS.from_documents(chunks, HuggingFaceEmbeddings())

	def initialize_chatbot(vector_db):
	    """Create the conversational retrieval chain that answers questions.

	    The chain combines a retriever over *vector_db*, a conversation buffer
	    memory stored under the ``"chat_history"`` key, and a hosted
	    ``mistralai/Mistral-7B-Instruct-v0.2`` endpoint. The API token is read
	    from the ``HUGGINGFACE_API_TOKEN`` environment variable.

	    Args:
	        vector_db: Vector store exposing ``as_retriever()``.

	    Returns:
	        The configured ``ConversationalRetrievalChain``.
	    """
	    conversation_memory = ConversationBufferMemory(
	        memory_key="chat_history",
	        return_messages=True,
	    )
	    doc_retriever = vector_db.as_retriever()

	    endpoint_settings = {
	        "repo_id": "mistralai/Mistral-7B-Instruct-v0.2",
	        "huggingfacehub_api_token": os.environ.get("HUGGINGFACE_API_TOKEN"),
	        "temperature": 0.5,
	        "max_new_tokens": 256,
	    }
	    endpoint_llm = HuggingFaceEndpoint(**endpoint_settings)

	    return ConversationalRetrievalChain.from_llm(
	        llm=endpoint_llm,
	        retriever=doc_retriever,
	        memory=conversation_memory,
	        verbose=False,
	    )

	def process_and_initialize(file):
	    """Analyze an uploaded review file and prepare the question-answering chain.

	    The file is read as UTF-8 with one review per non-blank line. The reviews
	    are classified, the resulting report is saved with
	    ``write_analysis_to_file``, indexed into a vector store, and wrapped in a
	    QA chain.

	    Args:
	        file: Path of the uploaded text file, or ``None`` when nothing was
	            uploaded.

	    Returns:
	        A ``(vector_db, qa_chain, status_message)`` tuple. On any failure the
	        first two items are ``None`` and the message explains the problem;
	        no exception propagates to the caller.
	    """
	    if file is None:
	        return None, None, "Please upload a file first."

	    try:
	        # Open directly instead of checking for existence first: this avoids
	        # the gap between the check and the open.
	        try:
	            with open(file, "r", encoding="utf-8") as f:
	                stripped_lines = (line.strip() for line in f)
	                reviews = [text for text in stripped_lines if text]
	        except FileNotFoundError:
	            return None, None, "File not found. Please try uploading again."

	        if not reviews:
	            return None, None, "File is empty. Please upload a file with reviews."

	        analysis = analyze_reviews(reviews)
	        analysis_doc = generate_analysis_document(analysis)

	        # The report is still saved to disk, but the index is built from the
	        # in-memory text. Reading it back from the fixed, shared file name
	        # could pick up another session's report if two uploads are
	        # processed at the same time.
	        write_analysis_to_file(analysis_doc)

	        db = create_db_from_analysis(analysis_doc)
	        qa = initialize_chatbot(db)

	        return db, qa, f"Successfully processed {len(reviews)} reviews! Ready for questions."

	    except Exception as e:
	        # UI boundary: report the failure in the status box instead of
	        # letting the event handler crash.
	        return None, None, f"Processing error: {str(e)}"

	def user_query_typing_effect(query, qa_chain, chatbot):
	    """Answer *query* with the QA chain, revealing the reply one character at a time.

	    This is a generator: every yielded value is a ``(history, "")`` pair,
	    where ``history`` is the list of ``{"role", "content"}`` message dicts
	    and the empty string clears the question box.

	    Args:
	        query: The user's question.
	        qa_chain: Chain object exposing ``invoke``; ``None`` if no review
	            file has been processed yet.
	        chatbot: Existing message history, or ``None``/empty for a new chat.

	    Yields:
	        ``(history, "")`` after each appended character, or once with a
	        single explanatory/error message when the question cannot be
	        answered.
	    """
	    history = chatbot or []

	    # Blank question: nothing to send to the model, just clear the box.
	    if not query or not query.strip():
	        yield history, ""
	        return

	    # Asking before a file has been processed used to surface a raw
	    # "'NoneType' object has no attribute 'invoke'" error to the user.
	    if qa_chain is None:
	        history.append({
	            "role": "assistant",
	            "content": "Please upload and process a review file before asking questions.",
	        })
	        yield history, ""
	        return

	    try:
	        # NOTE(review): an empty chat_history is passed on every call;
	        # presumably the memory attached to the chain supplies the real
	        # history -- confirm.
	        response = qa_chain.invoke({"question": query, "chat_history": []})
	        assistant_response = response["answer"]

	        history.append({"role": "user", "content": query})
	        history.append({"role": "assistant", "content": ""})

	        if not assistant_response:
	            # Still emit one update so the question shows up in the chat.
	            yield history, ""
	            return

	        for character in assistant_response:
	            history[-1]["content"] += character
	            yield history, ""
	            time.sleep(0.05)  # Slower typing effect
	    except Exception as e:
	        history.append({"role": "assistant", "content": f"Error: {str(e)}"})
	        yield history, ""

	def demo():
	    """Build the Gradio interface and launch it.

	    The page has an upload/process column (file picker, process button,
	    status box) and a chat column (chat panel, question box, ask button).
	    The vector store and QA chain produced by ``process_and_initialize`` are
	    kept in ``gr.State`` holders so the chat handler can receive the chain.
	    """
	    page_css = """
	body {
	background-color: #FF8C00;
	font-family: Arial, sans-serif;
	}
	.gradio-container {
	border-radius: 15px;
	box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.3);
	padding: 20px;
	}
	footer {
	visibility: hidden;
	}
	.chatbot {
	border: 2px solid #000;
	border-radius: 10px;
	background-color: #FFF5E1;
	}
	"""

	    with gr.Blocks(css=page_css) as ui:
	        # State holders filled in by the "process" handler.
	        db_state = gr.State(None)
	        chain_state = gr.State(None)

	        gr.Markdown("### 🌟 Customer Review Analysis and Chatbot 🌟")
	        gr.Markdown("#### Upload your review file and ask questions interactively!")

	        with gr.Row():
	            # Narrow column: upload and processing controls.
	            with gr.Column(scale=1):
	                upload = gr.File(
	                    label="📁 Upload Reviews",
	                    file_types=[".txt"],
	                    type="filepath",
	                )
	                process_button = gr.Button("🚀 Process Reviews")
	                status_box = gr.Textbox(
	                    label="📊 Status",
	                    placeholder="Status updates will appear here...",
	                    interactive=False,
	                )

	            # Wide column: the conversation itself.
	            with gr.Column(scale=3):
	                chat_panel = gr.Chatbot(
	                    label="🤖 Chat with your data",
	                    height=600,
	                    bubble_full_width=False,
	                    show_label=False,
	                    render_markdown=True,
	                    type="messages",
	                    elem_classes=["chatbot"],
	                )
	                question_box = gr.Textbox(
	                    label="Ask a question",
	                    placeholder="Ask about the reviews...",
	                    show_label=False,
	                    container=False,
	                )
	                ask_button = gr.Button("Ask")

	        process_button.click(
	            fn=process_and_initialize,
	            inputs=[upload],
	            outputs=[db_state, chain_state, status_box],
	            show_progress="minimal",
	        )

	        def wire_question(register):
	            # Clicking "Ask" and pressing Enter in the question box run the
	            # same handler with the same inputs and outputs.
	            register(
	                fn=user_query_typing_effect,
	                inputs=[question_box, chain_state, chat_panel],
	                outputs=[chat_panel, question_box],
	                show_progress="minimal",
	            )

	        wire_question(ask_button.click)
	        wire_question(question_box.submit)

	    ui.launch()

	# Start the app only when this file is executed as a script, so importing
	# the module does not launch the UI.
	if __name__ == "__main__":
	    demo()