Spaces:
Configuration error
Configuration error
Upload 3 files
Browse files- README.md +29 -12
- app.py +139 -0
- requirements.txt +8 -0
README.md
CHANGED
|
@@ -1,12 +1,29 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AI Product Search Agent
|
| 2 |
+
|
| 3 |
+
This is a fully free, deployable AI tool built with Streamlit and LangChain that searches Amazon cellphone accessories using natural language.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- Natural language understanding
|
| 8 |
+
- Vector search (FAISS + sentence-transformers)
|
| 9 |
+
- Price and keyword filtering
|
| 10 |
+
- Conversational agent interface
|
| 11 |
+
- Deployed on Hugging Face Spaces
|
| 12 |
+
|
| 13 |
+
## How to Run
|
| 14 |
+
|
| 15 |
+
### Locally
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
pip install -r requirements.txt
|
| 19 |
+
streamlit run app.py
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
### Hugging Face Spaces
|
| 23 |
+
|
| 24 |
+
1. Create a new Space (Streamlit)
|
| 25 |
+
2. Upload all files
|
| 26 |
+
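3. Add your API key as a secret (`secrets.toml`) under an `[openai]` section: `api_key` is required, and `base_url` is optional (set it to point at OpenRouter instead of the default OpenAI endpoint)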
|
| 27 |
+
4. Deploy!
|
| 28 |
+
|
| 29 |
+
Enjoy your free AI agent!
|
app.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import faiss
|
| 5 |
+
import re
|
| 6 |
+
import ast
|
| 7 |
+
import os
|
| 8 |
+
import urllib.request
|
| 9 |
+
|
| 10 |
+
from sentence_transformers import SentenceTransformer
|
| 11 |
+
from sentence_transformers.util import cos_sim
|
| 12 |
+
from langchain.chat_models import ChatOpenAI
|
| 13 |
+
from langchain.agents import initialize_agent, AgentType, tool
|
| 14 |
+
from streamlit_chat import message
|
| 15 |
+
|
| 16 |
+
# ---------------------------
|
| 17 |
+
# Configuration
|
| 18 |
+
# ---------------------------
|
| 19 |
+
# Set the browser-tab title and use the wide page layout for the app.
st.set_page_config(page_title="📱 AI Product Search Agent", layout="wide")
|
| 20 |
+
|
| 21 |
+
# ---------------------------
|
| 22 |
+
# Load model
|
| 23 |
+
# ---------------------------
|
| 24 |
+
@st.cache_resource
def load_model():
    """Build the sentence-embedding model used to encode text.

    The loader is wrapped in ``st.cache_resource``, so Streamlit caches the
    returned ``SentenceTransformer`` instead of rebuilding it on each call.
    """
    checkpoint = "all-MiniLM-L6-v2"
    return SentenceTransformer(checkpoint)
|
| 27 |
+
|
| 28 |
+
# ---------------------------
|
| 29 |
+
# Load dataset and FAISS index
|
| 30 |
+
# ---------------------------
|
| 31 |
+
@st.cache_resource
def load_data():
    """Load the product metadata table and the FAISS index.

    The metadata is read from a remote parquet file; the FAISS index is
    downloaded once to the working directory and read from disk afterwards.

    ``st.cache_resource`` is used (rather than ``st.cache_data``) because the
    FAISS index is a large native object that should be shared, not pickled
    and copied on every call. The cached objects are therefore shared between
    callers: copy the DataFrame before mutating it.

    Returns:
        tuple: ``(df, index)`` - the pandas DataFrame and the FAISS index.
    """
    parquet_url = "https://huggingface.co/datasets/McAuley-Lab/Amazon-Reviews-2023/resolve/main/raw_meta_Cell_Phones_and_Accessories/full-00000-of-00007.parquet"
    df = pd.read_parquet(parquet_url)

    index_url = "https://huggingface.co/GovinKin/MGTA415database/resolve/main/cellphones_index.faiss"
    local_index_path = "cellphones_index.faiss"
    if not os.path.exists(local_index_path):
        # Download under a temporary name and move it into place afterwards,
        # so an interrupted download never leaves a truncated file at the
        # final path (which would be picked up as "already downloaded").
        partial_path = local_index_path + ".part"
        urllib.request.urlretrieve(index_url, partial_path)
        os.replace(partial_path, local_index_path)

    index = faiss.read_index(local_index_path)
    return df, index
|
| 43 |
+
|
| 44 |
+
# ---------------------------
|
| 45 |
+
# Search functions
|
| 46 |
+
# ---------------------------
|
| 47 |
+
def search(query, model, df, index, top_k=10):
    """Return the rows of ``df`` nearest to ``query`` in the vector index.

    Args:
        query: Free-text search string.
        model: Object whose ``encode(list_of_str)`` returns a 2-D array of
            embeddings (one row per input string).
        df: DataFrame whose positional row order is assumed to match the
            ids stored in ``index`` - TODO confirm against how the index
            was built.
        index: Object exposing ``search(vectors, k=...)`` and returning
            ``(distances, ids)`` arrays, as a FAISS index does.
        top_k: Maximum number of neighbours to request.

    Returns:
        A copy of the matching rows, in the order returned by the index,
        with an extra ``distance`` column. Ids that do not address a row of
        ``df`` are dropped, so fewer than ``top_k`` rows may be returned.
    """
    query_vector = model.encode([query]).astype("float32")
    distances, indices = index.search(query_vector, k=top_k)

    hit_ids = np.asarray(indices[0])
    hit_distances = np.asarray(distances[0])

    # FAISS pads missing neighbours with id -1, which ``iloc`` would silently
    # map to the last row; ids past the end of ``df`` would raise IndexError.
    # Keep only ids that address a real row.
    valid = (hit_ids >= 0) & (hit_ids < len(df))

    results = df.iloc[hit_ids[valid]].copy()
    results["distance"] = hit_distances[valid]
    return results
|
| 53 |
+
|
| 54 |
+
def search_plus(query, model, df, index, top_k=20):
    """Vector search followed by optional price and title-keyword filters.

    Args:
        query: Free-text search string. A phrase such as ``under $30`` or
            ``below 19.99`` is interpreted as an exclusive upper price limit.
        model, df, index: Passed through unchanged to ``search``.
        top_k: Number of nearest neighbours to fetch before filtering.

    Returns:
        The rows returned by ``search`` that pass every active filter:

        * price filter - active when the query contains a price phrase and
          the results have a ``price`` column; rows whose price is missing
          or not numeric are dropped because they cannot be shown to be
          under the limit;
        * keyword filter - active when the query has at least one keyword;
          keeps rows whose title contains any of them (case-insensitive).
    """
    results = search(query, model, df, index, top_k=top_k)
    lowered = query.lower()

    price_match = re.search(r"(under|below)\s*\$?(\d+(?:\.\d+)?)", lowered)
    price_under = float(price_match.group(2)) if price_match else None

    # ``is not None`` so that a limit of 0 is still treated as a limit.
    if price_under is not None and "price" in results.columns:
        # Coerce instead of ``astype(float)``: a single non-numeric price
        # would otherwise abort the whole filter.
        prices = pd.to_numeric(results["price"], errors="coerce")
        keep = (prices < price_under).to_numpy()
        results = results.assign(price=prices)[keep]

    # The price phrase is not a product descriptor; keep it out of the
    # title keywords.
    keyword_text = lowered
    if price_match:
        keyword_text = lowered[:price_match.start()] + " " + lowered[price_match.end():]

    stop_words = {"i", "want", "need", "the", "a", "for", "with", "to", "is", "it", "on", "of", "buy", "and", "in"}
    keywords = [kw for kw in keyword_text.split() if kw not in stop_words and len(kw) > 2]

    if not results.empty and keywords:
        pattern = "|".join(map(re.escape, keywords))
        results = results[results["title"].str.lower().str.contains(pattern, na=False)]

    return results
|
| 75 |
+
|
| 76 |
+
def rerank_by_similarity(query, results, model, top_n=5):
    """Re-order ``results`` by cosine similarity between query and title.

    Args:
        query: Free-text search string.
        results: DataFrame with a ``title`` column.
        model: Object whose ``encode(texts, convert_to_tensor=True)`` returns
            embeddings accepted by ``cos_sim``.
        top_n: Number of best-scoring rows to keep.

    Returns:
        ``results`` itself when it is empty; otherwise a new DataFrame with a
        ``similarity`` column, sorted by it in descending order and truncated
        to ``top_n`` rows. The input frame is not modified.
    """
    if results.empty:
        return results

    query_vec = model.encode([query], convert_to_tensor=True)
    titles = results["title"].astype(str).tolist()
    title_vecs = model.encode(titles, convert_to_tensor=True)
    scores = cos_sim(query_vec, title_vecs)[0].cpu().numpy()

    # ``assign`` builds a new frame, so the caller's DataFrame (often a
    # filtered slice) is not written to in place.
    ranked = results.assign(similarity=scores)
    return ranked.sort_values("similarity", ascending=False).head(top_n)
|
| 85 |
+
|
| 86 |
+
# ---------------------------
|
| 87 |
+
# Agent Tool: wraps search_plus
|
| 88 |
+
# ---------------------------
|
| 89 |
+
@tool
def product_search_tool(query: str) -> str:
    """Search for cellphone accessories using a natural query."""
    # NOTE(review): the docstring above is presumably what the agent sees as
    # the tool description - keep its wording stable.
    # ``model``, ``df_all`` and ``index`` are module-level globals assigned
    # later in this script, before the agent can invoke the tool.
    matches = search_plus(query, model, df_all, index, top_k=10)
    if not matches.empty:
        # Report at most five product titles, one per line.
        top_titles = matches["title"].head(5).tolist()
        return "\n".join(top_titles)
    return "No results found."
|
| 96 |
+
|
| 97 |
+
# ---------------------------
|
| 98 |
+
# Load all resources
|
| 99 |
+
# ---------------------------
|
| 100 |
+
# Module-level embedding model; read as a global by product_search_tool.
model = load_model()
|
| 101 |
+
# Module-level product table and FAISS index; read as globals by product_search_tool.
df_all, index = load_data()
|
| 102 |
+
|
| 103 |
+
# ---------------------------
|
| 104 |
+
# Agent setup
|
| 105 |
+
# ---------------------------
|
| 106 |
+
# Credentials come from the "openai" section of the Streamlit secrets:
# ``api_key`` is required, ``base_url`` is optional.
try:
    openai_secrets = st.secrets["openai"]
    api_key = openai_secrets["api_key"]
except (KeyError, FileNotFoundError):
    # Show an actionable message instead of a raw traceback, then end this
    # script run.
    st.error(
        "Missing API credentials: add an `[openai]` section with `api_key` "
        "(and optionally `base_url`) to your Streamlit secrets."
    )
    st.stop()

# Credentials are handed to the client through environment variables.
# (``os`` is already imported at the top of the file.)
os.environ["OPENAI_API_KEY"] = api_key
os.environ["OPENAI_API_BASE"] = openai_secrets.get("base_url", "https://api.openai.com/v1")

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
agent = initialize_agent(
    tools=[product_search_tool],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
|
| 117 |
+
|
| 118 |
+
# ---------------------------
|
| 119 |
+
# Streamlit Chat Interface
|
| 120 |
+
# ---------------------------
|
| 121 |
+
st.title("🤖 AI Product Search Agent")
st.markdown("Ask natural questions like 'cheap rugged iPhone case under $30'")

# The conversation is kept in session state as (role, text) pairs so it
# survives Streamlit's script reruns.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

user_input = st.chat_input("Ask about cellphone accessories...")

if user_input:
    st.session_state.chat_history.append(("user", user_input))
    with st.spinner("Agent is thinking..."):
        try:
            reply = agent.run(user_input)
        except Exception as e:
            # Top-level UI boundary: show the failure in the chat instead of
            # crashing the page.
            reply = f"⚠️ Agent error: {e}"
    st.session_state.chat_history.append(("agent", reply))

# Give every rendered message an explicit, unique key: without one, two
# messages with identical text collide on the auto-generated component key.
for turn, (role, msg) in enumerate(st.session_state.chat_history):
    message(msg, is_user=(role == "user"), key=f"chat_{turn}")
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
streamlit-chat
|
| 3 |
+
langchain
|
| 4 |
+
openai
|
| 5 |
+
sentence-transformers
|
| 6 |
+
faiss-cpu
|
| 7 |
+
pandas
|
| 8 |
+
numpy
|