Update product_recommender.py
product_recommender.py  CHANGED  (+32, -44)
```diff
@@ -15,12 +15,10 @@ class DynamicRecommender:
                 'Chrome/100.0.4896.75 Safari/537.36'
             )
         }
-        # Load
+        # Load SentenceTransformer for embedding-based recommendations
         self.model = SentenceTransformer('all-mpnet-base-v2')
 
-        # Pre‐define
-        # Adjust these to suit your domain. The more you add, the more "general"
-        # your coverage becomes. They can be as broad or as niche as you like.
+        # Pre‐define broad candidate categories. Adjust to your needs.
         self.candidate_categories = [
             "tech gadgets",
             "programming books",
@@ -33,7 +31,7 @@ class DynamicRecommender:
             "music instruments",
             "cooking utensils",
             "cookbooks",
-            "art and painting supplies",
+            "art and painting supplies",  # covers user "art" interest
             "home decor",
             "pet supplies",
             "novels",
@@ -43,7 +41,7 @@ class DynamicRecommender:
             "toys",
             "gift hamper"
         ]
-        # Pre‐encode
+        # Pre‐encode category texts
         self.category_embeddings = self.model.encode(self.candidate_categories, convert_to_tensor=True)
 
     # ------------------------------------------------------------------
```
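For context, the pre-encoding added above produces a single tensor of category embeddings that later similarity lookups reuse. A minimal sketch of that setup, using the same model name as the diff; the shortened category list and the shape check are illustrative only:

```python
# Minimal sketch of the category pre-encoding step (illustrative values).
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-mpnet-base-v2')
candidate_categories = ["tech gadgets", "programming books", "cookbooks"]  # shortened list

# encode() with convert_to_tensor=True returns a torch tensor of shape
# (num_categories, embedding_dim); all-mpnet-base-v2 embeds into 768 dimensions.
category_embeddings = model.encode(candidate_categories, convert_to_tensor=True)
print(category_embeddings.shape)  # torch.Size([3, 768])
```

Encoding the categories once in `__init__` avoids re-embedding them on every request; only the user text needs to be encoded at query time.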
```diff
@@ -63,7 +61,7 @@ class DynamicRecommender:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
 
-        #
+        # (Might need to tweak if Amazon changes HTML)
         search_items = soup.select('.s-result-item')
 
         for item in search_items:
@@ -81,7 +79,7 @@ class DynamicRecommender:
                     'price': product_price,
                     'source': 'Amazon',
                     'url': 'https://www.amazon.in' + product_url,
-                    'description': f"
+                    'description': f"From Amazon: {product_name}"
                 })
             except Exception:
                 continue
@@ -104,8 +102,8 @@ class DynamicRecommender:
     def _parse_flipkart_results(self, html: str) -> List[Dict]:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
-
-        #
+
+        # (Might need to tweak if Flipkart changes HTML)
         item_cards = soup.select('._1AtVbE')
 
         for item in item_cards:
@@ -123,7 +121,7 @@ class DynamicRecommender:
                     'price': product_price,
                     'source': 'Flipkart',
                     'url': 'https://www.flipkart.com' + product_url,
-                    'description': f"
+                    'description': f"From Flipkart: {product_name}"
                 })
             except Exception:
                 continue
@@ -147,7 +145,7 @@ class DynamicRecommender:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
 
-        # Likely need to
+        # (Likely need to tweak if IGP changes HTML)
         item_cards = soup.select('.product-item')
 
         for item in item_cards:
@@ -165,7 +163,7 @@ class DynamicRecommender:
                     'price': product_price,
                     'source': 'IGP',
                     'url': 'https://www.igp.com' + product_url,
-                    'description': f"
+                    'description': f"From IGP: {product_name}"
                 })
             except Exception:
                 continue
```
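The removed `description` lines are cut off in this view, so for illustration here is a hedged sketch of a parser shaped like `_parse_amazon_results`. The `.s-result-item` selector, the `'From Amazon: ...'` description, and the five-item cap come from the diff; the inner selectors (`h2`, `.a-price-whole`, `a`) are assumptions and would need to be checked against the live markup:

```python
# Hypothetical sketch of an Amazon result parser; inner selectors are assumed.
from typing import Dict, List
from bs4 import BeautifulSoup

def parse_amazon_results(html: str) -> List[Dict]:
    soup = BeautifulSoup(html, 'html.parser')
    products = []

    for item in soup.select('.s-result-item'):           # selector from the diff
        try:
            name_el = item.select_one('h2')               # assumed selector
            price_el = item.select_one('.a-price-whole')  # assumed selector
            link_el = item.select_one('a')                # assumed selector
            if not (name_el and link_el):
                continue

            product_name = name_el.get_text(strip=True)
            product_price = price_el.get_text(strip=True) if price_el else 'N/A'
            product_url = link_el.get('href', '')

            products.append({
                'name': product_name,
                'price': product_price,
                'source': 'Amazon',
                'url': 'https://www.amazon.in' + product_url,
                'description': f"From Amazon: {product_name}",  # field added in this commit
            })
        except Exception:
            continue

    return products[:5]
```

The Flipkart and IGP parsers follow the same pattern with their own selectors (`._1AtVbE`, `.product-item`) and base URLs.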
```diff
@@ -173,26 +171,19 @@ class DynamicRecommender:
         return products[:5]
 
     # ------------------------------------------------------------------
-    #
+    # Embedding-based category extraction
     # ------------------------------------------------------------------
-    def
-
-        1. Parse out age if present
-        2. Use embeddings to find top 2-3 matching categories
-           from self.candidate_categories.
-        3. Combine them with the age if found.
-        """
-        # 1) Check for age with a regex
+    def _extract_categories(self, text: str) -> List[str]:
+        # 1. Check for age with a regex
         age_match = re.search(r'age\s+(\d+)', text.lower())
         age = age_match.group(1) if age_match else None
 
-        # 2
+        # 2. Encode user text
        user_emb = self.model.encode(text, convert_to_tensor=True)
 
-        #
-        sims = util.cos_sim(user_emb, self.category_embeddings)[0]
-        #
-        top_k = min(3, len(self.candidate_categories))
+        # 3. Cosine similarity with candidate categories
+        sims = util.cos_sim(user_emb, self.category_embeddings)[0]
+        top_k = min(3, len(self.candidate_categories))  # pick top 3
         top_results = torch.topk(sims, k=top_k)
 
         best_categories = []
@@ -202,7 +193,7 @@ class DynamicRecommender:
                 cat_text = f"{cat_text} for {age} year old"
             best_categories.append(cat_text)
 
-        print("
+        print("Top categories chosen via embeddings:", best_categories)
         return best_categories
 
     # ------------------------------------------------------------------
```
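Because the old docstring and several comments are truncated above, here is a self-contained sketch of the embedding-plus-age extraction that `_extract_categories` performs. The regex, the cosine-similarity/top-k flow, and the "for {age} year old" suffix mirror the diff; the shortened category list and the example query are illustrative:

```python
# Sketch of embedding-based category extraction, mirroring _extract_categories.
import re
from typing import List

import torch
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-mpnet-base-v2')
candidate_categories = ["tech gadgets", "art and painting supplies", "toys", "gift hamper"]
category_embeddings = model.encode(candidate_categories, convert_to_tensor=True)

def extract_categories(text: str) -> List[str]:
    # 1. Check for an age mention such as "age 8"
    age_match = re.search(r'age\s+(\d+)', text.lower())
    age = age_match.group(1) if age_match else None

    # 2. Encode the user text and compare against the pre-encoded categories
    user_emb = model.encode(text, convert_to_tensor=True)
    sims = util.cos_sim(user_emb, category_embeddings)[0]

    # 3. Keep the top 3 most similar categories, appending the age if present
    top_k = min(3, len(candidate_categories))
    top_results = torch.topk(sims, k=top_k)

    best_categories = []
    for idx in top_results.indices:
        cat_text = candidate_categories[int(idx)]
        if age:
            cat_text = f"{cat_text} for {age} year old"
        best_categories.append(cat_text)
    return best_categories

print(extract_categories("gift for my niece, age 8, who loves art"))
```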
```diff
@@ -210,25 +201,23 @@ class DynamicRecommender:
     # ------------------------------------------------------------------
     async def get_recommendations(self, text: str) -> List[Dict]:
         """
-        Search across Amazon, Flipkart,
+        Search across Amazon, Flipkart, IGP based on top embedding matches,
+        then deduplicate, then return final list.
         """
         try:
-            # 1)
-
+            # 1) Get top matching categories from user text
+            categories = self._extract_categories(text)
 
-            # 2)
+            # 2) For each category, search across sites
             all_products = []
-            for
-
-
-
-                igp_products = await self.search_igp(query)
+            for c in categories:
+                amazon_products = await self.search_amazon(c)
+                flipkart_products = await self.search_flipkart(c)
+                igp_products = await self.search_igp(c)
 
-                all_products.extend(amazon_products)
-                all_products.extend(flipkart_products)
-                all_products.extend(igp_products)
+                all_products.extend(amazon_products + flipkart_products + igp_products)
 
-            # 3)
+            # 3) Deduplicate
             seen = set()
             unique_products = []
             for product in all_products:
@@ -236,9 +225,8 @@ class DynamicRecommender:
                 seen.add(product['name'])
                 unique_products.append(product)
 
-            # 4) Optionally slice or sort further
             return unique_products[:5]
 
         except Exception as e:
-            print(f"Error in
-            return []
+            print(f"Error in get_recommendations: {str(e)}")
+            return []
```
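Finally, a hedged sketch of how the updated recommender might be exercised end to end; the no-argument constructor and the `product_recommender` import path are assumptions based on the file name, not shown in the diff:

```python
# Hypothetical usage sketch; constructor arguments (if any) are not shown in the diff.
import asyncio

from product_recommender import DynamicRecommender

async def main():
    recommender = DynamicRecommender()
    # get_recommendations extracts top categories from the text, searches
    # Amazon/Flipkart/IGP for each, deduplicates by product name, and
    # returns at most five products.
    products = await recommender.get_recommendations(
        "birthday gift for my dad, age 60, who loves cooking"
    )
    for p in products:
        print(p['source'], '-', p['name'], '-', p['price'])

if __name__ == "__main__":
    asyncio.run(main())
```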