noddysnots committed on
Commit
e9780b1
·
verified ·
1 Parent(s): cc2fcc8

Update product_recommender.py

Browse files
Files changed (1) hide show
  1. product_recommender.py +32 -44
product_recommender.py CHANGED
@@ -15,12 +15,10 @@ class DynamicRecommender:
15
  'Chrome/100.0.4896.75 Safari/537.36'
16
  )
17
  }
18
- # Load your model
19
  self.model = SentenceTransformer('all-mpnet-base-v2')
20
 
21
- # Pre‐define some candidate categories you might want to search for.
22
- # Adjust these to suit your domain. The more you add, the more "general"
23
- # your coverage becomes. They can be as broad or as niche as you like.
24
  self.candidate_categories = [
25
  "tech gadgets",
26
  "programming books",
@@ -33,7 +31,7 @@ class DynamicRecommender:
33
  "music instruments",
34
  "cooking utensils",
35
  "cookbooks",
36
- "art and painting supplies",
37
  "home decor",
38
  "pet supplies",
39
  "novels",
@@ -43,7 +41,7 @@ class DynamicRecommender:
43
  "toys",
44
  "gift hamper"
45
  ]
46
- # Pre‐encode those categories for faster scoring.
47
  self.category_embeddings = self.model.encode(self.candidate_categories, convert_to_tensor=True)
48
 
49
  # ------------------------------------------------------------------
@@ -63,7 +61,7 @@ class DynamicRecommender:
63
  soup = BeautifulSoup(html, 'html.parser')
64
  products = []
65
 
66
- # These selectors may need updating if Amazon changes HTML
67
  search_items = soup.select('.s-result-item')
68
 
69
  for item in search_items:
@@ -81,7 +79,7 @@ class DynamicRecommender:
81
  'price': product_price,
82
  'source': 'Amazon',
83
  'url': 'https://www.amazon.in' + product_url,
84
- 'description': f"This item is from Amazon related to '{product_name}'."
85
  })
86
  except Exception:
87
  continue
@@ -104,8 +102,8 @@ class DynamicRecommender:
104
  def _parse_flipkart_results(self, html: str) -> List[Dict]:
105
  soup = BeautifulSoup(html, 'html.parser')
106
  products = []
107
-
108
- # These selectors may need updating if Flipkart changes HTML
109
  item_cards = soup.select('._1AtVbE')
110
 
111
  for item in item_cards:
@@ -123,7 +121,7 @@ class DynamicRecommender:
123
  'price': product_price,
124
  'source': 'Flipkart',
125
  'url': 'https://www.flipkart.com' + product_url,
126
- 'description': f"This item is from Flipkart related to '{product_name}'."
127
  })
128
  except Exception:
129
  continue
@@ -147,7 +145,7 @@ class DynamicRecommender:
147
  soup = BeautifulSoup(html, 'html.parser')
148
  products = []
149
 
150
- # Likely need to update based on actual IGP HTML
151
  item_cards = soup.select('.product-item')
152
 
153
  for item in item_cards:
@@ -165,7 +163,7 @@ class DynamicRecommender:
165
  'price': product_price,
166
  'source': 'IGP',
167
  'url': 'https://www.igp.com' + product_url,
168
- 'description': f"This item is from IGP related to '{product_name}'."
169
  })
170
  except Exception:
171
  continue
@@ -173,26 +171,19 @@ class DynamicRecommender:
173
  return products[:5]
174
 
175
  # ------------------------------------------------------------------
176
- # Extract categories from user text using embeddings
177
  # ------------------------------------------------------------------
178
- def _extract_keywords(self, text: str) -> List[str]:
179
- """
180
- 1. Parse out age if present
181
- 2. Use embeddings to find top 2-3 matching categories
182
- from self.candidate_categories.
183
- 3. Combine them with the age if found.
184
- """
185
- # 1) Check for age with a regex
186
  age_match = re.search(r'age\s+(\d+)', text.lower())
187
  age = age_match.group(1) if age_match else None
188
 
189
- # 2) Use the entire user text as an embedding
190
  user_emb = self.model.encode(text, convert_to_tensor=True)
191
 
192
- # Compute similarity with each candidate category
193
- sims = util.cos_sim(user_emb, self.category_embeddings)[0] # shape: [num_categories]
194
- # Grab top 3 indices
195
- top_k = min(3, len(self.candidate_categories))
196
  top_results = torch.topk(sims, k=top_k)
197
 
198
  best_categories = []
@@ -202,7 +193,7 @@ class DynamicRecommender:
202
  cat_text = f"{cat_text} for {age} year old"
203
  best_categories.append(cat_text)
204
 
205
- print("Embedding-based categories:", best_categories)
206
  return best_categories
207
 
208
  # ------------------------------------------------------------------
@@ -210,25 +201,23 @@ class DynamicRecommender:
210
  # ------------------------------------------------------------------
211
  async def get_recommendations(self, text: str) -> List[Dict]:
212
  """
213
- Search across Amazon, Flipkart, and IGP based on the top category matches.
 
214
  """
215
  try:
216
- # 1) Figure out best categories (queries) from user text
217
- queries = self._extract_keywords(text)
218
 
219
- # 2) Search each site for each query
220
  all_products = []
221
- for query in queries:
222
- # For each query, hit Amazon, Flipkart, IGP
223
- amazon_products = await self.search_amazon(query)
224
- flipkart_products = await self.search_flipkart(query)
225
- igp_products = await self.search_igp(query)
226
 
227
- all_products.extend(amazon_products)
228
- all_products.extend(flipkart_products)
229
- all_products.extend(igp_products)
230
 
231
- # 3) De‐duplicate by product name
232
  seen = set()
233
  unique_products = []
234
  for product in all_products:
@@ -236,9 +225,8 @@ class DynamicRecommender:
236
  seen.add(product['name'])
237
  unique_products.append(product)
238
 
239
- # 4) Optionally slice or sort further
240
  return unique_products[:5]
241
 
242
  except Exception as e:
243
- print(f"Error in recommendations: {str(e)}")
244
- return []
 
15
  'Chrome/100.0.4896.75 Safari/537.36'
16
  )
17
  }
18
+ # Load SentenceTransformer for embedding-based recommendations
19
  self.model = SentenceTransformer('all-mpnet-base-v2')
20
 
21
+ # Pre‐define broad candidate categories. Adjust to your needs.
 
 
22
  self.candidate_categories = [
23
  "tech gadgets",
24
  "programming books",
 
31
  "music instruments",
32
  "cooking utensils",
33
  "cookbooks",
34
+ "art and painting supplies", # covers user "art" interest
35
  "home decor",
36
  "pet supplies",
37
  "novels",
 
41
  "toys",
42
  "gift hamper"
43
  ]
44
+ # Pre‐encode category texts
45
  self.category_embeddings = self.model.encode(self.candidate_categories, convert_to_tensor=True)
46
 
47
  # ------------------------------------------------------------------
 
61
  soup = BeautifulSoup(html, 'html.parser')
62
  products = []
63
 
64
+ # (Might need to tweak if Amazon changes HTML)
65
  search_items = soup.select('.s-result-item')
66
 
67
  for item in search_items:
 
79
  'price': product_price,
80
  'source': 'Amazon',
81
  'url': 'https://www.amazon.in' + product_url,
82
+ 'description': f"From Amazon: {product_name}"
83
  })
84
  except Exception:
85
  continue
 
102
  def _parse_flipkart_results(self, html: str) -> List[Dict]:
103
  soup = BeautifulSoup(html, 'html.parser')
104
  products = []
105
+
106
+ # (Might need to tweak if Flipkart changes HTML)
107
  item_cards = soup.select('._1AtVbE')
108
 
109
  for item in item_cards:
 
121
  'price': product_price,
122
  'source': 'Flipkart',
123
  'url': 'https://www.flipkart.com' + product_url,
124
+ 'description': f"From Flipkart: {product_name}"
125
  })
126
  except Exception:
127
  continue
 
145
  soup = BeautifulSoup(html, 'html.parser')
146
  products = []
147
 
148
+ # (Likely need to tweak if IGP changes HTML)
149
  item_cards = soup.select('.product-item')
150
 
151
  for item in item_cards:
 
163
  'price': product_price,
164
  'source': 'IGP',
165
  'url': 'https://www.igp.com' + product_url,
166
+ 'description': f"From IGP: {product_name}"
167
  })
168
  except Exception:
169
  continue
 
171
  return products[:5]
172
 
173
  # ------------------------------------------------------------------
174
+ # Embedding-based category extraction
175
  # ------------------------------------------------------------------
176
+ def _extract_categories(self, text: str) -> List[str]:
177
+ # 1. Check for age with a regex
 
 
 
 
 
 
178
  age_match = re.search(r'age\s+(\d+)', text.lower())
179
  age = age_match.group(1) if age_match else None
180
 
181
+ # 2. Encode user text
182
  user_emb = self.model.encode(text, convert_to_tensor=True)
183
 
184
+ # 3. Cosine similarity with candidate categories
185
+ sims = util.cos_sim(user_emb, self.category_embeddings)[0]
186
+ top_k = min(3, len(self.candidate_categories)) # pick top 3
 
187
  top_results = torch.topk(sims, k=top_k)
188
 
189
  best_categories = []
 
193
  cat_text = f"{cat_text} for {age} year old"
194
  best_categories.append(cat_text)
195
 
196
+ print("Top categories chosen via embeddings:", best_categories)
197
  return best_categories
198
 
199
  # ------------------------------------------------------------------
 
201
  # ------------------------------------------------------------------
202
  async def get_recommendations(self, text: str) -> List[Dict]:
203
  """
204
+ Search across Amazon, Flipkart, IGP based on top embedding matches,
205
+ then deduplicate, then return final list.
206
  """
207
  try:
208
+ # 1) Get top matching categories from user text
209
+ categories = self._extract_categories(text)
210
 
211
+ # 2) For each category, search across sites
212
  all_products = []
213
+ for c in categories:
214
+ amazon_products = await self.search_amazon(c)
215
+ flipkart_products = await self.search_flipkart(c)
216
+ igp_products = await self.search_igp(c)
 
217
 
218
+ all_products.extend(amazon_products + flipkart_products + igp_products)
 
 
219
 
220
+ # 3) Deduplicate
221
  seen = set()
222
  unique_products = []
223
  for product in all_products:
 
225
  seen.add(product['name'])
226
  unique_products.append(product)
227
 
 
228
  return unique_products[:5]
229
 
230
  except Exception as e:
231
+ print(f"Error in get_recommendations: {str(e)}")
232
+ return []