noddysnots committed on
Commit
cc2fcc8
Β·
verified Β·
1 Parent(s): 0f58622

Update product_recommender.py

Browse files
Files changed (1) hide show
  1. product_recommender.py +75 -73
product_recommender.py CHANGED
@@ -2,9 +2,9 @@ from typing import Dict, List
2
  import aiohttp
3
  import asyncio
4
  import re
 
 
5
  from bs4 import BeautifulSoup
6
- from sentence_transformers import SentenceTransformer
7
- import numpy as np
8
 
9
  class DynamicRecommender:
10
  def __init__(self):
@@ -15,17 +15,41 @@ class DynamicRecommender:
15
  'Chrome/100.0.4896.75 Safari/537.36'
16
  )
17
  }
18
- # Load your model if you need it for further logic
19
  self.model = SentenceTransformer('all-mpnet-base-v2')
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  # ------------------------------------------------------------------
22
  # Amazon search
23
  # ------------------------------------------------------------------
24
  async def search_amazon(self, query: str) -> List[Dict]:
25
- """
26
- Search Amazon for products by building the search URL
27
- and parsing the resulting HTML.
28
- """
29
  print(f"Searching Amazon for: {query}")
30
  search_url = f"https://www.amazon.in/s?k={query}"
31
  async with aiohttp.ClientSession() as session:
@@ -39,7 +63,7 @@ class DynamicRecommender:
39
  soup = BeautifulSoup(html, 'html.parser')
40
  products = []
41
 
42
- # These selectors may need updating if Amazon changes their HTML
43
  search_items = soup.select('.s-result-item')
44
 
45
  for item in search_items:
@@ -47,32 +71,27 @@ class DynamicRecommender:
47
  name_elem = item.select_one('.a-text-normal')
48
  price_elem = item.select_one('.a-price-whole')
49
  link_elem = item.select_one('a.a-link-normal')
50
-
51
  if name_elem and price_elem and link_elem:
52
  product_name = name_elem.get_text(strip=True)
53
  product_price = price_elem.get_text(strip=True)
54
  product_url = link_elem.get('href')
55
-
56
  products.append({
57
  'name': product_name,
58
  'price': product_price,
59
  'source': 'Amazon',
60
  'url': 'https://www.amazon.in' + product_url,
61
- 'description': 'Leadership/novel recommendation from Amazon'
62
  })
63
  except Exception:
64
  continue
65
 
66
- print(f"Found {len(products)} Amazon products.")
67
  return products[:5]
68
 
69
  # ------------------------------------------------------------------
70
  # Flipkart search
71
  # ------------------------------------------------------------------
72
  async def search_flipkart(self, query: str) -> List[Dict]:
73
- """
74
- Search Flipkart for products.
75
- """
76
  print(f"Searching Flipkart for: {query}")
77
  search_url = f"https://www.flipkart.com/search?q={query}"
78
  async with aiohttp.ClientSession() as session:
@@ -86,7 +105,7 @@ class DynamicRecommender:
86
  soup = BeautifulSoup(html, 'html.parser')
87
  products = []
88
 
89
- # These selectors may need updating if Flipkart changes their HTML
90
  item_cards = soup.select('._1AtVbE')
91
 
92
  for item in item_cards:
@@ -94,33 +113,27 @@ class DynamicRecommender:
94
  name_elem = item.select_one('._4rR01T')
95
  price_elem = item.select_one('._30jeq3')
96
  link_elem = item.select_one('a')
97
-
98
  if name_elem and price_elem and link_elem:
99
  product_name = name_elem.get_text(strip=True)
100
  product_price = price_elem.get_text(strip=True)
101
  product_url = link_elem.get('href')
102
-
103
  products.append({
104
  'name': product_name,
105
  'price': product_price,
106
  'source': 'Flipkart',
107
  'url': 'https://www.flipkart.com' + product_url,
108
- 'description': 'Leadership/novel recommendation from Flipkart'
109
  })
110
  except Exception:
111
  continue
112
 
113
- print(f"Found {len(products)} Flipkart products.")
114
  return products[:5]
115
 
116
  # ------------------------------------------------------------------
117
- # IGP search (example approach; may need updating)
118
  # ------------------------------------------------------------------
119
  async def search_igp(self, query: str) -> List[Dict]:
120
- """
121
- Search IGP for products (gift store).
122
- Adjust the selectors or approach as needed.
123
- """
124
  print(f"Searching IGP for: {query}")
125
  search_url = f"https://www.igp.com/search/{query}"
126
  async with aiohttp.ClientSession() as session:
@@ -134,8 +147,7 @@ class DynamicRecommender:
134
  soup = BeautifulSoup(html, 'html.parser')
135
  products = []
136
 
137
- # You must figure out correct selectors for IGP
138
- # This is just an *example*; may not match actual IGP HTML
139
  item_cards = soup.select('.product-item')
140
 
141
  for item in item_cards:
@@ -143,86 +155,80 @@ class DynamicRecommender:
143
  name_elem = item.select_one('.product-title')
144
  price_elem = item.select_one('.product-price')
145
  link_elem = item.select_one('a')
146
-
147
  if name_elem and price_elem and link_elem:
148
  product_name = name_elem.get_text(strip=True)
149
  product_price = price_elem.get_text(strip=True)
150
  product_url = link_elem.get('href')
151
-
152
  products.append({
153
  'name': product_name,
154
  'price': product_price,
155
  'source': 'IGP',
156
  'url': 'https://www.igp.com' + product_url,
157
- 'description': 'Gift idea from IGP'
158
  })
159
  except Exception:
160
  continue
161
 
162
- print(f"Found {len(products)} IGP products.")
163
  return products[:5]
164
 
165
  # ------------------------------------------------------------------
166
- # Extract keywords / fallback
167
  # ------------------------------------------------------------------
168
  def _extract_keywords(self, text: str) -> List[str]:
169
  """
170
- Extract relevant search keywords from input text.
171
- You can expand these rules or use the entire text as fallback.
 
 
172
  """
173
- text_lower = text.lower()
174
-
175
- # Try to find age
176
- age_match = re.search(r'age\s+(\d+)', text_lower)
177
  age = age_match.group(1) if age_match else None
178
 
179
- interests = []
 
180
 
181
- # Some sample rules
182
- if 'software' in text_lower or 'engineer' in text_lower:
183
- interests.extend(['programming books', 'tech gadgets'])
184
- if 'books' in text_lower:
185
- interests.append('books')
186
- if 'novel' in text_lower or 'leader' in text_lower or 'leadership' in text_lower:
187
- interests.append('leadership novels')
188
- if 'successful' in text_lower:
189
- interests.extend(['self help books', 'business books'])
190
 
191
- # If we found no interests at all, fallback to using the entire text
192
- if not interests:
193
- interests.append(text)
 
 
 
194
 
195
- # Optionally add "for 25 year old" context if age is found
196
- if age:
197
- # You can decide how exactly you want to incorporate age
198
- interests = [f"{interest} for {age} year old" for interest in interests]
199
-
200
- print("Extracted keywords:", interests)
201
- return interests
202
 
203
  # ------------------------------------------------------------------
204
  # Main recommendations
205
  # ------------------------------------------------------------------
206
  async def get_recommendations(self, text: str) -> List[Dict]:
207
  """
208
- Get personalized recommendations from Amazon, Flipkart, and IGP.
209
  """
210
  try:
211
- # Step 1: Extract keywords from user input
212
- keywords = self._extract_keywords(text)
213
 
214
- # Step 2: Search across multiple sources
215
  all_products = []
216
- for keyword in keywords:
217
- amazon_products = await self.search_amazon(keyword)
218
- flipkart_products = await self.search_flipkart(keyword)
219
- igp_products = await self.search_igp(keyword) # new
 
220
 
221
  all_products.extend(amazon_products)
222
  all_products.extend(flipkart_products)
223
  all_products.extend(igp_products)
224
 
225
- # Step 3: De-duplicate by product name
226
  seen = set()
227
  unique_products = []
228
  for product in all_products:
@@ -230,12 +236,8 @@ class DynamicRecommender:
230
  seen.add(product['name'])
231
  unique_products.append(product)
232
 
233
- # Step 4: Optionally, sort by "relevance" if desired
234
- # For now, we just slice the first five
235
- final_results = unique_products[:5]
236
-
237
- print(f"Returning {len(final_results)} products.")
238
- return final_results
239
 
240
  except Exception as e:
241
  print(f"Error in recommendations: {str(e)}")
 
2
  import aiohttp
3
  import asyncio
4
  import re
5
+ import torch
6
+ from sentence_transformers import SentenceTransformer, util
7
  from bs4 import BeautifulSoup
 
 
8
 
9
  class DynamicRecommender:
10
  def __init__(self):
 
15
  'Chrome/100.0.4896.75 Safari/537.36'
16
  )
17
  }
18
+ # Load your model
19
  self.model = SentenceTransformer('all-mpnet-base-v2')
20
 
21
+ # Pre-define some candidate categories you might want to search for.
22
+ # Adjust these to suit your domain. The more you add, the more "general"
23
+ # your coverage becomes. They can be as broad or as niche as you like.
24
+ self.candidate_categories = [
25
+ "tech gadgets",
26
+ "programming books",
27
+ "self help books",
28
+ "business books",
29
+ "leadership novels",
30
+ "fashion accessories",
31
+ "beauty products",
32
+ "board games",
33
+ "music instruments",
34
+ "cooking utensils",
35
+ "cookbooks",
36
+ "art and painting supplies",
37
+ "home decor",
38
+ "pet supplies",
39
+ "novels",
40
+ "gaming consoles",
41
+ "smartphones",
42
+ "camera gear",
43
+ "toys",
44
+ "gift hamper"
45
+ ]
46
+ # Pre-encode those categories for faster scoring.
47
+ self.category_embeddings = self.model.encode(self.candidate_categories, convert_to_tensor=True)
48
+
49
  # ------------------------------------------------------------------
50
  # Amazon search
51
  # ------------------------------------------------------------------
52
  async def search_amazon(self, query: str) -> List[Dict]:
 
 
 
 
53
  print(f"Searching Amazon for: {query}")
54
  search_url = f"https://www.amazon.in/s?k={query}"
55
  async with aiohttp.ClientSession() as session:
 
63
  soup = BeautifulSoup(html, 'html.parser')
64
  products = []
65
 
66
+ # These selectors may need updating if Amazon changes HTML
67
  search_items = soup.select('.s-result-item')
68
 
69
  for item in search_items:
 
71
  name_elem = item.select_one('.a-text-normal')
72
  price_elem = item.select_one('.a-price-whole')
73
  link_elem = item.select_one('a.a-link-normal')
 
74
  if name_elem and price_elem and link_elem:
75
  product_name = name_elem.get_text(strip=True)
76
  product_price = price_elem.get_text(strip=True)
77
  product_url = link_elem.get('href')
78
+
79
  products.append({
80
  'name': product_name,
81
  'price': product_price,
82
  'source': 'Amazon',
83
  'url': 'https://www.amazon.in' + product_url,
84
+ 'description': f"This item is from Amazon related to '{product_name}'."
85
  })
86
  except Exception:
87
  continue
88
 
 
89
  return products[:5]
90
 
91
  # ------------------------------------------------------------------
92
  # Flipkart search
93
  # ------------------------------------------------------------------
94
  async def search_flipkart(self, query: str) -> List[Dict]:
 
 
 
95
  print(f"Searching Flipkart for: {query}")
96
  search_url = f"https://www.flipkart.com/search?q={query}"
97
  async with aiohttp.ClientSession() as session:
 
105
  soup = BeautifulSoup(html, 'html.parser')
106
  products = []
107
 
108
+ # These selectors may need updating if Flipkart changes HTML
109
  item_cards = soup.select('._1AtVbE')
110
 
111
  for item in item_cards:
 
113
  name_elem = item.select_one('._4rR01T')
114
  price_elem = item.select_one('._30jeq3')
115
  link_elem = item.select_one('a')
 
116
  if name_elem and price_elem and link_elem:
117
  product_name = name_elem.get_text(strip=True)
118
  product_price = price_elem.get_text(strip=True)
119
  product_url = link_elem.get('href')
120
+
121
  products.append({
122
  'name': product_name,
123
  'price': product_price,
124
  'source': 'Flipkart',
125
  'url': 'https://www.flipkart.com' + product_url,
126
+ 'description': f"This item is from Flipkart related to '{product_name}'."
127
  })
128
  except Exception:
129
  continue
130
 
 
131
  return products[:5]
132
 
133
  # ------------------------------------------------------------------
134
+ # IGP search
135
  # ------------------------------------------------------------------
136
  async def search_igp(self, query: str) -> List[Dict]:
 
 
 
 
137
  print(f"Searching IGP for: {query}")
138
  search_url = f"https://www.igp.com/search/{query}"
139
  async with aiohttp.ClientSession() as session:
 
147
  soup = BeautifulSoup(html, 'html.parser')
148
  products = []
149
 
150
+ # Likely need to update based on actual IGP HTML
 
151
  item_cards = soup.select('.product-item')
152
 
153
  for item in item_cards:
 
155
  name_elem = item.select_one('.product-title')
156
  price_elem = item.select_one('.product-price')
157
  link_elem = item.select_one('a')
 
158
  if name_elem and price_elem and link_elem:
159
  product_name = name_elem.get_text(strip=True)
160
  product_price = price_elem.get_text(strip=True)
161
  product_url = link_elem.get('href')
162
+
163
  products.append({
164
  'name': product_name,
165
  'price': product_price,
166
  'source': 'IGP',
167
  'url': 'https://www.igp.com' + product_url,
168
+ 'description': f"This item is from IGP related to '{product_name}'."
169
  })
170
  except Exception:
171
  continue
172
 
 
173
  return products[:5]
174
 
175
  # ------------------------------------------------------------------
176
+ # Extract categories from user text using embeddings
177
  # ------------------------------------------------------------------
178
  def _extract_keywords(self, text: str) -> List[str]:
179
  """
180
+ 1. Parse out age if present
181
+ 2. Use embeddings to find top 2-3 matching categories
182
+ from self.candidate_categories.
183
+ 3. Combine them with the age if found.
184
  """
185
+ # 1) Check for age with a regex
186
+ age_match = re.search(r'age\s+(\d+)', text.lower())
 
 
187
  age = age_match.group(1) if age_match else None
188
 
189
+ # 2) Use the entire user text as an embedding
190
+ user_emb = self.model.encode(text, convert_to_tensor=True)
191
 
192
+ # Compute similarity with each candidate category
193
+ sims = util.cos_sim(user_emb, self.category_embeddings)[0] # shape: [num_categories]
194
+ # Grab top 3 indices
195
+ top_k = min(3, len(self.candidate_categories))
196
+ top_results = torch.topk(sims, k=top_k)
 
 
 
 
197
 
198
+ best_categories = []
199
+ for idx in top_results.indices:
200
+ cat_text = self.candidate_categories[idx]
201
+ if age:
202
+ cat_text = f"{cat_text} for {age} year old"
203
+ best_categories.append(cat_text)
204
 
205
+ print("Embedding-based categories:", best_categories)
206
+ return best_categories
 
 
 
 
 
207
 
208
  # ------------------------------------------------------------------
209
  # Main recommendations
210
  # ------------------------------------------------------------------
211
  async def get_recommendations(self, text: str) -> List[Dict]:
212
  """
213
+ Search across Amazon, Flipkart, and IGP based on the top category matches.
214
  """
215
  try:
216
+ # 1) Figure out best categories (queries) from user text
217
+ queries = self._extract_keywords(text)
218
 
219
+ # 2) Search each site for each query
220
  all_products = []
221
+ for query in queries:
222
+ # For each query, hit Amazon, Flipkart, IGP
223
+ amazon_products = await self.search_amazon(query)
224
+ flipkart_products = await self.search_flipkart(query)
225
+ igp_products = await self.search_igp(query)
226
 
227
  all_products.extend(amazon_products)
228
  all_products.extend(flipkart_products)
229
  all_products.extend(igp_products)
230
 
231
+ # 3) De-duplicate by product name
232
  seen = set()
233
  unique_products = []
234
  for product in all_products:
 
236
  seen.add(product['name'])
237
  unique_products.append(product)
238
 
239
+ # 4) Optionally slice or sort further
240
+ return unique_products[:5]
 
 
 
 
241
 
242
  except Exception as e:
243
  print(f"Error in recommendations: {str(e)}")