Update product_recommender.py
product_recommender.py  CHANGED  (+32, -44)
```diff
@@ -15,12 +15,10 @@ class DynamicRecommender:
                 'Chrome/100.0.4896.75 Safari/537.36'
             )
         }
-        # Load
+        # Load SentenceTransformer for embedding-based recommendations
         self.model = SentenceTransformer('all-mpnet-base-v2')
 
-        # Pre‐define
-        # Adjust these to suit your domain. The more you add, the more "general"
-        # your coverage becomes. They can be as broad or as niche as you like.
+        # Pre‐define broad candidate categories. Adjust to your needs.
         self.candidate_categories = [
             "tech gadgets",
             "programming books",
@@ -33,7 +31,7 @@ class DynamicRecommender:
             "music instruments",
             "cooking utensils",
             "cookbooks",
-            "art and painting supplies",
+            "art and painting supplies",  # covers user "art" interest
             "home decor",
             "pet supplies",
             "novels",
@@ -43,7 +41,7 @@ class DynamicRecommender:
             "toys",
             "gift hamper"
         ]
-        # Pre‐encode
+        # Pre‐encode category texts
         self.category_embeddings = self.model.encode(self.candidate_categories, convert_to_tensor=True)
 
     # ------------------------------------------------------------------
```
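For context, the pre-encoding added above produces a single tensor of category embeddings that later similarity lookups reuse. A minimal sketch of that setup, using the same model name as the diff; the shortened category list and the shape check are illustrative only:

```python
# Minimal sketch of the category pre-encoding step (illustrative values).
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-mpnet-base-v2')
candidate_categories = ["tech gadgets", "programming books", "cookbooks"]  # shortened list

# encode() with convert_to_tensor=True returns a torch tensor of shape
# (num_categories, embedding_dim); all-mpnet-base-v2 embeds into 768 dimensions.
category_embeddings = model.encode(candidate_categories, convert_to_tensor=True)
print(category_embeddings.shape)  # torch.Size([3, 768])
```

Encoding the categories once in `__init__` avoids re-embedding them on every request; only the user text needs to be encoded at query time.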
```diff
@@ -63,7 +61,7 @@ class DynamicRecommender:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
 
-        #
+        # (Might need to tweak if Amazon changes HTML)
         search_items = soup.select('.s-result-item')
 
         for item in search_items:
@@ -81,7 +79,7 @@ class DynamicRecommender:
                     'price': product_price,
                     'source': 'Amazon',
                     'url': 'https://www.amazon.in' + product_url,
-                    'description': f"
+                    'description': f"From Amazon: {product_name}"
                 })
             except Exception:
                 continue
@@ -104,8 +102,8 @@ class DynamicRecommender:
     def _parse_flipkart_results(self, html: str) -> List[Dict]:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
-
-        #
+
+        # (Might need to tweak if Flipkart changes HTML)
         item_cards = soup.select('._1AtVbE')
 
         for item in item_cards:
@@ -123,7 +121,7 @@ class DynamicRecommender:
                     'price': product_price,
                     'source': 'Flipkart',
                     'url': 'https://www.flipkart.com' + product_url,
-                    'description': f"
+                    'description': f"From Flipkart: {product_name}"
                 })
             except Exception:
                 continue
@@ -147,7 +145,7 @@ class DynamicRecommender:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
 
-        # Likely need to
+        # (Likely need to tweak if IGP changes HTML)
         item_cards = soup.select('.product-item')
 
         for item in item_cards:
@@ -165,7 +163,7 @@ class DynamicRecommender:
                     'price': product_price,
                     'source': 'IGP',
                     'url': 'https://www.igp.com' + product_url,
-                    'description': f"
+                    'description': f"From IGP: {product_name}"
                 })
             except Exception:
                 continue
```
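The removed `description` lines are cut off in this view, so for illustration here is a hedged sketch of a parser shaped like `_parse_amazon_results`. The `.s-result-item` selector, the `'From Amazon: ...'` description, and the five-item cap come from the diff; the inner selectors (`h2`, `.a-price-whole`, `a`) are assumptions and would need to be checked against the live markup:

```python
# Hypothetical sketch of an Amazon result parser; inner selectors are assumed.
from typing import Dict, List
from bs4 import BeautifulSoup

def parse_amazon_results(html: str) -> List[Dict]:
    soup = BeautifulSoup(html, 'html.parser')
    products = []

    for item in soup.select('.s-result-item'):           # selector from the diff
        try:
            name_el = item.select_one('h2')               # assumed selector
            price_el = item.select_one('.a-price-whole')  # assumed selector
            link_el = item.select_one('a')                # assumed selector
            if not (name_el and link_el):
                continue

            product_name = name_el.get_text(strip=True)
            product_price = price_el.get_text(strip=True) if price_el else 'N/A'
            product_url = link_el.get('href', '')

            products.append({
                'name': product_name,
                'price': product_price,
                'source': 'Amazon',
                'url': 'https://www.amazon.in' + product_url,
                'description': f"From Amazon: {product_name}",  # field added in this commit
            })
        except Exception:
            continue

    return products[:5]
```

The Flipkart and IGP parsers follow the same pattern with their own selectors (`._1AtVbE`, `.product-item`) and base URLs.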
```diff
@@ -173,26 +171,19 @@ class DynamicRecommender:
         return products[:5]
 
     # ------------------------------------------------------------------
-    #
+    # Embedding-based category extraction
     # ------------------------------------------------------------------
-    def
-
-        1. Parse out age if present
-        2. Use embeddings to find top 2-3 matching categories
-           from self.candidate_categories.
-        3. Combine them with the age if found.
-        """
-        # 1) Check for age with a regex
+    def _extract_categories(self, text: str) -> List[str]:
+        # 1. Check for age with a regex
         age_match = re.search(r'age\s+(\d+)', text.lower())
         age = age_match.group(1) if age_match else None
 
-        # 2
+        # 2. Encode user text
        user_emb = self.model.encode(text, convert_to_tensor=True)
 
-        #
-        sims = util.cos_sim(user_emb, self.category_embeddings)[0]
-        #
-        top_k = min(3, len(self.candidate_categories))
+        # 3. Cosine similarity with candidate categories
+        sims = util.cos_sim(user_emb, self.category_embeddings)[0]
+        top_k = min(3, len(self.candidate_categories))  # pick top 3
         top_results = torch.topk(sims, k=top_k)
 
         best_categories = []
@@ -202,7 +193,7 @@ class DynamicRecommender:
                 cat_text = f"{cat_text} for {age} year old"
             best_categories.append(cat_text)
 
-        print("
+        print("Top categories chosen via embeddings:", best_categories)
         return best_categories
 
     # ------------------------------------------------------------------
```
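Because the old docstring and several comments are truncated above, here is a self-contained sketch of the embedding-plus-age extraction that `_extract_categories` performs. The regex, the cosine-similarity/top-k flow, and the "for {age} year old" suffix mirror the diff; the shortened category list and the example query are illustrative:

```python
# Sketch of embedding-based category extraction, mirroring _extract_categories.
import re
from typing import List

import torch
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-mpnet-base-v2')
candidate_categories = ["tech gadgets", "art and painting supplies", "toys", "gift hamper"]
category_embeddings = model.encode(candidate_categories, convert_to_tensor=True)

def extract_categories(text: str) -> List[str]:
    # 1. Check for an age mention such as "age 8"
    age_match = re.search(r'age\s+(\d+)', text.lower())
    age = age_match.group(1) if age_match else None

    # 2. Encode the user text and compare against the pre-encoded categories
    user_emb = model.encode(text, convert_to_tensor=True)
    sims = util.cos_sim(user_emb, category_embeddings)[0]

    # 3. Keep the top 3 most similar categories, appending the age if present
    top_k = min(3, len(candidate_categories))
    top_results = torch.topk(sims, k=top_k)

    best_categories = []
    for idx in top_results.indices:
        cat_text = candidate_categories[int(idx)]
        if age:
            cat_text = f"{cat_text} for {age} year old"
        best_categories.append(cat_text)
    return best_categories

print(extract_categories("gift for my niece, age 8, who loves art"))
```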
```diff
@@ -210,25 +201,23 @@ class DynamicRecommender:
     # ------------------------------------------------------------------
     async def get_recommendations(self, text: str) -> List[Dict]:
         """
-        Search across Amazon, Flipkart,
+        Search across Amazon, Flipkart, IGP based on top embedding matches,
+        then deduplicate, then return final list.
         """
         try:
-            # 1)
-
+            # 1) Get top matching categories from user text
+            categories = self._extract_categories(text)
 
-            # 2)
+            # 2) For each category, search across sites
             all_products = []
-            for
-
-
-
-                igp_products = await self.search_igp(query)
+            for c in categories:
+                amazon_products = await self.search_amazon(c)
+                flipkart_products = await self.search_flipkart(c)
+                igp_products = await self.search_igp(c)
 
-                all_products.extend(amazon_products)
-                all_products.extend(flipkart_products)
-                all_products.extend(igp_products)
+                all_products.extend(amazon_products + flipkart_products + igp_products)
 
-            # 3)
+            # 3) Deduplicate
             seen = set()
             unique_products = []
             for product in all_products:
@@ -236,9 +225,8 @@ class DynamicRecommender:
                 seen.add(product['name'])
                 unique_products.append(product)
 
-            # 4) Optionally slice or sort further
             return unique_products[:5]
 
         except Exception as e:
-            print(f"Error in
-            return []
+            print(f"Error in get_recommendations: {str(e)}")
+            return []
```
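Finally, a hedged sketch of how the updated recommender might be exercised end to end; the no-argument constructor and the `product_recommender` import path are assumptions based on the file name, not shown in the diff:

```python
# Hypothetical usage sketch; constructor arguments (if any) are not shown in the diff.
import asyncio

from product_recommender import DynamicRecommender

async def main():
    recommender = DynamicRecommender()
    # get_recommendations extracts top categories from the text, searches
    # Amazon/Flipkart/IGP for each, deduplicates by product name, and
    # returns at most five products.
    products = await recommender.get_recommendations(
        "birthday gift for my dad, age 60, who loves cooking"
    )
    for p in products:
        print(p['source'], '-', p['name'], '-', p['price'])

if __name__ == "__main__":
    asyncio.run(main())
```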