Update product_recommender.py
product_recommender.py CHANGED (+75 -73)
@@ -2,9 +2,9 @@ from typing import Dict, List
 import aiohttp
 import asyncio
 import re
+import torch
+from sentence_transformers import SentenceTransformer, util
 from bs4 import BeautifulSoup
-from sentence_transformers import SentenceTransformer
-import numpy as np
 
 class DynamicRecommender:
     def __init__(self):
@@ -15,17 +15,41 @@ class DynamicRecommender:
                 'Chrome/100.0.4896.75 Safari/537.36'
             )
         }
-        # Load your model
+        # Load your model
         self.model = SentenceTransformer('all-mpnet-base-v2')
 
+        # Pre-define some candidate categories you might want to search for.
+        # Adjust these to suit your domain. The more you add, the more "general"
+        # your coverage becomes. They can be as broad or as niche as you like.
+        self.candidate_categories = [
+            "tech gadgets",
+            "programming books",
+            "self help books",
+            "business books",
+            "leadership novels",
+            "fashion accessories",
+            "beauty products",
+            "board games",
+            "music instruments",
+            "cooking utensils",
+            "cookbooks",
+            "art and painting supplies",
+            "home decor",
+            "pet supplies",
+            "novels",
+            "gaming consoles",
+            "smartphones",
+            "camera gear",
+            "toys",
+            "gift hamper"
+        ]
+        # Pre-encode those categories for faster scoring.
+        self.category_embeddings = self.model.encode(self.candidate_categories, convert_to_tensor=True)
+
     # ------------------------------------------------------------------
     # Amazon search
     # ------------------------------------------------------------------
     async def search_amazon(self, query: str) -> List[Dict]:
-        """
-        Search Amazon for products by building the search URL
-        and parsing the resulting HTML.
-        """
         print(f"Searching Amazon for: {query}")
         search_url = f"https://www.amazon.in/s?k={query}"
         async with aiohttp.ClientSession() as session:
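A note on the hunk above: the candidate list is embedded once in `__init__` with `convert_to_tensor=True`, so each later request only pays to embed its own text before scoring. A minimal standalone sketch of that trade-off (the three category strings here are placeholders, not the committed list):

    from sentence_transformers import SentenceTransformer, util

    model = SentenceTransformer('all-mpnet-base-v2')
    categories = ["programming books", "board games", "pet supplies"]  # placeholder list

    # One-time cost: embed the fixed category list up front.
    category_embeddings = model.encode(categories, convert_to_tensor=True)

    # Per-request cost: embed only the incoming text, then score it.
    user_emb = model.encode("gift for a coder who loves puzzles", convert_to_tensor=True)
    scores = util.cos_sim(user_emb, category_embeddings)[0]
    print(sorted(zip(scores.tolist(), categories), reverse=True))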
@@ -39,7 +63,7 @@ class DynamicRecommender:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
 
-        # These selectors may need updating if Amazon changes
+        # These selectors may need updating if Amazon changes HTML
         search_items = soup.select('.s-result-item')
 
         for item in search_items:
@@ -47,32 +71,27 @@ class DynamicRecommender:
                 name_elem = item.select_one('.a-text-normal')
                 price_elem = item.select_one('.a-price-whole')
                 link_elem = item.select_one('a.a-link-normal')
-
                 if name_elem and price_elem and link_elem:
                     product_name = name_elem.get_text(strip=True)
                     product_price = price_elem.get_text(strip=True)
                     product_url = link_elem.get('href')
-
+
                     products.append({
                         'name': product_name,
                         'price': product_price,
                         'source': 'Amazon',
                         'url': 'https://www.amazon.in' + product_url,
-                        'description':
+                        'description': f"This item is from Amazon related to '{product_name}'."
                     })
             except Exception:
                 continue
 
-        print(f"Found {len(products)} Amazon products.")
         return products[:5]
 
     # ------------------------------------------------------------------
     # Flipkart search
     # ------------------------------------------------------------------
     async def search_flipkart(self, query: str) -> List[Dict]:
-        """
-        Search Flipkart for products.
-        """
         print(f"Searching Flipkart for: {query}")
         search_url = f"https://www.flipkart.com/search?q={query}"
         async with aiohttp.ClientSession() as session:
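The CSS selectors are the fragile part of these scrapers. Here is a self-contained sketch that exercises the same extraction logic against canned HTML, so the parsing can be tested offline; the markup is invented to match the selectors and will not mirror Amazon's real pages:

    from bs4 import BeautifulSoup

    html = """
    <div class="s-result-item">
      <a class="a-link-normal" href="/dp/B000000000">
        <span class="a-text-normal">Example Product</span>
      </a>
      <span class="a-price-whole">499</span>
    </div>
    """

    soup = BeautifulSoup(html, 'html.parser')
    for item in soup.select('.s-result-item'):
        name_elem = item.select_one('.a-text-normal')
        price_elem = item.select_one('.a-price-whole')
        link_elem = item.select_one('a.a-link-normal')
        if name_elem and price_elem and link_elem:
            print(name_elem.get_text(strip=True),
                  price_elem.get_text(strip=True),
                  link_elem.get('href'))
    # -> Example Product 499 /dp/B000000000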
@@ -86,7 +105,7 @@ class DynamicRecommender:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
 
-        # These selectors may need updating if Flipkart changes
+        # These selectors may need updating if Flipkart changes HTML
         item_cards = soup.select('._1AtVbE')
 
         for item in item_cards:
@@ -94,33 +113,27 @@ class DynamicRecommender:
                 name_elem = item.select_one('._4rR01T')
                 price_elem = item.select_one('._30jeq3')
                 link_elem = item.select_one('a')
-
                 if name_elem and price_elem and link_elem:
                     product_name = name_elem.get_text(strip=True)
                     product_price = price_elem.get_text(strip=True)
                     product_url = link_elem.get('href')
-
+
                     products.append({
                         'name': product_name,
                         'price': product_price,
                         'source': 'Flipkart',
                         'url': 'https://www.flipkart.com' + product_url,
-                        'description':
+                        'description': f"This item is from Flipkart related to '{product_name}'."
                     })
             except Exception:
                 continue
 
-        print(f"Found {len(products)} Flipkart products.")
         return products[:5]
 
     # ------------------------------------------------------------------
-    # IGP search
+    # IGP search
     # ------------------------------------------------------------------
     async def search_igp(self, query: str) -> List[Dict]:
-        """
-        Search IGP for products (gift store).
-        Adjust the selectors or approach as needed.
-        """
         print(f"Searching IGP for: {query}")
         search_url = f"https://www.igp.com/search/{query}"
         async with aiohttp.ClientSession() as session:
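One caveat that applies to all three search URLs: the queries built by `_extract_keywords` contain spaces, and they are interpolated into the URL unescaped. aiohttp's URL handling will usually percent-encode them, but quoting explicitly is safer; a small sketch:

    from urllib.parse import quote_plus

    query = "business books for 25 year old"
    search_url = f"https://www.amazon.in/s?k={quote_plus(query)}"
    print(search_url)  # https://www.amazon.in/s?k=business+books+for+25+year+old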
@@ -134,8 +147,7 @@ class DynamicRecommender:
         soup = BeautifulSoup(html, 'html.parser')
         products = []
 
-        #
-        # This is just an *example*; may not match actual IGP HTML
+        # Likely need to update based on actual IGP HTML
         item_cards = soup.select('.product-item')
 
         for item in item_cards:
@@ -143,86 +155,80 @@ class DynamicRecommender:
                 name_elem = item.select_one('.product-title')
                 price_elem = item.select_one('.product-price')
                 link_elem = item.select_one('a')
-
                 if name_elem and price_elem and link_elem:
                     product_name = name_elem.get_text(strip=True)
                     product_price = price_elem.get_text(strip=True)
                     product_url = link_elem.get('href')
-
+
                     products.append({
                         'name': product_name,
                         'price': product_price,
                         'source': 'IGP',
                         'url': 'https://www.igp.com' + product_url,
-                        'description':
+                        'description': f"This item is from IGP related to '{product_name}'."
                     })
             except Exception:
                 continue
 
-        print(f"Found {len(products)} IGP products.")
         return products[:5]
 
     # ------------------------------------------------------------------
-    # Extract
+    # Extract categories from user text using embeddings
     # ------------------------------------------------------------------
     def _extract_keywords(self, text: str) -> List[str]:
         """
+        1. Parse out age if present
+        2. Use embeddings to find top 2-3 matching categories
+           from self.candidate_categories.
+        3. Combine them with the age if found.
         """
-        # Try to find age
-        age_match = re.search(r'age\s+(\d+)', text_lower)
+        # 1) Check for age with a regex
+        age_match = re.search(r'age\s+(\d+)', text.lower())
         age = age_match.group(1) if age_match else None
 
-        #
-        if 'novel' in text_lower or 'leader' in text_lower or 'leadership' in text_lower:
-            interests.append('leadership novels')
-        if 'successful' in text_lower:
-            interests.extend(['self help books', 'business books'])
-
-        # You can decide how exactly you want to incorporate age
-        interests = [f"{interest} for {age} year old" for interest in interests]
-
-        print("Extracted keywords:", interests)
-        return interests
+        # 2) Use the entire user text as an embedding
+        user_emb = self.model.encode(text, convert_to_tensor=True)
+
+        # Compute similarity with each candidate category
+        sims = util.cos_sim(user_emb, self.category_embeddings)[0]  # shape: [num_categories]
+        # Grab top 3 indices
+        top_k = min(3, len(self.candidate_categories))
+        top_results = torch.topk(sims, k=top_k)
+
+        best_categories = []
+        for idx in top_results.indices:
+            cat_text = self.candidate_categories[idx]
+            if age:
+                cat_text = f"{cat_text} for {age} year old"
+            best_categories.append(cat_text)
+
+        print("Embedding-based categories:", best_categories)
+        return best_categories
 
     # ------------------------------------------------------------------
     # Main recommendations
     # ------------------------------------------------------------------
     async def get_recommendations(self, text: str) -> List[Dict]:
         """
+        Search across Amazon, Flipkart, and IGP based on the top category matches.
         """
         try:
-            #
+            # 1) Figure out best categories (queries) from user text
+            queries = self._extract_keywords(text)
 
-            #
+            # 2) Search each site for each query
             all_products = []
-            for
+            for query in queries:
+                # For each query, hit Amazon, Flipkart, IGP
+                amazon_products = await self.search_amazon(query)
+                flipkart_products = await self.search_flipkart(query)
+                igp_products = await self.search_igp(query)
 
                 all_products.extend(amazon_products)
                 all_products.extend(flipkart_products)
                 all_products.extend(igp_products)
 
-            #
+            # 3) De-duplicate by product name
             seen = set()
             unique_products = []
             for product in all_products:
@@ -230,12 +236,8 @@ class DynamicRecommender:
                     seen.add(product['name'])
                     unique_products.append(product)
 
-            #
-            final_results = unique_products[:5]
-
-            print(f"Returning {len(final_results)} products.")
-            return final_results
+            # 4) Optionally slice or sort further
+            return unique_products[:5]
 
         except Exception as e:
             print(f"Error in recommendations: {str(e)}")
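Finally, note that the committed loop awaits the three per-site searches one at a time. A hypothetical driver (invented names, not part of this commit) could run them concurrently with `asyncio.gather`, assuming product_recommender.py is importable:

    import asyncio
    from product_recommender import DynamicRecommender

    async def main():
        rec = DynamicRecommender()

        # The public entry point, as committed (sequential awaits per query):
        print(await rec.get_recommendations("gift for age 25, wants to be successful"))

        # A possible follow-up: overlap the three per-site searches per query.
        for query in rec._extract_keywords("gift for age 25, wants to be successful"):
            amazon, flipkart, igp = await asyncio.gather(
                rec.search_amazon(query),
                rec.search_flipkart(query),
                rec.search_igp(query),
            )
            print(query, len(amazon), len(flipkart), len(igp))

    asyncio.run(main())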