Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	Upload 8 files
Browse files- src/__init__.py +2 -0
- src/__pycache__/__init__.cpython-313.pyc +0 -0
- src/__pycache__/recommender.cpython-313.pyc +0 -0
- src/recommender.py +95 -0
- src/utils/__init__.py +13 -0
- src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- src/utils/__pycache__/text_processors.cpython-313.pyc +0 -0
- src/utils/text_processors.py +122 -0
    	
        src/__init__.py
    ADDED
    
    | @@ -0,0 +1,2 @@ | |
|  | |
|  | 
|  | |
| 1 | 
            +
            __version__ = '0.1.0'
         | 
| 2 | 
            +
            __author__ = 'Your Name'
         | 
    	
        src/__pycache__/__init__.cpython-313.pyc
    ADDED
    
    | Binary file (204 Bytes). View file | 
|  | 
    	
        src/__pycache__/recommender.cpython-313.pyc
    ADDED
    
    | Binary file (4.08 kB). View file | 
|  | 
    	
        src/recommender.py
    ADDED
    
    | @@ -0,0 +1,95 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # recommender.py
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            from transformers import pipeline
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            zero_shot = pipeline(
         | 
| 6 | 
            +
                "zero-shot-classification", 
         | 
| 7 | 
            +
                model="typeform/distilbert-base-uncased-mnli",
         | 
| 8 | 
            +
                device=-1
         | 
| 9 | 
            +
            )
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            def classify(text, labels):
                """Score ``text`` against ``labels`` with the module-level zero-shot pipeline.

                The pipeline is called with ``multi_label=True``. Its ``"labels"`` and
                ``"scores"`` sequences are paired position by position.

                Returns:
                    A list of ``{"label": ..., "score": ...}`` dicts, in the order the
                    pipeline reported them.
                """
                outcome = zero_shot(text, labels, multi_label=True)
                scored = []
                for name, value in zip(outcome["labels"], outcome["scores"]):
                    scored.append({"label": name, "score": value})
                return scored
         | 
| 18 | 
            +
             | 
| 19 | 
            +
             | 
| 20 | 
            +
            from .utils.text_processors import (
         | 
| 21 | 
            +
                extract_age,
         | 
| 22 | 
            +
                extract_gender,
         | 
| 23 | 
            +
                extract_interests,
         | 
| 24 | 
            +
                extract_dislikes
         | 
| 25 | 
            +
            )
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            class GiftRecommender:
                """Turn a free-text description of a person into gift suggestions.

                The text is parsed into a profile (age, gender, interests, dislikes) by
                the ``extract_*`` helpers. Each interest's category is then looked up in
                ``gift_rules`` to collect candidate gifts.
                """

                # Upper bound on the number of recommendations returned per request.
                MAX_RECOMMENDATIONS = 5

                def __init__(self):
                    # NOTE(review): the methods of this class never use these two
                    # pipelines (interest extraction is delegated to extract_interests).
                    # They are kept so the attributes remain available to callers.
                    self.zero_shot = pipeline("zero-shot-classification")
                    self.sentiment = pipeline("sentiment-analysis")

                    # Candidate labels handed to extract_interests.
                    self.interest_categories = [
                        "art", "music", "sports", "technology", "reading",
                        "travel", "cooking", "gaming", "fashion", "outdoor activities"
                    ]

                    # Gift ideas per category.
                    # NOTE(review): "fashion" and "outdoor activities" have no entry
                    # here, so interests in those categories yield no recommendations.
                    self.gift_rules = {
                        "art": ["art supplies set", "digital drawing tablet", "museum membership"],
                        "music": ["wireless headphones", "concert tickets", "vinyl records"],
                        "sports": ["fitness tracker", "sports equipment", "team merchandise"],
                        "technology": ["smart devices", "electronics", "tech gadgets"],
                        "gaming": ["gaming console", "gaming accessories", "game subscription"],
                        "travel": ["travel gear", "language courses", "travel guides"],
                        "reading": ["e-reader", "book subscription", "rare books"],
                        "cooking": ["cooking classes", "kitchen gadgets", "recipe books"]
                    }

                def get_gift_recommendations(self, text: str):
                    """Build a profile from ``text`` and match its interests to gifts.

                    Args:
                        text: Free-text description of the gift recipient.

                    Returns:
                        A dict with ``'profile'`` (keys ``age``, ``gender``,
                        ``interests``, ``dislikes``) and ``'recommendations'``: at most
                        ``MAX_RECOMMENDATIONS`` dicts with ``gift``, ``category`` and
                        ``reason``, each gift listed once.
                    """
                    profile = {
                        'age': extract_age(text),
                        'gender': extract_gender(text),
                        'interests': extract_interests(text, self.interest_categories),
                        'dislikes': extract_dislikes(text)
                    }

                    recommendations = []
                    # Two interests can map to the same category; without this set the
                    # same gift would appear several times in the short result list.
                    suggested = set()
                    for interest in profile['interests']:
                        category = interest['category']
                        for gift in self.gift_rules.get(category, ()):
                            if gift in suggested:
                                continue
                            suggested.add(gift)
                            recommendations.append({
                                'gift': gift,
                                'category': category,
                                'reason': f"Based on interest in {interest['phrase']}"
                            })

                    return {
                        'profile': profile,
                        'recommendations': recommendations[:self.MAX_RECOMMENDATIONS]
                    }

                def format_recommendations(self, results: dict) -> str:
                    """Render the output of ``get_gift_recommendations`` as plain text.

                    Args:
                        results: Dict with ``'profile'`` and ``'recommendations'`` keys.

                    Returns:
                        A newline-joined report: title, profile summary, then the
                        numbered recommendations. Empty sections are omitted.
                    """
                    profile = results['profile']
                    output = [
                        # The original literals here were mojibake for the gift emoji
                        # (and, further down, the bullet character).
                        "🎁 Gift Recommendations\n",
                        "Profile Summary:",
                        f"Age: {profile['age'] or 'Unknown'}",
                        f"Gender: {profile['gender'].title()}",
                    ]

                    if profile['interests']:
                        output.append("Interests: " + ", ".join(i['phrase'] for i in profile['interests']))
                    if profile['dislikes']:
                        output.append("Dislikes: " + ", ".join(profile['dislikes']))

                    if results['recommendations']:
                        output.append("\nTop Recommendations:")
                        for i, rec in enumerate(results['recommendations'], 1):
                            output.append(f"{i}. {rec['gift']}")
                            output.append(f"   • {rec['reason']}")

                    return "\n".join(output)
         | 
    	
        src/utils/__init__.py
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from .text_processors import (
         | 
| 2 | 
            +
                extract_age,
         | 
| 3 | 
            +
                extract_gender,
         | 
| 4 | 
            +
                extract_interests,
         | 
| 5 | 
            +
                extract_dislikes
         | 
| 6 | 
            +
            )
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            __all__ = [
         | 
| 9 | 
            +
                'extract_age',
         | 
| 10 | 
            +
                'extract_gender',
         | 
| 11 | 
            +
                'extract_interests',
         | 
| 12 | 
            +
                'extract_dislikes'
         | 
| 13 | 
            +
            ]
         | 
    	
        src/utils/__pycache__/__init__.cpython-313.pyc
    ADDED
    
    | Binary file (317 Bytes). View file | 
|  | 
    	
        src/utils/__pycache__/text_processors.cpython-313.pyc
    ADDED
    
    | Binary file (6.01 kB). View file | 
|  | 
    	
        src/utils/text_processors.py
    ADDED
    
    | @@ -0,0 +1,122 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import re
         | 
| 2 | 
            +
            from typing import Dict, List, Optional
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            from transformers import pipeline
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Instantiate your pipelines just once
         | 
| 7 | 
            +
            zero_shot = pipeline("zero-shot-classification")
         | 
| 8 | 
            +
            sentiment = pipeline("sentiment-analysis")
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            def extract_age(text: str) -> Optional[int]:
                """Return the age mentioned in ``text``, or ``None`` if there is none.

                Explicit mentions ("25 years old", "30-year-old") are preferred over
                bare numbers, so "2 kids ... 10 years old" yields 10 rather than 2.
                Bare one- to three-digit numbers are used only as a fallback.

                Args:
                    text: Free-text description of a person.

                Returns:
                    The first candidate strictly between 0 and 120, else ``None``.
                """
                explicit = re.findall(
                    r'\b(\d{1,3})\s*-?\s*years?\s*-?\s*old\b', text, flags=re.IGNORECASE
                )
                bare = re.findall(r'\b(\d{1,3})\b', text)

                # Keep scanning past implausible values instead of giving up on the
                # first number found.
                for candidate in (*explicit, *bare):
                    age = int(candidate)
                    if 0 < age < 120:
                        return age
                return None
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            def extract_gender(text: str) -> str:
                """Guess the gender of the person described in ``text``.

                The text is lower-cased and split into whole alphabetic words, so an
                indicator is recognised next to punctuation or any whitespace
                ("brother.", "She's", "her,") but never inside a longer word
                ("the", "weather").

                Args:
                    text: Free-text description of a person.

                Returns:
                    ``"male"``, ``"female"`` or ``"unknown"``. Male indicators are
                    checked first, so they win when both kinds are present.
                """
                gender_indicators = {
                    'male': ['he', 'him', 'his', 'brother', 'boyfriend', 'husband', 'son', 'dad', 'father'],
                    'female': ['she', 'her', 'hers', 'sister', 'girlfriend', 'wife', 'daughter', 'mom', 'mother']
                }

                words = set(re.findall(r"\b[a-z]+\b", text.lower()))

                for gender, indicators in gender_indicators.items():
                    if words.intersection(indicators):
                        return gender
                return "unknown"
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            def extract_interests(text: str, categories: List[str]) -> List[Dict]:
                """Find the things a person likes in ``text`` and classify each one.

                Phrases are taken from the words that follow "love(s)", "like(s)" or
                "enjoy(s)" (see ``_split_interest_phrases``). Each phrase is then passed
                to the module-level ``zero_shot`` and ``sentiment`` pipelines. The
                pipelines are no longer rebuilt on every call, and none is touched when
                the text contains no interest phrase.

                Example:
                  "She loves painting and enjoys traveling" -> phrases "painting", "traveling"
                  "She loves art and music" -> phrases "art", "music"

                Args:
                    text: Free-text description of a person.
                    categories: Candidate labels for the zero-shot classifier.

                Returns:
                    One dict per distinct phrase, in order of appearance, with keys
                    ``phrase``, ``category``, ``confidence``, ``sentiment`` and
                    ``sentiment_score``.
                """
                interests_list = []
                for phrase in _split_interest_phrases(text):
                    z_result = zero_shot(phrase, categories, multi_label=False)
                    s_result = sentiment(phrase)[0]
                    interests_list.append({
                        'phrase': phrase,  # original casing preserved
                        'category': z_result['labels'][0],
                        'confidence': z_result['scores'][0],
                        'sentiment': s_result['label'],
                        'sentiment_score': s_result['score']
                    })
                return interests_list


            def _split_interest_phrases(text: str) -> List[str]:
                """Return the distinct phrases that follow a liking verb in ``text``."""
                verb_set = {"love", "loves", "like", "likes", "enjoy", "enjoys"}
                tokens = re.findall(r"\S+", text)
                n = len(tokens)

                phrases = []
                seen = set()
                i = 0
                while i < n:
                    if tokens[i].lower() not in verb_set:
                        i += 1
                        continue

                    # The chunk runs from just after this verb up to the next verb
                    # (or the end of the text).
                    j = i + 1
                    while j < n and tokens[j].lower() not in verb_set:
                        j += 1
                    chunk_str = " ".join(tokens[i + 1:j])

                    # Split on commas and on "and" as a standalone word.
                    for part in re.split(r'\s*,\s*|\s*\band\b\s*', chunk_str, flags=re.IGNORECASE):
                        # Drop sentence punctuation so "traveling." and "traveling"
                        # are the same phrase.
                        candidate = part.strip(" \t\n.!?;:")
                        if candidate and candidate not in seen:
                            seen.add(candidate)
                            phrases.append(candidate)

                    i = j  # resume at the next verb
                return phrases
         | 
| 98 | 
            +
             | 
| 99 | 
            +
            def extract_dislikes(text: str) -> List[str]:
                """Return the things ``text`` says the person dislikes, lower-cased.

                A dislike is the phrase following "hates", "dislikes", "doesn't like"
                or "does not like". The phrase ends at the first " and ", at a comma,
                at sentence punctuation (. ! ? ;) or at the end of the text, so
                "She hates spiders." yields ``["spiders"]``.

                Args:
                    text: Free-text description of a person.

                Returns:
                    The dislike phrases in order of appearance; empty if none found.
                """
                text_lower = text.lower()
                dislike_pattern = (
                    r"(?:hates|dislikes|(?:doesn['’]t|does\s+not)\s+like)\s+"
                    r"([^,.!?;]+?)"
                    r"(?=\s+and\s+|\s*(?:[,.!?;]|$))"
                )

                dislikes = []
                for match in re.findall(dislike_pattern, text_lower):
                    cleaned = match.replace("doesn't like ", "").replace("does not like ", "").strip()
                    if cleaned:
                        dislikes.append(cleaned)

                return dislikes
         | 
| 113 | 
            +
             | 
| 114 | 
            +
            def format_profile(profile: Dict) -> str:
                """Render a profile dict as a bulleted plain-text summary.

                Args:
                    profile: Dict with keys ``age``, ``gender``, ``interests`` (dicts
                        carrying a ``phrase`` key) and ``dislikes`` (strings).

                Returns:
                    Newline-joined lines. A falsy age is shown as "Unknown"; the
                    interests and dislikes lines are omitted when those lists are empty.
                """
                output = [
                    "Profile Summary:",
                    f"- Age: {profile['age'] or 'Unknown'}",
                    f"- Gender: {profile['gender'].title()}",
                ]
                # Guarded like the dislikes line so an empty list does not leave a
                # dangling "- Interests: " line.
                if profile['interests']:
                    output.append("- Interests: " + ", ".join(i['phrase'] for i in profile['interests']))
                if profile['dislikes']:
                    output.append("- Dislikes: " + ", ".join(profile['dislikes']))
                return "\n".join(output)
         | 
