Spaces:
Paused
Paused
Upload 8 files
Browse files- src/__init__.py +2 -0
- src/__pycache__/__init__.cpython-313.pyc +0 -0
- src/__pycache__/recommender.cpython-313.pyc +0 -0
- src/recommender.py +95 -0
- src/utils/__init__.py +13 -0
- src/utils/__pycache__/__init__.cpython-313.pyc +0 -0
- src/utils/__pycache__/text_processors.cpython-313.pyc +0 -0
- src/utils/text_processors.py +122 -0
src/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Package metadata.
__version__ = "0.1.0"
__author__ = "Your Name"
|
src/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (204 Bytes). View file
|
|
|
src/__pycache__/recommender.cpython-313.pyc
ADDED
|
Binary file (4.08 kB). View file
|
|
|
src/recommender.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# recommender.py
|
| 2 |
+
|
| 3 |
+
from transformers import pipeline
|
| 4 |
+
|
| 5 |
+
# Shared zero-shot classifier used by classify(); built once at import time.
zero_shot = pipeline(
    task="zero-shot-classification",
    model="typeform/distilbert-base-uncased-mnli",
    device=-1,
)
|
| 10 |
+
|
| 11 |
+
def classify(text, labels):
    """Score ``text`` against ``labels`` with the module-level zero-shot pipeline.

    The pipeline is invoked with ``multi_label=True``.

    Returns:
        A list of ``{"label": ..., "score": ...}`` dicts, one per entry of the
        pipeline's ``"labels"`` / ``"scores"`` output, in the order returned.
    """
    outcome = zero_shot(text, labels, multi_label=True)
    paired = zip(outcome["labels"], outcome["scores"])
    return [{"label": name, "score": value} for name, value in paired]
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
from .utils.text_processors import (
|
| 21 |
+
extract_age,
|
| 22 |
+
extract_gender,
|
| 23 |
+
extract_interests,
|
| 24 |
+
extract_dislikes
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
class GiftRecommender:
    """Suggest gifts for a person described in free text.

    The text is turned into a profile (age, gender, interests, dislikes) by the
    ``extract_*`` helpers imported from ``.utils.text_processors``; each
    interest's category is then looked up in a fixed category -> gifts table.
    """

    def __init__(self):
        # NOTE(review): neither pipeline is called by the methods of this
        # class; they are kept so code reading these attributes keeps working.
        self.zero_shot = pipeline("zero-shot-classification")
        self.sentiment = pipeline("sentiment-analysis")

        # List of possible interest categories
        self.interest_categories = [
            "art", "music", "sports", "technology", "reading",
            "travel", "cooking", "gaming", "fashion", "outdoor activities"
        ]

        # Pre-defined gift suggestions for each category.
        # Categories without an entry here (currently "fashion" and
        # "outdoor activities") yield no recommendations.
        self.gift_rules = {
            "art": ["art supplies set", "digital drawing tablet", "museum membership"],
            "music": ["wireless headphones", "concert tickets", "vinyl records"],
            "sports": ["fitness tracker", "sports equipment", "team merchandise"],
            "technology": ["smart devices", "electronics", "tech gadgets"],
            "gaming": ["gaming console", "gaming accessories", "game subscription"],
            "travel": ["travel gear", "language courses", "travel guides"],
            "reading": ["e-reader", "book subscription", "rare books"],
            "cooking": ["cooking classes", "kitchen gadgets", "recipe books"]
        }

    def get_gift_recommendations(self, text: str) -> dict:
        """Build a profile from ``text`` and return up to five gift ideas.

        Args:
            text: Free-text description of the gift recipient.

        Returns:
            ``{'profile': {...}, 'recommendations': [...]}`` where each
            recommendation has the keys ``'gift'``, ``'category'`` and
            ``'reason'``. A gift is listed at most once, attributed to the
            first interest that produced it.
        """
        # Build the user's profile from the text
        profile = {
            'age': extract_age(text),
            'gender': extract_gender(text),
            'interests': extract_interests(text, self.interest_categories),
            'dislikes': extract_dislikes(text)
        }

        # Match each extracted interest to possible gift ideas
        recommendations = []
        already_suggested = set()
        for interest in profile['interests']:
            cat = interest['category']
            for gift in self.gift_rules.get(cat, ()):
                # Several phrases can map to the same category; without this
                # guard the same gifts would fill the top-5 list repeatedly.
                if gift in already_suggested:
                    continue
                already_suggested.add(gift)
                recommendations.append({
                    'gift': gift,
                    'category': cat,
                    'reason': f"Based on interest in {interest['phrase']}"
                })

        # Limit to top 5 for demonstration
        return {'profile': profile, 'recommendations': recommendations[:5]}

    def format_recommendations(self, results: dict) -> str:
        """Render the dict produced by ``get_gift_recommendations`` as text.

        Args:
            results: Mapping with a ``'profile'`` dict (keys ``'age'``,
                ``'gender'``, ``'interests'``, ``'dislikes'``) and a
                ``'recommendations'`` list.

        Returns:
            A newline-joined, human-readable summary. The interests, dislikes
            and recommendations sections are omitted when empty.
        """
        profile = results['profile']
        output = [
            "🎁 Gift Recommendations\n",
            "Profile Summary:",
            f"Age: {profile['age'] or 'Unknown'}",
            f"Gender: {profile['gender'].title()}",
        ]

        if profile['interests']:
            output.append("Interests: " + ", ".join(i['phrase'] for i in profile['interests']))
        if profile['dislikes']:
            output.append("Dislikes: " + ", ".join(profile['dislikes']))

        if results['recommendations']:
            output.append("\nTop Recommendations:")
            for i, rec in enumerate(results['recommendations'], 1):
                output.append(f"{i}. {rec['gift']}")
                output.append(f" • {rec['reason']}")

        return "\n".join(output)
|
src/utils/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .text_processors import (
|
| 2 |
+
extract_age,
|
| 3 |
+
extract_gender,
|
| 4 |
+
extract_interests,
|
| 5 |
+
extract_dislikes
|
| 6 |
+
)
|
| 7 |
+
|
| 8 |
+
# Names re-exported from .text_processors as this package's public API.
__all__ = ["extract_age", "extract_gender", "extract_interests", "extract_dislikes"]
|
src/utils/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (317 Bytes). View file
|
|
|
src/utils/__pycache__/text_processors.cpython-313.pyc
ADDED
|
Binary file (6.01 kB). View file
|
|
|
src/utils/text_processors.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
from typing import Dict, List, Optional
|
| 3 |
+
|
| 4 |
+
from transformers import pipeline
|
| 5 |
+
|
| 6 |
+
# Instantiate your pipelines just once
|
| 7 |
+
# No model is named, so each pipeline uses the library's default for its task.
zero_shot = pipeline(task="zero-shot-classification")
sentiment = pipeline(task="sentiment-analysis")
|
| 9 |
+
|
| 10 |
+
def extract_age(text: str) -> Optional[int]:
    """Return the age mentioned in ``text``, or ``None`` if none is plausible.

    An explicit "<N> years old" / "<N>-year-old" phrase takes priority over
    any other number in the text, so "has 2 cats and is 30 years old" gives
    30 rather than 2. Without such a phrase, the first standalone one- or
    two-digit number is used.

    Args:
        text: Free-text description of a person.

    Returns:
        The age as an int when it lies strictly between 0 and 120,
        otherwise ``None``.
    """
    # Up to three digits here so that ages 100-119, which the range check
    # below accepts, can actually be matched.
    found = re.search(
        r'\b(\d{1,3})\s*-?\s*years?\s*-?\s*old\b', text, flags=re.IGNORECASE
    )
    if found is None:
        # Fallback: first bare 1-2 digit number (4-digit years never match).
        found = re.search(r'\b(\d{1,2})\b', text)
    if found is None:
        return None

    age = int(found.group(1))
    return age if 0 < age < 120 else None
|
| 17 |
+
|
| 18 |
+
def extract_gender(text: str) -> str:
    """Guess the recipient's gender from pronouns and kinship words.

    Matching is done on whole alphabetic words, so indicators followed by
    punctuation ("sister.", "mom,") or inside contractions ("he's") count,
    while substrings of longer words ("the", "theme") do not.

    Args:
        text: Free-text description of a person.

    Returns:
        ``"male"``, ``"female"`` or ``"unknown"``. When indicators for both
        are present, ``"male"`` wins because it is checked first.
    """
    words = set(re.findall(r"[a-z]+", text.lower()))
    gender_indicators = {
        'male': ['he', 'him', 'his', 'brother', 'boyfriend', 'husband', 'son', 'dad', 'father'],
        'female': ['she', 'her', 'hers', 'sister', 'girlfriend', 'wife', 'daughter', 'mom', 'mother']
    }

    for gender, indicators in gender_indicators.items():
        if words.intersection(indicators):
            return gender
    return "unknown"
|
| 29 |
+
|
| 30 |
+
def extract_interests(text: str, categories: List[str]) -> List[Dict]:
    """Extract the things a person loves/likes/enjoys and classify each one.

    Everything after a verb ("love(s)", "like(s)", "enjoy(s)") up to the next
    such verb or the end of the text is treated as one chunk. The chunk is
    split on commas and the standalone word "and"; each phrase keeps its
    original casing but loses surrounding punctuation.

    Example:
        "She loves painting and enjoys traveling" -> phrases "painting", "traveling"
        "She loves art and music"                 -> phrases "art", "music"

    The module-level ``zero_shot`` and ``sentiment`` pipelines are reused
    instead of being rebuilt on every call; text without any of the verbs
    returns ``[]`` without invoking either pipeline.

    Args:
        text: Free-text description of a person.
        categories: Candidate labels handed to the zero-shot classifier.

    Returns:
        One dict per distinct phrase with the keys ``'phrase'``,
        ``'category'``, ``'confidence'``, ``'sentiment'`` and
        ``'sentiment_score'``.
    """
    # Recognized verbs (compared lowercased)
    verb_set = {"love", "loves", "like", "likes", "enjoy", "enjoys"}

    tokens = re.findall(r"\S+", text)
    n = len(tokens)

    interests_list: List[Dict] = []
    seen = set()

    i = 0
    while i < n:
        if tokens[i].lower() not in verb_set:
            i += 1
            continue

        # Collect subsequent tokens until the next verb or the end
        j = i + 1
        while j < n and tokens[j].lower() not in verb_set:
            j += 1

        chunk_str = " ".join(tokens[i + 1:j])

        # Split on standalone "and" or commas, ignoring case
        for candidate in re.split(r'\s*,\s*|\s*\band\b\s*', chunk_str, flags=re.IGNORECASE):
            # Drop sentence punctuation so "painting." and "painting" are
            # the same phrase for de-duplication and classification.
            candidate = candidate.strip(" \t.,;:!?")
            if not candidate or candidate in seen:
                continue
            seen.add(candidate)

            # Zero-shot + sentiment
            z_result = zero_shot(candidate, categories, multi_label=False)
            s_result = sentiment(candidate)[0]

            interests_list.append({
                'phrase': candidate,  # preserve original casing
                'category': z_result['labels'][0],
                'confidence': z_result['scores'][0],
                'sentiment': s_result['label'],
                'sentiment_score': s_result['score']
            })

        i = j  # resume at the next verb (or stop at the end)

    return interests_list
|
| 98 |
+
|
| 99 |
+
def extract_dislikes(text: str) -> List[str]:
    """Return the lowercased things the text says the person dislikes.

    A dislike clause starts at "hates", "dislikes", "doesn't like" or
    "does not like" and runs to the end of the sentence. The clause is split
    on commas, "and" and "but"; a repeated dislike verb at the start of a part
    is removed ("hates crowds and doesn't like noise" -> "crowds", "noise").
    Collection for a clause stops at the first part that begins a positive
    statement ("..., loves music").

    Args:
        text: Free-text description of a person.

    Returns:
        Dislike phrases in order of appearance; empty when none are found.
    """
    text_lower = text.lower()
    trigger = r"(?:hates|dislikes|(?:doesn['’]t|does\s+not)\s+like)"
    subject = r"(?:(?:he|she|they)\s+)?(?:also\s+)?"

    # Capture up to sentence-ending punctuation so a trailing "." no longer
    # prevents a match and the full "a, b and c" list is kept.
    clause_pattern = rf"\b{trigger}\s+([^.!?;\n]+)"
    leading_trigger = re.compile(rf"^{subject}{trigger}\s+")
    positive_start = re.compile(rf"^{subject}(?:loves?|likes?|enjoys?)\b")

    dislikes = []
    for clause in re.findall(clause_pattern, text_lower):
        for part in re.split(r"\s*,\s*|\s+(?:and|but)\s+", clause):
            cleaned = leading_trigger.sub("", part.strip()).strip()
            if positive_start.match(cleaned):
                # The rest of this clause lists likes, not dislikes.
                break
            if cleaned:
                dislikes.append(cleaned)

    return dislikes
|
| 113 |
+
|
| 114 |
+
def format_profile(profile: Dict) -> str:
    """Render a profile dict as a bulleted, newline-separated summary.

    Args:
        profile: Mapping with the keys ``'age'``, ``'gender'``,
            ``'interests'`` (dicts carrying a ``'phrase'``) and ``'dislikes'``.

    Returns:
        The summary text. A falsy age is shown as "Unknown", the interests
        line is always present, and the dislikes line only when non-empty.
    """
    lines = [
        "Profile Summary:",
        f"- Age: {profile['age'] or 'Unknown'}",
        f"- Gender: {profile['gender'].title()}",
        "- Interests: " + ", ".join(entry['phrase'] for entry in profile['interests']),
    ]
    disliked = profile['dislikes']
    if disliked:
        lines.append("- Dislikes: " + ", ".join(disliked))
    return "\n".join(lines)
|