# Spaces:
# Runtime error
# Runtime error
import re
import time
from typing import List, Dict, Self

import feedparser
import requests
from bs4 import BeautifulSoup
from pydantic import BaseModel
from tqdm import tqdm
# DealNews RSS feed URLs that ScrapedDeal.fetch iterates over.
feeds = [
    "https://www.dealnews.com/c142/Electronics/?rss=1",
    "https://www.dealnews.com/c39/Computers/?rss=1",
    "https://www.dealnews.com/c238/Automotive/?rss=1",
    "https://www.dealnews.com/f1912/Smart-Home/?rss=1",
    "https://www.dealnews.com/c196/Home-Garden/?rss=1",
]
def extract(html_snippet: str) -> str:
    """
    Use Beautiful Soup to clean up this HTML snippet and extract useful text.

    The snippet is searched for a ``<div>`` whose class attribute is
    ``snippet summary``. When one is found, its text is taken, parsed a second
    time, and any remaining tag-like ``<...>`` spans are removed. When none is
    found, the snippet is used unchanged. In both cases newlines are replaced
    by spaces.

    :param html_snippet: HTML fragment to clean up.
    :return: The extracted text on a single line; ``""`` for an empty snippet.
    """
    if not html_snippet:
        # Nothing to parse; this also keeps None away from BeautifulSoup.
        return ""

    soup = BeautifulSoup(html_snippet, 'html.parser')
    snippet_div = soup.find('div', class_='snippet summary')
    if snippet_div is None:
        # No summary div: fall back to the raw snippet.
        return html_snippet.replace('\n', ' ')

    description = snippet_div.get_text(strip=True)
    # The extracted text may itself contain markup, so parse it once more
    # and then scrub whatever still looks like a tag.
    description = BeautifulSoup(description, 'html.parser').get_text()
    description = re.sub(r'<[^<]+?>', '', description)
    return description.strip().replace('\n', ' ')
class ScrapedDeal: | |
""" | |
A class to represent a Deal retrieved from an RSS feed | |
""" | |
category: str | |
title: str | |
summary: str | |
url: str | |
details: str | |
features: str | |
def __init__(self, entry: Dict[str, str]): | |
""" | |
Populate this instance based on the provided dict | |
""" | |
self.title = entry['title'] | |
self.summary = extract(entry['summary']) | |
self.url = entry['links'][0]['href'] | |
stuff = requests.get(self.url).content | |
soup = BeautifulSoup(stuff, 'html.parser') | |
content = soup.find('div', class_='content-section').get_text() | |
content = content.replace('\nmore', '').replace('\n', ' ') | |
if "Features" in content: | |
self.details, self.features = content.split("Features") | |
else: | |
self.details = content | |
self.features = "" | |
def __repr__(self): | |
""" | |
Return a string to describe this deal | |
""" | |
return f"<{self.title}>" | |
def describe(self): | |
""" | |
Return a longer string to describe this deal for use in calling a model | |
""" | |
return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}" | |
def fetch(cls, show_progress : bool = False) -> List[Self]: | |
""" | |
Retrieve all deals from the selected RSS feeds | |
""" | |
deals = [] | |
feed_iter = tqdm(feeds) if show_progress else feeds | |
for feed_url in feed_iter: | |
feed = feedparser.parse(feed_url) | |
for entry in feed.entries[:10]: | |
deals.append(cls(entry)) | |
time.sleep(0.5) | |
return deals | |
class Deal(BaseModel):
    """
    A class to Represent a Deal with a summary description
    """
    # The docstring above is left verbatim: pydantic publishes a model's
    # docstring as the description in its generated JSON schema.
    product_description: str
    price: float
    url: str
class DealSelection(BaseModel):
    """
    A class to Represent a list of Deals
    """
    # The docstring above is left verbatim: pydantic publishes a model's
    # docstring as the description in its generated JSON schema.
    deals: List[Deal]
class Opportunity(BaseModel):
    """
    A class to represent a possible opportunity: a Deal where we estimate
    it should cost more than it's being offered
    """
    # The docstring above is left verbatim: pydantic publishes a model's
    # docstring as the description in its generated JSON schema.
    deal: Deal
    # presumably the estimated price of the deal's product - confirm with caller
    estimate: float
    # presumably estimate minus the deal's offered price - confirm with caller
    discount: float