Spaces:
Runtime error
Runtime error
Upload 43 files
Browse files- .gitattributes +5 -35
- README.md +13 -13
- agents/__pycache__/agent.cpython-312.pyc +3 -0
- agents/__pycache__/deals.cpython-312.pyc +3 -0
- agents/__pycache__/ensemble_agent.cpython-312.pyc +3 -0
- agents/__pycache__/frontier_agent.cpython-312.pyc +3 -0
- agents/__pycache__/messaging_agent.cpython-312.pyc +3 -0
- agents/__pycache__/planning_agent.cpython-312.pyc +3 -0
- agents/__pycache__/random_forest_agent.cpython-312.pyc +3 -0
- agents/__pycache__/scanner_agent.cpython-312.pyc +3 -0
- agents/__pycache__/specialist_agent.cpython-312.pyc +3 -0
- agents/agent.py +33 -0
- agents/deals.py +109 -0
- agents/ensemble_agent.py +48 -0
- agents/frontier_agent.py +113 -0
- agents/messaging_agent.py +79 -0
- agents/planning_agent.py +57 -0
- agents/random_forest_agent.py +37 -0
- agents/scanner_agent.py +94 -0
- agents/specialist_agent.py +29 -0
- app.py +166 -0
- deal_agent_framework.py +99 -0
- ensemble_model.pkl +3 -0
- hello.py +18 -0
- items.py +101 -0
- keep_warm.py +10 -0
- llama.py +44 -0
- log_utils.py +35 -0
- memory.json +29 -0
- price_is_right.py +62 -0
- pricer_ephemeral.py +66 -0
- pricer_service.py +66 -0
- pricer_service2.py +90 -0
- products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/data_level0.bin +3 -0
- products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/header.bin +3 -0
- products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/index_metadata.pickle +3 -0
- products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/length.bin +3 -0
- products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/link_lists.bin +3 -0
- products_vectorstore/chroma.sqlite3 +3 -0
- random_forest_model.pkl +3 -0
- requirements.txt +0 -0
- test.pkl +3 -0
- testing.py +75 -0
.gitattributes
CHANGED
@@ -1,35 +1,5 @@
|
|
1 |
-
*.
|
2 |
-
*.
|
3 |
-
*.
|
4 |
-
*.
|
5 |
-
*.
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.pyc filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
-
---
|
2 |
-
title: LLM Project
|
3 |
-
emoji: 🦀
|
4 |
-
colorFrom: gray
|
5 |
-
colorTo: green
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.15.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
---
|
2 |
+
title: LLM Project
|
3 |
+
emoji: 🦀
|
4 |
+
colorFrom: gray
|
5 |
+
colorTo: green
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.15.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
agents/__pycache__/agent.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e7b56519cbb8ed611dbd18c5a297044c37d0eb1e1c29bc49c9a9f5746b8a670
|
3 |
+
size 1271
|
agents/__pycache__/deals.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b505267d897a32a4381686ac6bed2c526b63cf35071791b6fb4d698c05b6e99a
|
3 |
+
size 5464
|
agents/__pycache__/ensemble_agent.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:565d116ca19f07a045e1bbb4fe7469d840d2fe42ba6e394f0bd0e632a4c6ba48
|
3 |
+
size 2829
|
agents/__pycache__/frontier_agent.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3e2fcd286d807aaf6d5f644231aecc103bc4d9ff2fcc95ff533cc74d5cb7ea1
|
3 |
+
size 6735
|
agents/__pycache__/messaging_agent.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dfd32738e5c5406e4d534caf2dbf109cc5fd3dc7dd1360d6089a828656378dd
|
3 |
+
size 4326
|
agents/__pycache__/planning_agent.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e2d65f2706c98c5a3caa0a01f37ef448202b4f0020d6707bcaec74dbe9c64f7
|
3 |
+
size 3962
|
agents/__pycache__/random_forest_agent.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b206314a37fdc3d67bd8c80fab047102b1da8a92d797bd999282183516e2a56
|
3 |
+
size 2143
|
agents/__pycache__/scanner_agent.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0df3f7fcc7315118766382b793926b01d831fb006b03c2610722bf62e48b994
|
3 |
+
size 6276
|
agents/__pycache__/specialist_agent.cpython-312.pyc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a25eae39b16a3aeae83c0cc38356a2bef5366058adaaaab6f2a70e5052d6ff7f
|
3 |
+
size 1804
|
agents/agent.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
class Agent:
    """
    Common base class for all Agents.

    Holds the ANSI color codes used for console output and offers a `log`
    helper that prefixes each message with the agent's name and wraps it
    in the agent's color.
    """

    # ANSI escape sequences for foreground colors
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    MAGENTA = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"

    # ANSI escape sequence for a black background
    BG_BLACK = "\033[40m"

    # ANSI escape sequence that restores the default colors
    RESET = "\033[0m"

    # Per-agent identity; subclasses assign their own values
    name: str = ""
    color: str = "\033[37m"

    def log(self, message):
        """
        Emit `message` at INFO level, tagged with this agent's name and
        colored with this agent's color on a black background.
        """
        prefix = f"{self.BG_BLACK}{self.color}"
        tagged = f"[{self.name}] {message}"
        logging.info(f"{prefix}{tagged}{self.RESET}")
|
agents/deals.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
from typing import List, Dict, Self
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
import re
|
5 |
+
import feedparser
|
6 |
+
from tqdm import tqdm
|
7 |
+
import requests
|
8 |
+
import time
|
9 |
+
|
10 |
+
# RSS feed URLs that are scanned for deals, one per DealNews section
feeds = [
    f"https://www.dealnews.com/{section}/?rss=1"
    for section in (
        "c142/Electronics",
        "c39/Computers",
        "c238/Automotive",
        "f1912/Smart-Home",
        "c196/Home-Garden",
    )
]
|
17 |
+
|
18 |
+
def extract(html_snippet: str) -> str:
    """
    Pull the readable text out of an HTML snippet using Beautiful Soup.

    If the snippet contains a <div> with class 'snippet summary', that
    div's text is extracted, stripped of any remaining markup and trimmed.
    Otherwise the snippet itself is used. In both cases newlines are
    replaced with spaces before returning.
    """
    summary_div = BeautifulSoup(html_snippet, 'html.parser').find('div', class_='snippet summary')
    if not summary_div:
        # No summary div found: fall back to the raw snippet
        return html_snippet.replace('\n', ' ')

    text = summary_div.get_text(strip=True)
    # The extracted text may itself hold markup, so parse it once more
    text = BeautifulSoup(text, 'html.parser').get_text()
    # Remove anything that still looks like a tag
    text = re.sub('<[^<]+?>', '', text)
    return text.strip().replace('\n', ' ')
|
33 |
+
|
34 |
+
class ScrapedDeal:
|
35 |
+
"""
|
36 |
+
A class to represent a Deal retrieved from an RSS feed
|
37 |
+
"""
|
38 |
+
category: str
|
39 |
+
title: str
|
40 |
+
summary: str
|
41 |
+
url: str
|
42 |
+
details: str
|
43 |
+
features: str
|
44 |
+
|
45 |
+
def __init__(self, entry: Dict[str, str]):
|
46 |
+
"""
|
47 |
+
Populate this instance based on the provided dict
|
48 |
+
"""
|
49 |
+
self.title = entry['title']
|
50 |
+
self.summary = extract(entry['summary'])
|
51 |
+
self.url = entry['links'][0]['href']
|
52 |
+
stuff = requests.get(self.url).content
|
53 |
+
soup = BeautifulSoup(stuff, 'html.parser')
|
54 |
+
content = soup.find('div', class_='content-section').get_text()
|
55 |
+
content = content.replace('\nmore', '').replace('\n', ' ')
|
56 |
+
if "Features" in content:
|
57 |
+
self.details, self.features = content.split("Features")
|
58 |
+
else:
|
59 |
+
self.details = content
|
60 |
+
self.features = ""
|
61 |
+
|
62 |
+
def __repr__(self):
|
63 |
+
"""
|
64 |
+
Return a string to describe this deal
|
65 |
+
"""
|
66 |
+
return f"<{self.title}>"
|
67 |
+
|
68 |
+
def describe(self):
|
69 |
+
"""
|
70 |
+
Return a longer string to describe this deal for use in calling a model
|
71 |
+
"""
|
72 |
+
return f"Title: {self.title}\nDetails: {self.details.strip()}\nFeatures: {self.features.strip()}\nURL: {self.url}"
|
73 |
+
|
74 |
+
@classmethod
|
75 |
+
def fetch(cls, show_progress : bool = False) -> List[Self]:
|
76 |
+
"""
|
77 |
+
Retrieve all deals from the selected RSS feeds
|
78 |
+
"""
|
79 |
+
deals = []
|
80 |
+
feed_iter = tqdm(feeds) if show_progress else feeds
|
81 |
+
for feed_url in feed_iter:
|
82 |
+
feed = feedparser.parse(feed_url)
|
83 |
+
for entry in feed.entries[:10]:
|
84 |
+
deals.append(cls(entry))
|
85 |
+
time.sleep(0.5)
|
86 |
+
return deals
|
87 |
+
|
88 |
+
class Deal(BaseModel):
    """
    A single deal: a summary description of the product, its price and its URL
    """
    # Summary text describing the product
    product_description: str
    # Price of the deal
    price: float
    # Link to the deal
    url: str
|
95 |
+
|
96 |
+
class DealSelection(BaseModel):
    """
    A container model holding a list of Deal objects
    """
    # The deals that make up this selection
    deals: List[Deal]
|
101 |
+
|
102 |
+
class Opportunity(BaseModel):
    """
    A possible opportunity: a Deal paired with our own price estimate,
    where the estimate is expected to exceed the price being offered
    """
    # The deal under consideration
    deal: Deal
    # What we estimate the product should cost
    estimate: float
    # The discount attributed to this deal
    discount: float
|
agents/ensemble_agent.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.linear_model import LinearRegression
|
3 |
+
import joblib
|
4 |
+
|
5 |
+
from agents.agent import Agent
|
6 |
+
from agents.specialist_agent import SpecialistAgent
|
7 |
+
from agents.frontier_agent import FrontierAgent
|
8 |
+
from agents.random_forest_agent import RandomForestAgent
|
9 |
+
|
10 |
+
class EnsembleAgent(Agent):
    """
    Agent that combines the specialist, frontier and random forest agents
    and feeds their estimates into a saved model to get one price.
    """

    name = "Ensemble Agent"
    color = Agent.YELLOW

    def __init__(self, collection):
        """
        Build the three pricing agents and load the ensemble model
        from 'ensemble_model.pkl'.

        :param collection: passed straight through to FrontierAgent
        """
        self.log("Initializing Ensemble Agent")
        self.specialist = SpecialistAgent()
        self.frontier = FrontierAgent(collection)
        self.random_forest = RandomForestAgent()
        self.model = joblib.load('ensemble_model.pkl')
        self.log("Ensemble Agent is ready")

    def price(self, description: str) -> float:
        """
        Price a product with the ensemble.

        Each of the three agents prices the product; those prices plus their
        minimum and maximum form a one-row DataFrame that the loaded model
        turns into the final estimate.

        :param description: the description of a product
        :return: an estimate of its price
        """
        self.log("Running Ensemble Agent - collaborating with specialist, frontier and random forest agents")
        # Dict literals evaluate in order, so the agents run specialist, frontier, random forest
        estimates = {
            'Specialist': self.specialist.price(description),
            'Frontier': self.frontier.price(description),
            'RandomForest': self.random_forest.price(description),
        }
        lowest = min(estimates.values())
        highest = max(estimates.values())
        row = {column: [value] for column, value in estimates.items()}
        row['Min'] = [lowest]
        row['Max'] = [highest]
        features = pd.DataFrame(row)
        estimate = self.model.predict(features)[0]
        self.log(f"Ensemble Agent complete - returning ${estimate:.2f}")
        return estimate
|
agents/frontier_agent.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# imports
|
2 |
+
|
3 |
+
import os
|
4 |
+
import re
|
5 |
+
import math
|
6 |
+
import json
|
7 |
+
from typing import List, Dict
|
8 |
+
from openai import OpenAI
|
9 |
+
from sentence_transformers import SentenceTransformer
|
10 |
+
from datasets import load_dataset
|
11 |
+
import chromadb
|
12 |
+
from items import Item
|
13 |
+
from testing import Tester
|
14 |
+
from agents.agent import Agent
|
15 |
+
|
16 |
+
|
17 |
+
class FrontierAgent(Agent):
    """
    Agent that prices a product by asking a chat model, after looking up
    similar products in the Chroma collection to include as context.
    """

    name = "Frontier Agent"
    color = Agent.BLUE

    MODEL = "gpt-4o-mini"

    def __init__(self, collection):
        """
        Set up this instance by connecting to OpenAI or DeepSeek, to the Chroma Datastore,
        And setting up the vector encoding model

        DeepSeek is used when the DEEPSEEK_API_KEY environment variable is set;
        otherwise the default OpenAI client is created.

        :param collection: the Chroma collection queried by find_similars
        """
        self.log("Initializing Frontier Agent")
        deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
        if deepseek_api_key:
            self.client = OpenAI(api_key=deepseek_api_key, base_url="https://api.deepseek.com")
            self.MODEL = "deepseek-chat"
            self.log("Frontier Agent is set up with DeepSeek")
        else:
            self.client = OpenAI()
            self.MODEL = "gpt-4o-mini"
            self.log("Frontier Agent is setting up with OpenAI")
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.log("Frontier Agent is ready")

    def make_context(self, similars: List[str], prices: List[float]) -> str:
        """
        Create context that can be inserted into the prompt
        :param similars: similar products to the one being estimated
        :param prices: prices of the similar products
        :return: text to insert in the prompt that provides context
        """
        message = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
        for similar, price in zip(similars, prices):
            message += f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
        return message

    def messages_for(self, description: str, similars: List[str], prices: List[float]) -> List[Dict[str, str]]:
        """
        Create the message list to be included in a call to OpenAI
        With the system and user prompt
        :param description: a description of the product
        :param similars: similar products to this one
        :param prices: prices of similar products
        :return: the list of messages in the format expected by OpenAI
        """
        system_message = "You estimate prices of items. Reply only with the price, no explanation"
        user_prompt = self.make_context(similars, prices)
        user_prompt += "And now the question for you:\n\n"
        user_prompt += "How much does this cost?\n\n" + description
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            # Seed the assistant turn so the reply continues with the number
            {"role": "assistant", "content": "Price is $"}
        ]

    def find_similars(self, description: str):
        """
        Return a list of items similar to the given one by looking in the Chroma datastore

        :param description: the product description to encode and search with
        :return: a (documents, prices) pair taken from the 5 query results
        """
        self.log("Frontier Agent is performing a RAG search of the Chroma datastore to find 5 similar products")
        vector = self.model.encode([description])
        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
        # One query was sent, so index 0 holds its results
        documents = results['documents'][0]
        prices = [m['price'] for m in results['metadatas'][0]]
        self.log("Frontier Agent has found similar products")
        return documents, prices

    def get_price(self, s) -> float:
        """
        A utility that plucks a floating point number out of a string

        :param s: text expected to contain a price; may be None or empty
        :return: the first number found, or 0.0 when there is none
        """
        if not s:
            # A reply without text content would otherwise fail on .replace
            return 0.0
        s = s.replace('$', '').replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
        return float(match.group()) if match else 0.0

    def price(self, description: str) -> float:
        """
        Make a call to OpenAI or DeepSeek to estimate the price of the described product,
        by looking up 5 similar products and including them in the prompt to give context
        :param description: a description of the product
        :return: an estimate of the price
        """
        documents, prices = self.find_similars(description)
        self.log(f"Frontier Agent is about to call {self.MODEL} with context including 5 similar products")
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            seed=42,
            max_tokens=5
        )
        reply = response.choices[0].message.content
        result = self.get_price(reply)
        self.log(f"Frontier Agent completed - predicting ${result:.2f}")
        return result
|
113 |
+
|
agents/messaging_agent.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
# from twilio.rest import Client
|
3 |
+
from agents.deals import Opportunity
|
4 |
+
import http.client
|
5 |
+
import urllib
|
6 |
+
from agents.agent import Agent
|
7 |
+
|
8 |
+
# Uncomment the Twilio lines if you wish to use Twilio
|
9 |
+
|
10 |
+
# Notification channels used by MessagingAgent: SMS (Twilio) and push (Pushover)
DO_TEXT, DO_PUSH = False, True
|
12 |
+
|
13 |
+
class MessagingAgent(Agent):
    """
    Agent that announces an Opportunity by SMS and/or push notification,
    depending on the DO_TEXT and DO_PUSH constants.
    """

    name = "Messaging Agent"
    color = Agent.WHITE

    def __init__(self):
        """
        Set up this object to either do push notifications via Pushover,
        or SMS via Twilio,
        whichever is specified in the constants

        Credentials are read from environment variables, with placeholder
        strings as defaults.
        """
        self.log("Messaging Agent is initializing")
        if DO_TEXT:
            account_sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
            auth_token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
            self.me_from = os.getenv('TWILIO_FROM', 'your-phone-number-if-not-using-env')
            self.me_to = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
            # self.client = Client(account_sid, auth_token)
            self.log("Messaging Agent has initialized Twilio")
        if DO_PUSH:
            self.pushover_user = os.getenv('PUSHOVER_USER', 'your-pushover-user-if-not-using-env')
            self.pushover_token = os.getenv('PUSHOVER_TOKEN', 'your-pushover-user-if-not-using-env')
            self.log("Messaging Agent has initialized Pushover")

    def message(self, text):
        """
        Send an SMS message using the Twilio API

        Requires self.client, which is only assigned once the commented-out
        Twilio line in __init__ is enabled.

        :param text: the body of the SMS
        """
        self.log("Messaging Agent is sending a text message")
        self.client.messages.create(
            from_=self.me_from,
            body=text,
            to=self.me_to
        )

    def push(self, text):
        """
        Send a Push Notification using the Pushover API

        :param text: the notification message
        """
        # Import the submodule explicitly; a bare `import urllib` does not guarantee it
        import urllib.parse

        self.log("Messaging Agent is sending a push notification")
        conn = http.client.HTTPSConnection("api.pushover.net:443")
        try:
            conn.request("POST", "/1/messages.json",
                         urllib.parse.urlencode({
                             "token": self.pushover_token,
                             "user": self.pushover_user,
                             "message": text,
                             "sound": "cashregister"
                         }), {"Content-type": "application/x-www-form-urlencoded"})
            conn.getresponse()
        finally:
            # Always release the socket, even if the request fails
            conn.close()

    def alert(self, opportunity: Opportunity):
        """
        Make an alert about the specified Opportunity

        The text holds the price, estimate and discount, the first 10
        characters of the product description, and the deal URL.

        :param opportunity: the opportunity to announce
        """
        text = f"Deal Alert! Price=${opportunity.deal.price:.2f}, "
        text += f"Estimate=${opportunity.estimate:.2f}, "
        text += f"Discount=${opportunity.discount:.2f} :"
        text += opportunity.deal.product_description[:10]+'... '
        text += opportunity.deal.url
        if DO_TEXT:
            self.message(text)
        if DO_PUSH:
            self.push(text)
        self.log("Messaging Agent has completed")
|
77 |
+
|
78 |
+
|
79 |
+
|
agents/planning_agent.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional, List
|
2 |
+
from agents.agent import Agent
|
3 |
+
from agents.deals import ScrapedDeal, DealSelection, Deal, Opportunity
|
4 |
+
from agents.scanner_agent import ScannerAgent
|
5 |
+
from agents.ensemble_agent import EnsembleAgent
|
6 |
+
from agents.messaging_agent import MessagingAgent
|
7 |
+
|
8 |
+
|
9 |
+
class PlanningAgent(Agent):
    """
    Agent that coordinates the scanner, ensemble and messaging agents:
    find deals, estimate their value, and alert on the best one.
    """

    name = "Planning Agent"
    color = Agent.GREEN
    # Minimum discount (estimate minus price) that triggers an alert
    DEAL_THRESHOLD = 50

    def __init__(self, collection):
        """
        Create instances of the 3 Agents that this planner coordinates across

        :param collection: passed straight through to EnsembleAgent
        """
        self.log("Planning Agent is initializing")
        self.scanner = ScannerAgent()
        self.ensemble = EnsembleAgent(collection)
        self.messenger = MessagingAgent()
        self.log("Planning Agent is ready")

    def run(self, deal: Deal) -> Opportunity:
        """
        Run the workflow for a particular deal
        :param deal: the deal, summarized from an RSS scrape
        :returns: an opportunity including the discount
        """
        self.log("Planning Agent is pricing up a potential deal")
        estimate = self.ensemble.price(deal.product_description)
        discount = estimate - deal.price
        self.log(f"Planning Agent has processed a deal with discount ${discount:.2f}")
        return Opportunity(deal=deal, estimate=estimate, discount=discount)

    def plan(self, memory: Optional[List[Opportunity]] = None) -> Optional[Opportunity]:
        """
        Run the full workflow:
        1. Use the ScannerAgent to find deals from RSS feeds
        2. Use the EnsembleAgent to estimate them
        3. Use the MessagingAgent to send a notification of deals
        :param memory: opportunities surfaced in the past (the scanner reads
            each item's deal.url); None is treated as an empty list
        :return: an Opportunity if one was surfaced, otherwise None
        """
        self.log("Planning Agent is kicking off a run")
        # Build a fresh list per call instead of sharing one mutable default
        selection = self.scanner.scan(memory=[] if memory is None else memory)
        # The scanner can hand back a selection whose deals were all filtered out
        if not selection or not selection.deals:
            return None

        opportunities = [self.run(deal) for deal in selection.deals[:5]]
        best = max(opportunities, key=lambda opp: opp.discount)
        self.log(f"Planning Agent has identified the best deal has discount ${best.discount:.2f}")
        worth_alerting = best.discount > self.DEAL_THRESHOLD
        if worth_alerting:
            self.messenger.alert(best)
        self.log("Planning Agent has completed a run")
        return best if worth_alerting else None
|
agents/random_forest_agent.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# imports
|
2 |
+
|
3 |
+
import os
|
4 |
+
import re
|
5 |
+
from typing import List
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
import joblib
|
8 |
+
from agents.agent import Agent
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
class RandomForestAgent(Agent):
    """
    Agent that prices a product with a saved model applied to a
    SentenceTransformer encoding of the description.
    """

    name = "Random Forest Agent"
    color = Agent.MAGENTA

    def __init__(self):
        """
        Create the SentenceTransformer encoder and load the saved model
        from 'random_forest_model.pkl'.
        """
        self.log("Random Forest Agent is initializing")
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = joblib.load('random_forest_model.pkl')
        self.log("Random Forest Agent is ready")

    def price(self, description: str) -> float:
        """
        Estimate the price of the described item with the loaded model.

        :param description: the product to be estimated
        :return: the predicted price, never less than 0
        """
        self.log("Random Forest Agent is starting a prediction")
        encoded = self.vectorizer.encode([description])
        prediction = self.model.predict(encoded)[0]
        # Clamp negative predictions to zero
        result = max(0, prediction)
        self.log(f"Random Forest Agent completed - predicting ${result:.2f}")
        return result
|
agents/scanner_agent.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
from typing import Optional, List
|
4 |
+
from openai import OpenAI
|
5 |
+
from agents.deals import ScrapedDeal, DealSelection
|
6 |
+
from agents.agent import Agent
|
7 |
+
|
8 |
+
|
9 |
+
class ScannerAgent(Agent):
|
10 |
+
|
11 |
+
MODEL = "gpt-4o-mini"
|
12 |
+
|
13 |
+
SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
|
14 |
+
Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
|
15 |
+
Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.
|
16 |
+
Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price.
|
17 |
+
|
18 |
+
{"deals": [
|
19 |
+
{
|
20 |
+
"product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.",
|
21 |
+
"price": 99.99,
|
22 |
+
"url": "the url as provided"
|
23 |
+
},
|
24 |
+
...
|
25 |
+
]}"""
|
26 |
+
|
27 |
+
USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price that is greater than 0.
|
28 |
+
Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
|
29 |
+
Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.
|
30 |
+
Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price.
|
31 |
+
|
32 |
+
Deals:
|
33 |
+
|
34 |
+
"""
|
35 |
+
|
36 |
+
USER_PROMPT_SUFFIX = "\n\nStrictly respond in JSON and include exactly 5 deals, no more."
|
37 |
+
|
38 |
+
name = "Scanner Agent"
|
39 |
+
color = Agent.CYAN
|
40 |
+
|
41 |
+
def __init__(self):
|
42 |
+
"""
|
43 |
+
Set up this instance by initializing OpenAI
|
44 |
+
"""
|
45 |
+
self.log("Scanner Agent is initializing")
|
46 |
+
self.openai = OpenAI()
|
47 |
+
self.log("Scanner Agent is ready")
|
48 |
+
|
49 |
+
def fetch_deals(self, memory) -> List[ScrapedDeal]:
|
50 |
+
"""
|
51 |
+
Look up deals published on RSS feeds
|
52 |
+
Return any new deals that are not already in the memory provided
|
53 |
+
"""
|
54 |
+
self.log("Scanner Agent is about to fetch deals from RSS feed")
|
55 |
+
urls = [opp.deal.url for opp in memory]
|
56 |
+
scraped = ScrapedDeal.fetch()
|
57 |
+
result = [scrape for scrape in scraped if scrape.url not in urls]
|
58 |
+
self.log(f"Scanner Agent received {len(result)} deals not already scraped")
|
59 |
+
return result
|
60 |
+
|
61 |
+
def make_user_prompt(self, scraped) -> str:
|
62 |
+
"""
|
63 |
+
Create a user prompt for OpenAI based on the scraped deals provided
|
64 |
+
"""
|
65 |
+
user_prompt = self.USER_PROMPT_PREFIX
|
66 |
+
user_prompt += '\n\n'.join([scrape.describe() for scrape in scraped])
|
67 |
+
user_prompt += self.USER_PROMPT_SUFFIX
|
68 |
+
return user_prompt
|
69 |
+
|
70 |
+
def scan(self, memory: Optional[list] = None) -> Optional[DealSelection]:
    """
    Call OpenAI to provide a high potential list of deals with good descriptions and prices
    Use StructuredOutputs to ensure it conforms to our specifications

    :param memory: opportunities already raised; passed to fetch_deals, which reads each
        item's `.deal.url`. Defaults to an empty list.
    :return: a selection of good deals, or None if nothing new was scraped or OpenAI
        returned no parsed selection
    """
    # A literal [] default would be one list object shared by every call
    if memory is None:
        memory = []
    scraped = self.fetch_deals(memory)
    if not scraped:
        return None
    user_prompt = self.make_user_prompt(scraped)
    self.log("Scanner Agent is calling OpenAI using Structured Output")
    response = self.openai.beta.chat.completions.parse(
        model=self.MODEL,
        messages=[
            {"role": "system", "content": self.SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt}
        ],
        response_format=DealSelection
    )
    result = response.choices[0].message.parsed
    # parsed is None when the model did not produce a conforming object (e.g. a refusal)
    if result is None:
        self.log("Scanner Agent received no parsed selection from OpenAI")
        return None
    result.deals = [deal for deal in result.deals if deal.price > 0]
    self.log(f"Scanner Agent received {len(result.deals)} selected deals with price>0 from OpenAI")
    return result
|
94 |
+
|
agents/specialist_agent.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import modal
|
2 |
+
from agents.agent import Agent
|
3 |
+
|
4 |
+
|
5 |
+
class SpecialistAgent(Agent):
    """
    Agent that delegates pricing to our fine-tuned LLM, which runs remotely on Modal
    """

    name = "Specialist Agent"
    color = Agent.RED

    def __init__(self):
        """
        Look up the "Pricer" class of the "pricer-service" Modal app and keep an instance of it
        """
        self.log("Specialist Agent is initializing - connecting to modal")
        pricer_cls = modal.Cls.lookup("pricer-service", "Pricer")
        self.pricer = pricer_cls()
        self.log("Specialist Agent is ready")

    def price(self, description: str) -> float:
        """
        Ask the remote model for its price estimate of the described item and return it
        """
        self.log("Specialist Agent is calling remote fine-tuned model")
        estimate = self.pricer.price.remote(description)
        self.log(f"Specialist Agent completed - predicting ${estimate:.2f}")
        return estimate
|
app.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import queue
|
3 |
+
import threading
|
4 |
+
import time
|
5 |
+
import gradio as gr
|
6 |
+
from deal_agent_framework import DealAgentFramework
|
7 |
+
from agents.deals import Opportunity, Deal
|
8 |
+
from log_utils import reformat
|
9 |
+
import plotly.graph_objects as go
|
10 |
+
|
11 |
+
|
12 |
+
class QueueHandler(logging.Handler):
    """Logging handler that formats each record and puts the text onto a queue."""

    def __init__(self, log_queue):
        super().__init__()
        self.log_queue = log_queue

    def emit(self, record):
        formatted = self.format(record)
        self.log_queue.put(formatted)
|
19 |
+
|
20 |
+
def html_for(log_data):
    """Render the last 18 log lines, joined with <br>, inside a fixed-height scrollable div."""
    recent = log_data[-18:]
    joined = '<br>'.join(recent)
    return f"""
<div id="scrollContent" style="height: 400px; overflow-y: auto; border: 1px solid #ccc; background-color: #222229; padding: 10px;">
{joined}
</div>
"""
|
27 |
+
|
28 |
+
def setup_logging(log_queue):
    """
    Attach a QueueHandler for log_queue to the root logger and set the root level to INFO.

    Records are formatted as "[timestamp] message". Every call adds another handler.
    """
    queue_handler = QueueHandler(log_queue)
    queue_handler.setFormatter(
        logging.Formatter(
            "[%(asctime)s] %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S %z",
        )
    )
    root = logging.getLogger()
    root.addHandler(queue_handler)
    root.setLevel(logging.INFO)
|
38 |
+
|
39 |
+
|
40 |
+
class App:
    """
    Gradio front end for the deal agent framework.

    Shows the opportunities held in the framework's memory, streams log output while an
    agent run is in progress, and draws a 3D scatter plot of the data returned by
    DealAgentFramework.get_plot_data.
    """

    def __init__(self):
        self.agent_framework = None
        # get_agent_framework() is reached from both the UI generator and the worker
        # thread; the lock stops the two from each building their own framework.
        self._framework_lock = threading.Lock()

    def get_agent_framework(self):
        """Return the shared DealAgentFramework, creating and initializing it on first use."""
        with self._framework_lock:
            if not self.agent_framework:
                self.agent_framework = DealAgentFramework()
                self.agent_framework.init_agents_as_needed()
            return self.agent_framework

    def run(self):
        """Build the Blocks UI, wire its load/timer/select events, and launch it."""
        with gr.Blocks(title="The Price is Right", fill_width=True) as ui:

            log_data = gr.State([])

            def table_for(opps):
                """Convert opportunities to rows: description, price, estimate, discount, url."""
                return [
                    [
                        opp.deal.product_description,
                        f"${opp.deal.price:.2f}",
                        f"${opp.estimate:.2f}",
                        f"${opp.discount:.2f}",
                        opp.deal.url,
                    ]
                    for opp in opps
                ]

            def update_output(log_data, log_queue, result_queue):
                """
                Yield (log lines, log html, table) whenever a log message or the result arrives.

                Stops once the result has been received and both queues are empty.
                """
                initial_result = table_for(self.get_agent_framework().memory)
                final_result = None
                while True:
                    try:
                        message = log_queue.get_nowait()
                        log_data.append(reformat(message))
                        yield log_data, html_for(log_data), final_result or initial_result
                    except queue.Empty:
                        try:
                            final_result = result_queue.get_nowait()
                            yield log_data, html_for(log_data), final_result or initial_result
                        except queue.Empty:
                            if final_result is not None:
                                break
                            time.sleep(0.1)

            def get_initial_plot():
                """Return an empty figure titled 'Loading vector DB...'."""
                fig = go.Figure()
                fig.update_layout(
                    title='Loading vector DB...',
                    height=400,
                )
                return fig

            def get_plot():
                """Return a 3D scatter of up to 1000 reduced vectors, colored per point."""
                documents, vectors, colors = DealAgentFramework.get_plot_data(max_datapoints=1000)
                # Create the 3D scatter plot
                fig = go.Figure(data=[go.Scatter3d(
                    x=vectors[:, 0],
                    y=vectors[:, 1],
                    z=vectors[:, 2],
                    mode='markers',
                    marker=dict(size=2, color=colors, opacity=0.7),
                )])

                fig.update_layout(
                    scene=dict(
                        xaxis_title='x',
                        yaxis_title='y',
                        zaxis_title='z',
                        aspectmode='manual',
                        # x and y axes are drawn 2.2 times as long as z
                        aspectratio=dict(x=2.2, y=2.2, z=1),
                        camera=dict(
                            eye=dict(x=1.6, y=1.6, z=0.8)  # Adjust camera position
                        ),
                    ),
                    height=400,
                    margin=dict(r=5, b=1, l=5, t=2),
                )

                return fig

            def do_run():
                """Run the agent framework once and return its memory as table rows."""
                new_opportunities = self.get_agent_framework().run()
                table = table_for(new_opportunities)
                return table

            def run_with_logging(initial_log_data):
                """
                Run the agents on a worker thread while streaming their log output to the UI.

                Yields the same tuples as update_output.
                """
                log_queue = queue.Queue()
                result_queue = queue.Queue()
                setup_logging(log_queue)

                def detach_log_handler():
                    # setup_logging adds a handler per run; without removing it, every later
                    # log record would also be queued into this run's abandoned queue.
                    root = logging.getLogger()
                    for handler in list(root.handlers):
                        if isinstance(handler, QueueHandler) and handler.log_queue is log_queue:
                            root.removeHandler(handler)

                def worker():
                    # An empty table makes update_output fall back to the initial table.
                    result = []
                    try:
                        result = do_run()
                    except Exception:
                        # Logged before the handler is detached so the failure reaches the UI.
                        logging.exception("Agent run failed")
                    finally:
                        detach_log_handler()
                        # Always deliver a result; otherwise update_output would poll forever.
                        result_queue.put(result)

                thread = threading.Thread(target=worker)
                thread.start()

                for log_data, output, final_result in update_output(initial_log_data, log_queue, result_queue):
                    yield log_data, output, final_result

            def do_select(selected_index: gr.SelectData):
                """Send an alert for the opportunity in the selected table row."""
                opportunities = self.get_agent_framework().memory
                row = selected_index.index[0]
                opportunity = opportunities[row]
                self.get_agent_framework().planner.messenger.alert(opportunity)

            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:24px"><strong>The Price is Right</strong> - Autonomous Agent Framework that hunts for deals</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">A proprietary fine-tuned LLM deployed on Modal and a RAG pipeline with a frontier model collaborate to send push notifications with great online deals.</div>')
            with gr.Row():
                opportunities_dataframe = gr.Dataframe(
                    headers=["Deals found so far", "Price", "Estimate", "Discount", "URL"],
                    wrap=True,
                    column_widths=[6, 1, 1, 1, 3],
                    row_count=10,
                    col_count=5,
                    max_height=400,
                )
            with gr.Row():
                with gr.Column(scale=1):
                    logs = gr.HTML()
                with gr.Column(scale=1):
                    plot = gr.Plot(value=get_plot(), show_label=False)

            ui.load(run_with_logging, inputs=[log_data], outputs=[log_data, logs, opportunities_dataframe])

            timer = gr.Timer(value=300, active=True)
            timer.tick(run_with_logging, inputs=[log_data], outputs=[log_data, logs, opportunities_dataframe])

            opportunities_dataframe.select(do_select)

        ui.launch(share=False, inbrowser=True)
|
163 |
+
|
164 |
+
if __name__=="__main__":
|
165 |
+
App().run()
|
166 |
+
|
deal_agent_framework.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import logging
|
4 |
+
import json
|
5 |
+
from typing import List, Optional
|
6 |
+
# from twilio.rest import Client
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
import chromadb
|
9 |
+
from agents.planning_agent import PlanningAgent
|
10 |
+
from agents.deals import Opportunity
|
11 |
+
from sklearn.manifold import TSNE
|
12 |
+
import numpy as np
|
13 |
+
|
14 |
+
|
15 |
+
# Colors for logging
|
16 |
+
BG_BLUE = '\033[44m'
|
17 |
+
WHITE = '\033[37m'
|
18 |
+
RESET = '\033[0m'
|
19 |
+
|
20 |
+
# Colors for plot
|
21 |
+
CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']
|
22 |
+
COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']
|
23 |
+
|
24 |
+
def init_logging():
    """
    Add a stdout handler at INFO level to the root logger and set the root level to INFO.

    Lines are formatted as "[timestamp] [Agents] [LEVEL] message".
    """
    line_format = logging.Formatter(
        "[%(asctime)s] [Agents] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S %z",
    )
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.INFO)
    stdout_handler.setFormatter(line_format)

    root = logging.getLogger()
    root.setLevel(logging.INFO)
    root.addHandler(stdout_handler)
|
36 |
+
|
37 |
+
class DealAgentFramework:
    """
    Owns the persisted opportunity memory and the 'products' Chroma collection, and
    drives the PlanningAgent that looks for new opportunities.
    """

    DB = "products_vectorstore"
    MEMORY_FILENAME = "memory.json"

    def __init__(self):
        init_logging()
        load_dotenv()
        client = chromadb.PersistentClient(path=self.DB)
        self.memory = self.read_memory()
        self.collection = client.get_or_create_collection('products')
        # The planner is created lazily by init_agents_as_needed()
        self.planner = None

    def init_agents_as_needed(self):
        """Create the PlanningAgent on first call; later calls do nothing."""
        if not self.planner:
            self.log("Initializing Agent Framework")
            self.planner = PlanningAgent(self.collection)
            self.log("Agent Framework is ready")

    def read_memory(self) -> List[Opportunity]:
        """Load opportunities from MEMORY_FILENAME, or return [] when the file is absent."""
        if os.path.exists(self.MEMORY_FILENAME):
            with open(self.MEMORY_FILENAME, "r") as file:
                data = json.load(file)
            opportunities = [Opportunity(**item) for item in data]
            return opportunities
        return []

    def write_memory(self) -> None:
        """Write every remembered opportunity to MEMORY_FILENAME as indented JSON."""
        data = [opportunity.dict() for opportunity in self.memory]
        with open(self.MEMORY_FILENAME, "w") as file:
            json.dump(data, file, indent=2)

    def log(self, message: str):
        """Log message at INFO level, tagged and wrapped in white-on-blue color codes."""
        text = BG_BLUE + WHITE + "[Agent Framework] " + message + RESET
        logging.info(text)

    def run(self) -> List[Opportunity]:
        """
        Run the planner once; when it returns a result, remember and persist it.

        :return: the full memory list, including any newly added opportunity
        """
        self.init_agents_as_needed()
        logging.info("Kicking off Planning Agent")
        result = self.planner.plan(memory=self.memory)
        logging.info(f"Planning Agent has completed and returned: {result}")
        if result:
            self.memory.append(result)
            self.write_memory()
        return self.memory

    @classmethod
    def get_plot_data(cls, max_datapoints=10000):
        """
        Fetch up to max_datapoints items from the 'products' collection and reduce their
        embeddings to 3 dimensions with t-SNE.

        :return: (documents, array with 3 columns and one row per document, color names)
        :raises ValueError: if an item's 'category' metadata is not listed in CATEGORIES
        """
        client = chromadb.PersistentClient(path=cls.DB)
        collection = client.get_or_create_collection('products')
        result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=max_datapoints)
        documents = result['documents']
        categories = [metadata['category'] for metadata in result['metadatas']]
        colors = [COLORS[CATEGORIES.index(c)] for c in categories]
        count = len(documents)
        if count < 2:
            # Nothing to project; keep the 3-column shape that callers index into
            return documents, np.zeros((count, 3)), colors
        vectors = np.array(result['embeddings'])
        # t-SNE requires perplexity < number of samples; 30.0 is its default,
        # so collections with more than 30 items behave exactly as before
        perplexity = min(30.0, count - 1)
        tsne = TSNE(n_components=3, perplexity=perplexity, random_state=42, n_jobs=-1)
        reduced_vectors = tsne.fit_transform(vectors)
        return documents, reduced_vectors, colors
|
95 |
+
|
96 |
+
|
97 |
+
if __name__=="__main__":
|
98 |
+
DealAgentFramework().run()
|
99 |
+
|
ensemble_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1ea0ada33938b007f0489526c70ba8ff1268e743477fbe9e4201857cf7ea471
|
3 |
+
size 976
|
hello.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import modal
|
2 |
+
from modal import App, Image
|
3 |
+
|
4 |
+
# Setup
|
5 |
+
|
6 |
+
app = modal.App("hello")
|
7 |
+
image = Image.debian_slim().pip_install("requests")
|
8 |
+
|
9 |
+
# Hello!
|
10 |
+
|
11 |
+
@app.function(image=image)
def hello() -> str:
    """Return a greeting built from the city, region and country that ipinfo.io reports."""
    import requests

    info = requests.get('https://ipinfo.io/json').json()
    city = info['city']
    region = info['region']
    country = info['country']
    return f"Hello from {city}, {region}, {country}!!"
|
items.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional
|
2 |
+
from transformers import AutoTokenizer
|
3 |
+
import re
|
4 |
+
|
5 |
+
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
|
6 |
+
MIN_TOKENS = 150
|
7 |
+
MAX_TOKENS = 160
|
8 |
+
MIN_CHARS = 300
|
9 |
+
CEILING_CHARS = MAX_TOKENS * 7
|
10 |
+
|
11 |
+
class Item:
    """
    A cleaned, curated datapoint: a product together with its price
    """

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    PREFIX = "Price is $"
    QUESTION = "How much does this cost to the nearest dollar?"
    REMOVALS = ['"Batteries Included?": "No"', '"Batteries Included?": "Yes"', '"Batteries Required?": "No"', '"Batteries Required?": "Yes"', "By Manufacturer", "Item", "Date First", "Package", ":", "Number of", "Best Sellers", "Number", "Product "]

    title: str
    price: float
    category: str
    token_count: int = 0
    details: Optional[str]
    prompt: Optional[str] = None
    include = False

    def __init__(self, data, price):
        self.title, self.price = data['title'], price
        self.parse(data)

    def scrub_details(self):
        """
        Return the details string with every REMOVALS phrase deleted, in list order
        """
        cleaned = self.details
        for phrase in self.REMOVALS:
            cleaned = cleaned.replace(phrase, "")
        return cleaned

    def scrub(self, stuff):
        """
        Collapse runs of punctuation/whitespace into single spaces and tidy stray commas
        Then drop words of 7+ chars that contain a digit, as these are likely product numbers
        """
        collapsed = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip()
        collapsed = collapsed.replace(" ,", ",").replace(",,,",",").replace(",,",",")
        kept = []
        for word in collapsed.split(' '):
            looks_like_product_number = len(word) >= 7 and any(char.isdigit() for char in word)
            if not looks_like_product_number:
                kept.append(word)
        return " ".join(kept)

    def parse(self, data):
        """
        Combine description, features and scrubbed details; if the result is longer than
        MIN_CHARS and tokenizes to more than MIN_TOKENS, build the prompt and set include to True
        """
        contents = '\n'.join(data['description'])
        if contents:
            contents += '\n'
        features = '\n'.join(data['features'])
        if features:
            contents += features + '\n'
        self.details = data['details']
        if self.details:
            contents += self.scrub_details() + '\n'

        if len(contents) <= MIN_CHARS:
            return
        contents = contents[:CEILING_CHARS]
        text = f"{self.scrub(self.title)}\n{self.scrub(contents)}"
        token_ids = self.tokenizer.encode(text, add_special_tokens=False)
        if len(token_ids) <= MIN_TOKENS:
            return
        # Truncate to MAX_TOKENS and turn the kept tokens back into text
        text = self.tokenizer.decode(token_ids[:MAX_TOKENS])
        self.make_prompt(text)
        self.include = True

    def make_prompt(self, text):
        """
        Store the training prompt (question, text, then the rounded price) and its token count
        """
        rounded_price = str(round(self.price))
        self.prompt = f"{self.QUESTION}\n\n{text}\n\n" + f"{self.PREFIX}{rounded_price}.00"
        self.token_count = len(self.tokenizer.encode(self.prompt, add_special_tokens=False))

    def test_prompt(self):
        """
        Return the prompt up to and including the first PREFIX, i.e. without the price
        """
        before_price, _, _ = self.prompt.partition(self.PREFIX)
        return before_price + self.PREFIX

    def __repr__(self):
        """
        Return a short "<title = $price>" description of this Item
        """
        return f"<{self.title} = ${self.price}>"
|
97 |
+
|
98 |
+
|
99 |
+
|
100 |
+
|
101 |
+
|
keep_warm.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
import modal
|
3 |
+
from datetime import datetime
|
4 |
+
|
5 |
+
# Look up the deployed "Pricer" class once, then call its wake_up method every
# 30 seconds forever, printing each reply with a local timestamp.
Pricer = modal.Cls.lookup("pricer-service", "Pricer")
pricer = Pricer()
while True:
    reply = pricer.wake_up.remote()
    timestamp = datetime.now()
    print(f"{timestamp}: {reply}")
    time.sleep(30)
|
llama.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import modal
|
2 |
+
from modal import App, Volume, Image
|
3 |
+
|
4 |
+
# Setup
|
5 |
+
|
6 |
+
app = modal.App("llama")
|
7 |
+
image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate")
|
8 |
+
secrets = [modal.Secret.from_name("hf-secret")]
|
9 |
+
GPU = "T4"
|
10 |
+
MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B" # "google/gemma-2-2b"
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def generate(prompt: str) -> str:
    """
    Load MODEL_NAME with 4-bit quantization and return the decoded prompt plus
    up to 5 newly generated tokens.
    """
    import os
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed

    # Tokenizer, padding with the end-of-sequence token on the right
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # 4-bit NF4 quantization with double quantization, computing in bfloat16
    four_bit_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=four_bit_config,
        device_map="auto"
    )

    set_seed(42)
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    mask = torch.ones(input_ids.shape, device="cuda")
    generated = model.generate(input_ids, attention_mask=mask, max_new_tokens=5, num_return_sequences=1)
    return tokenizer.decode(generated[0])
|
log_utils.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ANSI escape codes for foreground colors
RED = '\033[31m'
GREEN = '\033[32m'
YELLOW = '\033[33m'
BLUE = '\033[34m'
MAGENTA = '\033[35m'
CYAN = '\033[36m'
WHITE = '\033[37m'

# ANSI escape codes for background colors
BG_BLACK = '\033[40m'
BG_BLUE = '\033[44m'

# ANSI escape code that restores the default color
RESET = '\033[0m'

# Background+foreground code pair -> hex color used for the HTML rendering
mapper = {
    BG_BLACK + RED: "#dd0000",
    BG_BLACK + GREEN: "#00dd00",
    BG_BLACK + YELLOW: "#dddd00",
    BG_BLACK + BLUE: "#0000ee",
    BG_BLACK + MAGENTA: "#aa00dd",
    BG_BLACK + CYAN: "#00dddd",
    BG_BLACK + WHITE: "#87CEEB",
    BG_BLUE + WHITE: "#ff7800",
}


def reformat(message):
    """Replace each known color-code pair with an opening <span> and RESET with </span>."""
    html = message
    for ansi_pair, hex_color in mapper.items():
        html = html.replace(ansi_pair, f'<span style="color: {hex_color}">')
    return html.replace(RESET, '</span>')
|
34 |
+
|
35 |
+
|
memory.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"deal": {
|
4 |
+
"product_description": "The Samsung Galaxy Watch Ultra is a premium 47mm LTE Titanium smartwatch designed for both style and functionality. It features a circular display made with durable materials suitable for outdoor activities, providing GPS tracking, health monitoring, and custom apps for various needs. The robust design integrates a range of smart features including notifications, music control, and heart rate tracking, making it an ideal companion for fitness enthusiasts and tech-savvy users alike.",
|
5 |
+
"price": 350.0,
|
6 |
+
"url": "https://www.dealnews.com/Samsung-Galaxy-Watch-Ultra-47-mm-LTE-Titanium-Smartwatch-up-to-350-off-w-Trade-in-free-shipping/21663266.html?iref=rss-c142"
|
7 |
+
},
|
8 |
+
"estimate": 773.8138460593241,
|
9 |
+
"discount": 423.8138460593241
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"deal": {
|
13 |
+
"product_description": "The Refurbished Unlocked Apple iPhone 14 Pro Max offers an impressive 256GB storage and a huge display, perfect for both media consumption and productivity. Enjoy advanced camera technology for stunning photos. This model is designed to provide a seamless user experience with 5G capabilities for faster downloads and streaming. Refurbished to high standards, it comes in various colors and can support all the latest apps from the App Store, accommodating any Apple enthusiast's needs.",
|
14 |
+
"price": 705.0,
|
15 |
+
"url": "https://www.dealnews.com/products/Apple/Unlocked-Apple-iPhone-14-Pro-Max-256-GB-Smartphone/462808.html?iref=rss-c142"
|
16 |
+
},
|
17 |
+
"estimate": 930.8824204895075,
|
18 |
+
"discount": 225.88242048950747
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"deal": {
|
22 |
+
"product_description": "The Certified Refurb ViewSonic WXGA DLP HDMI Projector offers a maximum resolution of 1280x800 and a high brightness of 3600 Lumens, making it suitable for various settings including home theaters and business presentations. It features multiple input options including HDMI, composite video, and two VGA inputs for versatile connectivity. Being a certified refurbished unit, it comes with a two-year warranty from Allstate, ensuring reliability and peace of mind for users. This projector is designed to deliver sharp and vibrant images, perfect for both entertainment and professional use.",
|
23 |
+
"price": 160.0,
|
24 |
+
"url": "https://www.dealnews.com/products/View-Sonic/View-Sonic-WXGA-DLP-HDMI-Projector/486990.html?iref=rss-c39"
|
25 |
+
},
|
26 |
+
"estimate": 572.7903759232101,
|
27 |
+
"discount": 412.79037592321015
|
28 |
+
}
|
29 |
+
]
|
price_is_right.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from deal_agent_framework import DealAgentFramework
|
3 |
+
from agents.deals import Opportunity, Deal
|
4 |
+
|
5 |
+
class App:
    """Gradio UI showing the deals surfaced so far and re-running the agents on a timer."""

    def __init__(self):
        self.agent_framework = None

    def run(self):
        """Build the Blocks UI, wire its load/timer/select events, and launch it."""
        with gr.Blocks(title="The Price is Right", fill_width=True) as ui:

            def table_for(opps):
                # One row per opportunity: description, price, estimate, discount, url
                rows = []
                for opp in opps:
                    deal = opp.deal
                    rows.append([
                        deal.product_description,
                        f"${deal.price:.2f}",
                        f"${opp.estimate:.2f}",
                        f"${opp.discount:.2f}",
                        deal.url,
                    ])
                return rows

            def start():
                # Create the framework and show what is already in its memory
                self.agent_framework = DealAgentFramework()
                self.agent_framework.init_agents_as_needed()
                return table_for(self.agent_framework.memory)

            def go():
                # Run the agents once, then show the framework's memory
                self.agent_framework.run()
                return table_for(self.agent_framework.memory)

            def do_select(selected_index: gr.SelectData):
                # Send an alert for the opportunity in the selected row
                row = selected_index.index[0]
                chosen = self.agent_framework.memory[row]
                self.agent_framework.planner.messenger.alert(chosen)

            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:24px">"The Price is Right" - Deal Hunting Agentic AI</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">Autonomous agent framework that finds online deals, collaborating with a proprietary fine-tuned LLM deployed on Modal, and a RAG pipeline with a frontier model and Chroma.</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">Deals surfaced so far:</div>')
            with gr.Row():
                opportunities_dataframe = gr.Dataframe(
                    headers=["Description", "Price", "Estimate", "Discount", "URL"],
                    wrap=True,
                    column_widths=[4, 1, 1, 1, 2],
                    row_count=10,
                    col_count=5,
                    max_height=400,
                )

            ui.load(start, inputs=[], outputs=[opportunities_dataframe])

            timer = gr.Timer(value=60)
            timer.tick(go, inputs=[], outputs=[opportunities_dataframe])

            opportunities_dataframe.select(do_select)

        ui.launch(share=False, inbrowser=True)
|
59 |
+
|
60 |
+
if __name__=="__main__":
|
61 |
+
App().run()
|
62 |
+
|
pricer_ephemeral.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import modal
|
2 |
+
from modal import App, Image
|
3 |
+
|
4 |
+
# Setup
|
5 |
+
|
6 |
+
app = modal.App("pricer")
|
7 |
+
image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate", "peft")
|
8 |
+
secrets = [modal.Secret.from_name("hf-secret")]
|
9 |
+
|
10 |
+
# Constants
|
11 |
+
|
12 |
+
GPU = "T4"
|
13 |
+
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
|
14 |
+
PROJECT_NAME = "pricer"
|
15 |
+
HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
|
16 |
+
RUN_NAME = "2024-09-13_13.04.39"
|
17 |
+
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
|
18 |
+
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
|
19 |
+
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
|
20 |
+
|
21 |
+
|
22 |
+
@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def price(description: str) -> float:
    """
    Estimate the price of the described item with the fine-tuned model.

    Each call loads the 4-bit quantized BASE_MODEL, applies the PEFT weights from
    FINETUNED_MODEL at REVISION, generates up to 5 new tokens after the "Price is $"
    prefix and parses the first number that follows that prefix.

    :param description: text describing the product
    :return: the parsed price, or 0.0 when no number follows the prefix
    """
    import re
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
    from peft import PeftModel

    QUESTION = "How much does this cost to the nearest dollar?"
    PREFIX = "Price is $"

    # NOTE(review): items.py builds its prompts with blank lines ("\n\n") between the
    # sections, while this prompt uses single newlines - confirm which the model expects.
    prompt = f"{QUESTION}\n{description}\n{PREFIX}"

    # Quant Config
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )

    # Load model and tokenizer

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=quant_config,
        device_map="auto"
    )

    fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)

    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
    result = tokenizer.decode(outputs[0])

    # Take the text after the LAST prefix: the description may itself contain
    # "Price is $", and the prompt's own prefix comes after the description.
    contents = result.rsplit(PREFIX, 1)[-1]
    contents = contents.replace(',', '')
    match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
    return float(match.group()) if match else 0.0
|
pricer_service.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import modal
|
2 |
+
from modal import App, Image
|
3 |
+
|
4 |
+
# Setup - define our infrastructure with code!
|
5 |
+
|
6 |
+
app = modal.App("pricer-service")
|
7 |
+
image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate", "peft")
|
8 |
+
secrets = [modal.Secret.from_name("hf-secret")]
|
9 |
+
|
10 |
+
# Constants
|
11 |
+
|
12 |
+
GPU = "T4"
|
13 |
+
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
|
14 |
+
PROJECT_NAME = "pricer"
|
15 |
+
HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
|
16 |
+
RUN_NAME = "2024-09-13_13.04.39"
|
17 |
+
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
|
18 |
+
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
|
19 |
+
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
|
20 |
+
|
21 |
+
|
22 |
+
@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def price(description: str) -> float:
    """
    Estimate the price of the described item with the fine-tuned model.

    Each call loads the 4-bit quantized BASE_MODEL, applies the PEFT weights from
    FINETUNED_MODEL at REVISION, generates up to 5 new tokens after the "Price is $"
    prefix and parses the first number that follows that prefix.

    :param description: text describing the product
    :return: the parsed price, or 0.0 when no number follows the prefix
    """
    import re
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
    from peft import PeftModel

    QUESTION = "How much does this cost to the nearest dollar?"
    PREFIX = "Price is $"

    # NOTE(review): items.py builds its prompts with blank lines ("\n\n") between the
    # sections, while this prompt uses single newlines - confirm which the model expects.
    prompt = f"{QUESTION}\n{description}\n{PREFIX}"

    # Quant Config
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )

    # Load model and tokenizer

    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=quant_config,
        device_map="auto"
    )

    fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)

    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
    result = tokenizer.decode(outputs[0])

    # Take the text after the LAST prefix: the description may itself contain
    # "Price is $", and the prompt's own prefix comes after the description.
    contents = result.rsplit(PREFIX, 1)[-1]
    contents = contents.replace(',', '')
    match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
    return float(match.group()) if match else 0.0
|
pricer_service2.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import modal
|
2 |
+
from modal import App, Volume, Image
|
3 |
+
|
4 |
+
# Setup - define our infrastructure with code!

app = modal.App("pricer-service")

# The code imports `huggingface_hub` (for snapshot_download), so install that
# distribution explicitly. The PyPI package named "huggingface" is only a
# placeholder and does not provide the module.
image = Image.debian_slim().pip_install(
    "huggingface_hub",
    "torch",
    "transformers",
    "bitsandbytes",
    "accelerate",
    "peft",
)
secrets = [modal.Secret.from_name("hf-secret")]

# Constants

GPU = "T4"
BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
PROJECT_NAME = "pricer"
HF_USER = "ed-donner"  # your HF name here! Or use mine if you just want to reproduce my results.
RUN_NAME = "2024-09-13_13.04.39"
PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"

# Local folders the model snapshots are downloaded to and later loaded from.
MODEL_DIR = "hf-cache/"
BASE_DIR = MODEL_DIR + BASE_MODEL
FINETUNED_DIR = MODEL_DIR + FINETUNED_MODEL

# Prompt pieces: the question precedes the description, the prefix follows it.
QUESTION = "How much does this cost to the nearest dollar?"
PREFIX = "Price is $"
|
26 |
+
|
27 |
+
|
28 |
+
@app.cls(image=image, secrets=secrets, gpu=GPU, timeout=1800)
class Pricer:
    """Modal class that serves price estimates from the fine-tuned model.

    The model files are downloaded to local folders by
    ``download_model_to_folder``, loaded onto the GPU by ``setup`` and then
    reused by every ``price`` call.
    """

    @modal.build()
    def download_model_to_folder(self):
        """Download the base model and the pinned fine-tuned adapter to disk."""
        from huggingface_hub import snapshot_download
        import os
        os.makedirs(MODEL_DIR, exist_ok=True)
        snapshot_download(BASE_MODEL, local_dir=BASE_DIR)
        snapshot_download(FINETUNED_MODEL, revision=REVISION, local_dir=FINETUNED_DIR)

    @modal.enter()
    def setup(self):
        """Load tokenizer, quantized base model and adapter into ``self``."""
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from peft import PeftModel

        # Quant Config
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4"
        )

        # Load model and tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(BASE_DIR)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = "right"

        self.base_model = AutoModelForCausalLM.from_pretrained(
            BASE_DIR,
            quantization_config=quant_config,
            device_map="auto"
        )

        self.fine_tuned_model = PeftModel.from_pretrained(self.base_model, FINETUNED_DIR, revision=REVISION)

    @modal.method()
    def price(self, description: str) -> float:
        """Estimate the price of a product from its text description.

        Args:
            description: Free-text description of the product.

        Returns:
            The first number found in the generated text, or 0.0 when the
            model produced no number.
        """
        import re
        import torch
        from transformers import set_seed

        # Fixed seed so repeated calls with the same description agree.
        set_seed(42)
        prompt = f"{QUESTION}\n\n{description}\n\n{PREFIX}"
        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to("cuda")
        attention_mask = torch.ones(inputs.shape, device="cuda")
        outputs = self.fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)

        # generate() returns the prompt tokens followed by the new tokens.
        # Decode only the new ones: splitting the full text on "Price is $"
        # picks the wrong segment whenever the description contains that phrase.
        prompt_length = inputs.shape[1]
        generated = self.tokenizer.decode(outputs[0][prompt_length:])

        # Drop thousands separators so "1,299" parses as a single number.
        contents = generated.replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", contents)
        return float(match.group()) if match else 0.0

    @modal.method()
    def wake_up(self) -> str:
        """Cheap no-op call; returns "ok" without touching the model."""
        return "ok"
|
90 |
+
|
products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:738e91710f5850eaeb33f48b0e38ba324ef7318e0bd642fe7010ebfaeb1b03b1
|
3 |
+
size 33520000
|
products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d68f3ac77e630ef88ffe98166d95cb4cec2d95ca2eaef84862265ce764550b3
|
3 |
+
size 100
|
products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0b66426908857b8eecd6add2e313137799630458b2ae5f8b764d52a37848ec8
|
3 |
+
size 607037
|
products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:206e5f58a9c2bbb44d5b7e6749a7df3f212f8e2ad2efc18744a2ef8cdd940040
|
3 |
+
size 80000
|
products_vectorstore/bc7562b6-30b1-424d-88bb-155673482d56/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85fa996a0f5e5fa8b732ba9462410f4329590927e4070b823aaa5e2511fcede6
|
3 |
+
size 172072
|
products_vectorstore/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3b87458085f483aa4d71440456c03ad4f31df45b1c33e05fe5435ab243a8130
|
3 |
+
size 104968192
|
random_forest_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46dac3dd274d8d7960e4dbb2657d8eeb33675de7356a432c041a41ff067ab360
|
3 |
+
size 652222049
|
requirements.txt
ADDED
Binary file (14.2 kB). View file
|
|
test.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7262b5e575ae404f239cd46db6238e33114ecde79bbed68c44ab32bbfa16e13
|
3 |
+
size 3073425
|
testing.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
|
4 |
+
# ANSI escape sequences used to colour the per-datapoint console output.
RESET = "\033[0m"
RED = "\033[91m"
YELLOW = "\033[93m"
GREEN = "\033[92m"

# Maps the colour names produced by Tester.color_for to terminal escape codes.
# "orange" is rendered with the yellow escape code.
COLOR_MAP = dict(red=RED, orange=YELLOW, green=GREEN)
|
9 |
+
|
10 |
+
class Tester:
    """Run a price predictor over a dataset and report how accurate it is.

    For each datapoint the predictor's guess is compared with
    ``datapoint.price``. The absolute error, squared log error and a
    traffic-light colour are recorded, then summarised in a scatter chart.
    """

    def __init__(self, predictor, data, title=None, size=250):
        """Store the predictor and data and initialise the result lists.

        Args:
            predictor: Callable taking one datapoint and returning a price.
            data: Indexable collection of datapoints with ``price`` and
                ``title`` attributes.
            title: Chart title; defaults to a prettified predictor name.
            size: Maximum number of datapoints to evaluate.
        """
        self.predictor = predictor
        self.data = data
        # Not every callable has __name__ (functools.partial, callable
        # instances), so fall back to the type name.
        default_name = getattr(predictor, "__name__", type(predictor).__name__)
        self.title = title or default_name.replace("_", " ").title()
        self.size = size
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        """Classify an error as "green", "orange" or "red".

        Green is an absolute error below 40 or a relative error below 20%.
        Orange is below 80 or below 40%. Everything else is red.
        """
        # The relative error is undefined for a zero truth; treat it as
        # infinite so that only the absolute thresholds can apply.
        relative = error / truth if truth else math.inf
        if error < 40 or relative < 0.2:
            return "green"
        if error < 80 or relative < 0.4:
            return "orange"
        return "red"

    def run_datapoint(self, i):
        """Evaluate datapoint ``i``, record the results and print one line."""
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        # Clamp at zero before taking logs: a negative guess would otherwise
        # raise a math domain error at or below -1.
        log_error = math.log(max(truth, 0) + 1) - math.log(max(guess, 0) + 1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40]+"..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        """Scatter-plot guesses against truths with a y = x reference line."""
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        """Summarise the recorded results and draw the chart."""
        # Average over what was actually evaluated; this can be fewer than
        # self.size when the dataset is short.
        count = len(self.errors)
        if count == 0:
            print("No datapoints were evaluated; nothing to report.")
            return
        average_error = sum(self.errors) / count
        rmsle = math.sqrt(sum(self.sles) / count)
        hits = sum(1 for color in self.colors if color=="green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/count*100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate up to ``size`` datapoints, then report."""
        self.error = 0  # never read in this class; kept in case callers do
        # Never index past the end of a dataset shorter than the requested size.
        count = min(self.size, len(self.data))
        for i in range(count):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        """Build a Tester with default settings and run it."""
        cls(function, data).run()
|