Commit
·
a38c422
1
Parent(s):
160a197
updates
Browse files- __pycache__/app.cpython-310.pyc +0 -0
- app.py +52 -129
__pycache__/app.cpython-310.pyc
ADDED
Binary file (21 kB). View file
|
|
app.py
CHANGED
@@ -5,9 +5,7 @@ import asyncio
|
|
5 |
from typing import List
|
6 |
import time
|
7 |
from functools import lru_cache
|
8 |
-
import requests
|
9 |
import json
|
10 |
-
import re
|
11 |
from datetime import datetime, timedelta
|
12 |
import threading
|
13 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
@@ -59,6 +57,14 @@ GPU_SPECS = {
|
|
59 |
"RTX 4080 Super": {"memory_gb": 16, "compute_capability": "8.9", "tflops_fp32": 52.2, "category": "Consumer"},
|
60 |
"RTX 4090": {"memory_gb": 24, "compute_capability": "8.9", "tflops_fp32": 83.0, "category": "Consumer"},
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
# Professional/Workstation RTX A Series (Ampere) - SM_8.6
|
63 |
"RTX A2000": {"memory_gb": 12, "compute_capability": "8.6", "tflops_fp32": 8.0, "category": "Workstation"},
|
64 |
"RTX A4000": {"memory_gb": 16, "compute_capability": "8.6", "tflops_fp32": 19.2, "category": "Workstation"},
|
@@ -81,7 +87,7 @@ GPU_SPECS = {
|
|
81 |
"H200 141GB": {"memory_gb": 141, "compute_capability": "9.0", "tflops_fp32": 67.0, "category": "Datacenter"},
|
82 |
|
83 |
# Datacenter B200 (Blackwell) - SM_10.0
|
84 |
-
"B200
|
85 |
|
86 |
# Datacenter L40/L40S (Ada Lovelace) - SM_8.9
|
87 |
"L40": {"memory_gb": 48, "compute_capability": "8.9", "tflops_fp32": 91.6, "category": "Datacenter"},
|
@@ -96,9 +102,9 @@ def fetch_single_gpu_price(gpu_name):
|
|
96 |
"""Fetch price for a single GPU (used in parallel)"""
|
97 |
try:
|
98 |
print(f"Fetching price for {gpu_name}...")
|
99 |
-
price =
|
100 |
if price:
|
101 |
-
print(f"
|
102 |
return gpu_name, price
|
103 |
else:
|
104 |
print(f"✗ No price found for {gpu_name}, using fallback")
|
@@ -109,7 +115,7 @@ def fetch_single_gpu_price(gpu_name):
|
|
109 |
|
110 |
def preload_gpu_prices():
|
111 |
"""Pre-fetch all GPU prices in parallel on startup"""
|
112 |
-
print("
|
113 |
start_time = time.time()
|
114 |
|
115 |
# Get list of GPUs to price
|
@@ -133,8 +139,8 @@ def preload_gpu_prices():
|
|
133 |
|
134 |
end_time = time.time()
|
135 |
total_time = end_time - start_time
|
136 |
-
print(f"
|
137 |
-
print(f"
|
138 |
|
139 |
def start_price_preloading():
|
140 |
"""Start price preloading in background thread"""
|
@@ -144,10 +150,10 @@ def start_price_preloading():
|
|
144 |
# Start preloading in background
|
145 |
preload_thread = threading.Thread(target=preload_worker, daemon=True)
|
146 |
preload_thread.start()
|
147 |
-
print("
|
148 |
|
149 |
-
def
|
150 |
-
"""
|
151 |
current_time = datetime.now()
|
152 |
|
153 |
# Check cache first
|
@@ -157,103 +163,19 @@ def get_gpu_price_from_multiple_sources(gpu_name):
|
|
157 |
if current_time - cached_data["timestamp"] < PRICE_CACHE_DURATION:
|
158 |
return cached_data["price"]
|
159 |
|
160 |
-
price =
|
161 |
-
|
162 |
-
try:
|
163 |
-
gpu_specs = GPU_SPECS.get(gpu_name, {})
|
164 |
-
gpu_category = gpu_specs.get("category", "Consumer")
|
165 |
-
|
166 |
-
if gpu_category == "Datacenter":
|
167 |
-
price = get_fallback_price(gpu_name)
|
168 |
-
else:
|
169 |
-
price = fetch_newegg_price(gpu_name)
|
170 |
-
if not price:
|
171 |
-
price = fetch_amazon_price(gpu_name)
|
172 |
-
if not price:
|
173 |
-
price = get_fallback_price(gpu_name)
|
174 |
-
|
175 |
-
except Exception as e:
|
176 |
-
print(f"Error fetching price for {gpu_name}: {e}")
|
177 |
-
price = get_fallback_price(gpu_name)
|
178 |
|
179 |
# Cache the result
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
}
|
185 |
|
186 |
return price
|
187 |
|
188 |
-
def fetch_newegg_price(gpu_name):
|
189 |
-
"""Fetch price from Newegg search (simplified approach)"""
|
190 |
-
try:
|
191 |
-
# Simple approach: search for GPU and extract price patterns
|
192 |
-
search_term = gpu_name.replace(" ", "+")
|
193 |
-
url = f"https://www.newegg.com/p/pl?d={search_term}"
|
194 |
-
|
195 |
-
headers = {
|
196 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
197 |
-
}
|
198 |
-
|
199 |
-
response = requests.get(url, headers=headers, timeout=2)
|
200 |
-
if response.status_code == 200:
|
201 |
-
# Look for price patterns in the HTML
|
202 |
-
price_patterns = [
|
203 |
-
r'\$([0-9,]+\.?\d*)',
|
204 |
-
r'price.*?(\d+[,.]?\d*)',
|
205 |
-
r'(\d{3,4})\.\d{2}'
|
206 |
-
]
|
207 |
-
|
208 |
-
for pattern in price_patterns:
|
209 |
-
matches = re.findall(pattern, response.text)
|
210 |
-
if matches:
|
211 |
-
# Get the first reasonable price (between $200-$3000)
|
212 |
-
for match in matches:
|
213 |
-
try:
|
214 |
-
price = float(match.replace(',', ''))
|
215 |
-
if 200 <= price <= 3000:
|
216 |
-
return price
|
217 |
-
except:
|
218 |
-
continue
|
219 |
-
except:
|
220 |
-
pass
|
221 |
-
return None
|
222 |
-
|
223 |
-
def fetch_amazon_price(gpu_name):
|
224 |
-
"""Fetch price from Amazon search (simplified approach)"""
|
225 |
-
try:
|
226 |
-
search_term = gpu_name.replace(" ", "+")
|
227 |
-
url = f"https://www.amazon.com/s?k={search_term}+graphics+card"
|
228 |
-
|
229 |
-
headers = {
|
230 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
231 |
-
}
|
232 |
-
|
233 |
-
response = requests.get(url, headers=headers, timeout=2)
|
234 |
-
if response.status_code == 200:
|
235 |
-
# Look for Amazon price patterns
|
236 |
-
price_patterns = [
|
237 |
-
r'\$([0-9,]+\.?\d*)',
|
238 |
-
r'a-price-whole.*?(\d+)',
|
239 |
-
]
|
240 |
-
|
241 |
-
for pattern in price_patterns:
|
242 |
-
matches = re.findall(pattern, response.text)
|
243 |
-
if matches:
|
244 |
-
for match in matches:
|
245 |
-
try:
|
246 |
-
price = float(match.replace(',', ''))
|
247 |
-
if 200 <= price <= 3000:
|
248 |
-
return price
|
249 |
-
except:
|
250 |
-
continue
|
251 |
-
except:
|
252 |
-
pass
|
253 |
-
return None
|
254 |
|
255 |
def get_fallback_price(gpu_name):
|
256 |
-
"""
|
257 |
fallback_prices = {
|
258 |
# Consumer RTX 30 Series
|
259 |
"RTX 3060": 280,
|
@@ -277,6 +199,14 @@ def get_fallback_price(gpu_name):
|
|
277 |
"RTX 4080 Super": 880,
|
278 |
"RTX 4090": 1500,
|
279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
280 |
# Professional/Workstation GPUs
|
281 |
"RTX A2000": 650,
|
282 |
"RTX A4000": 1200,
|
@@ -285,15 +215,15 @@ def get_fallback_price(gpu_name):
|
|
285 |
"RTX A6000": 4500,
|
286 |
"RTX 6000 Ada": 6800,
|
287 |
|
288 |
-
# Datacenter GPUs (
|
289 |
"A100 40GB": 12000,
|
290 |
"A100 80GB": 15000,
|
291 |
-
"H100 80GB":
|
292 |
-
"H100 94GB":
|
293 |
-
"H200 141GB":
|
294 |
-
"B200
|
295 |
-
"L40":
|
296 |
-
"L40S":
|
297 |
}
|
298 |
return fallback_prices.get(gpu_name, 1000)
|
299 |
|
@@ -677,11 +607,6 @@ def calculate_multi_gpu_configs(total_memory_needed, suitable_gpus):
|
|
677 |
else:
|
678 |
config_name = f"{count}x {gpu['name']} (TP={count})"
|
679 |
|
680 |
-
category_emoji = {
|
681 |
-
"Consumer": "🎮",
|
682 |
-
"Workstation": "🏢",
|
683 |
-
"Datacenter": "🏭"
|
684 |
-
}.get(gpu.get("category", "Consumer"), "🎮")
|
685 |
|
686 |
multi_gpu_configs.append({
|
687 |
"config": config_name,
|
@@ -691,7 +616,6 @@ def calculate_multi_gpu_configs(total_memory_needed, suitable_gpus):
|
|
691 |
"utilization": utilization,
|
692 |
"total_cost": total_cost,
|
693 |
"cost_per_tflop": cost_per_tflop_total,
|
694 |
-
"category_emoji": category_emoji,
|
695 |
"base_gpu": gpu
|
696 |
})
|
697 |
|
@@ -724,7 +648,7 @@ def recommend_gpus(kv_cache_size_gb, config=None, dtype="fp16/bf16", ctx_len=128
|
|
724 |
|
725 |
for gpu_name, specs in GPU_SPECS.items():
|
726 |
# Get real-time price (will use cache if available)
|
727 |
-
current_price =
|
728 |
if current_price:
|
729 |
cost_per_tflop = current_price / specs["tflops_fp32"]
|
730 |
all_gpus.append({
|
@@ -758,23 +682,22 @@ def recommend_gpus(kv_cache_size_gb, config=None, dtype="fp16/bf16", ctx_len=128
|
|
758 |
# Format recommendations
|
759 |
recommendations = []
|
760 |
for i, config in enumerate(multi_gpu_configs):
|
761 |
-
|
762 |
-
rank = rank_icons[i] if i < len(rank_icons) else "💎"
|
763 |
|
764 |
-
price_source = "
|
765 |
|
766 |
# Format configuration display
|
767 |
-
|
768 |
-
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
memory_display = f"{config['total_memory_gb']:.0f} GB ({config['utilization']:.0f}% util)"
|
773 |
|
774 |
recommendations.append([
|
775 |
config_display,
|
776 |
-
f"{
|
777 |
-
f"{
|
|
|
778 |
])
|
779 |
|
780 |
return recommendations
|
@@ -826,9 +749,9 @@ with gr.Blocks(title="KV Cache Calculator", theme=gr.themes.Soft()) as demo:
|
|
826 |
)
|
827 |
|
828 |
gpu_recommendations = gr.Dataframe(
|
829 |
-
label="
|
830 |
-
headers=["Configuration", "
|
831 |
-
datatype=["str", "str", "str"],
|
832 |
wrap=False,
|
833 |
visible=False
|
834 |
)
|
|
|
5 |
from typing import List
|
6 |
import time
|
7 |
from functools import lru_cache
|
|
|
8 |
import json
|
|
|
9 |
from datetime import datetime, timedelta
|
10 |
import threading
|
11 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
57 |
"RTX 4080 Super": {"memory_gb": 16, "compute_capability": "8.9", "tflops_fp32": 52.2, "category": "Consumer"},
|
58 |
"RTX 4090": {"memory_gb": 24, "compute_capability": "8.9", "tflops_fp32": 83.0, "category": "Consumer"},
|
59 |
|
60 |
+
# Consumer RTX 50 Series (Blackwell - GB202/GB203/GB205/GB206/GB207) - SM_10.0
|
61 |
+
"RTX 5060": {"memory_gb": 8, "compute_capability": "10.0", "tflops_fp32": 18.5, "category": "Consumer"},
|
62 |
+
"RTX 5060 Ti": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 28.2, "category": "Consumer"},
|
63 |
+
"RTX 5070": {"memory_gb": 12, "compute_capability": "10.0", "tflops_fp32": 35.1, "category": "Consumer"},
|
64 |
+
"RTX 5070 Ti": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 48.3, "category": "Consumer"},
|
65 |
+
"RTX 5080": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 60.5, "category": "Consumer"},
|
66 |
+
"RTX 5090": {"memory_gb": 32, "compute_capability": "10.0", "tflops_fp32": 125.0, "category": "Consumer"},
|
67 |
+
|
68 |
# Professional/Workstation RTX A Series (Ampere) - SM_8.6
|
69 |
"RTX A2000": {"memory_gb": 12, "compute_capability": "8.6", "tflops_fp32": 8.0, "category": "Workstation"},
|
70 |
"RTX A4000": {"memory_gb": 16, "compute_capability": "8.6", "tflops_fp32": 19.2, "category": "Workstation"},
|
|
|
87 |
"H200 141GB": {"memory_gb": 141, "compute_capability": "9.0", "tflops_fp32": 67.0, "category": "Datacenter"},
|
88 |
|
89 |
# Datacenter B200 (Blackwell) - SM_10.0
|
90 |
+
"B200 180GB": {"memory_gb": 180, "compute_capability": "10.0", "tflops_fp32": 80.0, "category": "Datacenter"},
|
91 |
|
92 |
# Datacenter L40/L40S (Ada Lovelace) - SM_8.9
|
93 |
"L40": {"memory_gb": 48, "compute_capability": "8.9", "tflops_fp32": 91.6, "category": "Datacenter"},
|
|
|
102 |
"""Fetch price for a single GPU (used in parallel)"""
|
103 |
try:
|
104 |
print(f"Fetching price for {gpu_name}...")
|
105 |
+
price = get_gpu_price(gpu_name)
|
106 |
if price:
|
107 |
+
print(f"Found price for {gpu_name}: ${price}")
|
108 |
return gpu_name, price
|
109 |
else:
|
110 |
print(f"✗ No price found for {gpu_name}, using fallback")
|
|
|
115 |
|
116 |
def preload_gpu_prices():
|
117 |
"""Pre-fetch all GPU prices in parallel on startup"""
|
118 |
+
print("Pre-loading GPU prices...")
|
119 |
start_time = time.time()
|
120 |
|
121 |
# Get list of GPUs to price
|
|
|
139 |
|
140 |
end_time = time.time()
|
141 |
total_time = end_time - start_time
|
142 |
+
print(f"Loaded prices for {len(gpu_names)} GPUs in {total_time:.1f} seconds")
|
143 |
+
print(f"Cache contains {len(price_cache)} price entries")
|
144 |
|
145 |
def start_price_preloading():
|
146 |
"""Start price preloading in background thread"""
|
|
|
150 |
# Start preloading in background
|
151 |
preload_thread = threading.Thread(target=preload_worker, daemon=True)
|
152 |
preload_thread.start()
|
153 |
+
print("Price preloading started in background...")
|
154 |
|
155 |
+
def get_gpu_price(gpu_name):
|
156 |
+
"""Get GPU price from curated pricing data"""
|
157 |
current_time = datetime.now()
|
158 |
|
159 |
# Check cache first
|
|
|
163 |
if current_time - cached_data["timestamp"] < PRICE_CACHE_DURATION:
|
164 |
return cached_data["price"]
|
165 |
|
166 |
+
price = get_fallback_price(gpu_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
# Cache the result
|
169 |
+
price_cache[cache_key] = {
|
170 |
+
"price": price,
|
171 |
+
"timestamp": current_time
|
172 |
+
}
|
|
|
173 |
|
174 |
return price
|
175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
|
177 |
def get_fallback_price(gpu_name):
|
178 |
+
"""Curated GPU pricing data"""
|
179 |
fallback_prices = {
|
180 |
# Consumer RTX 30 Series
|
181 |
"RTX 3060": 280,
|
|
|
199 |
"RTX 4080 Super": 880,
|
200 |
"RTX 4090": 1500,
|
201 |
|
202 |
+
# Consumer RTX 50 Series (Expected pricing)
|
203 |
+
"RTX 5060": 400,
|
204 |
+
"RTX 5060 Ti": 600,
|
205 |
+
"RTX 5070": 800,
|
206 |
+
"RTX 5070 Ti": 1000,
|
207 |
+
"RTX 5080": 1200,
|
208 |
+
"RTX 5090": 2000,
|
209 |
+
|
210 |
# Professional/Workstation GPUs
|
211 |
"RTX A2000": 650,
|
212 |
"RTX A4000": 1200,
|
|
|
215 |
"RTX A6000": 4500,
|
216 |
"RTX 6000 Ada": 6800,
|
217 |
|
218 |
+
# Datacenter GPUs (current enterprise pricing)
|
219 |
"A100 40GB": 12000,
|
220 |
"A100 80GB": 15000,
|
221 |
+
"H100 80GB": 30000,
|
222 |
+
"H100 94GB": 35000,
|
223 |
+
"H200 141GB": 40000,
|
224 |
+
"B200 180GB": 50000,
|
225 |
+
"L40": 9000,
|
226 |
+
"L40S": 10000,
|
227 |
}
|
228 |
return fallback_prices.get(gpu_name, 1000)
|
229 |
|
|
|
607 |
else:
|
608 |
config_name = f"{count}x {gpu['name']} (TP={count})"
|
609 |
|
|
|
|
|
|
|
|
|
|
|
610 |
|
611 |
multi_gpu_configs.append({
|
612 |
"config": config_name,
|
|
|
616 |
"utilization": utilization,
|
617 |
"total_cost": total_cost,
|
618 |
"cost_per_tflop": cost_per_tflop_total,
|
|
|
619 |
"base_gpu": gpu
|
620 |
})
|
621 |
|
|
|
648 |
|
649 |
for gpu_name, specs in GPU_SPECS.items():
|
650 |
# Get real-time price (will use cache if available)
|
651 |
+
current_price = get_gpu_price(gpu_name)
|
652 |
if current_price:
|
653 |
cost_per_tflop = current_price / specs["tflops_fp32"]
|
654 |
all_gpus.append({
|
|
|
682 |
# Format recommendations
|
683 |
recommendations = []
|
684 |
for i, config in enumerate(multi_gpu_configs):
|
685 |
+
rank = f"#{i+1}"
|
|
|
686 |
|
687 |
+
price_source = "Live" if config["base_gpu"]["name"].lower().replace(" ", "_") in price_cache else "Est"
|
688 |
|
689 |
# Format configuration display
|
690 |
+
config_display = f"{rank} {config['config']}"
|
691 |
+
|
692 |
+
# Calculate FLOP/dollar (TFLOPS per dollar)
|
693 |
+
total_tflops = config["base_gpu"]["tflops_fp32"] * config["gpu_count"]
|
694 |
+
flops_per_dollar = total_tflops / config['total_cost']
|
|
|
695 |
|
696 |
recommendations.append([
|
697 |
config_display,
|
698 |
+
f"{flops_per_dollar:.3f}",
|
699 |
+
f"{total_memory_needed:.1f}GB",
|
700 |
+
f"${config['total_cost']:.0f}"
|
701 |
])
|
702 |
|
703 |
return recommendations
|
|
|
749 |
)
|
750 |
|
751 |
gpu_recommendations = gr.Dataframe(
|
752 |
+
label="GPU Recommendations",
|
753 |
+
headers=["Configuration", "TFLOPS/$", "Memory", "Price"],
|
754 |
+
datatype=["str", "str", "str", "str"],
|
755 |
wrap=False,
|
756 |
visible=False
|
757 |
)
|