Commit
·
a38c422
1
Parent(s):
160a197
updates
Browse files- __pycache__/app.cpython-310.pyc +0 -0
- app.py +52 -129
__pycache__/app.cpython-310.pyc
ADDED
|
Binary file (21 kB). View file
|
|
|
app.py
CHANGED
|
@@ -5,9 +5,7 @@ import asyncio
|
|
| 5 |
from typing import List
|
| 6 |
import time
|
| 7 |
from functools import lru_cache
|
| 8 |
-
import requests
|
| 9 |
import json
|
| 10 |
-
import re
|
| 11 |
from datetime import datetime, timedelta
|
| 12 |
import threading
|
| 13 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
@@ -59,6 +57,14 @@ GPU_SPECS = {
|
|
| 59 |
"RTX 4080 Super": {"memory_gb": 16, "compute_capability": "8.9", "tflops_fp32": 52.2, "category": "Consumer"},
|
| 60 |
"RTX 4090": {"memory_gb": 24, "compute_capability": "8.9", "tflops_fp32": 83.0, "category": "Consumer"},
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# Professional/Workstation RTX A Series (Ampere) - SM_8.6
|
| 63 |
"RTX A2000": {"memory_gb": 12, "compute_capability": "8.6", "tflops_fp32": 8.0, "category": "Workstation"},
|
| 64 |
"RTX A4000": {"memory_gb": 16, "compute_capability": "8.6", "tflops_fp32": 19.2, "category": "Workstation"},
|
|
@@ -81,7 +87,7 @@ GPU_SPECS = {
|
|
| 81 |
"H200 141GB": {"memory_gb": 141, "compute_capability": "9.0", "tflops_fp32": 67.0, "category": "Datacenter"},
|
| 82 |
|
| 83 |
# Datacenter B200 (Blackwell) - SM_10.0
|
| 84 |
-
"B200
|
| 85 |
|
| 86 |
# Datacenter L40/L40S (Ada Lovelace) - SM_8.9
|
| 87 |
"L40": {"memory_gb": 48, "compute_capability": "8.9", "tflops_fp32": 91.6, "category": "Datacenter"},
|
|
@@ -96,9 +102,9 @@ def fetch_single_gpu_price(gpu_name):
|
|
| 96 |
"""Fetch price for a single GPU (used in parallel)"""
|
| 97 |
try:
|
| 98 |
print(f"Fetching price for {gpu_name}...")
|
| 99 |
-
price =
|
| 100 |
if price:
|
| 101 |
-
print(f"
|
| 102 |
return gpu_name, price
|
| 103 |
else:
|
| 104 |
print(f"✗ No price found for {gpu_name}, using fallback")
|
|
@@ -109,7 +115,7 @@ def fetch_single_gpu_price(gpu_name):
|
|
| 109 |
|
| 110 |
def preload_gpu_prices():
|
| 111 |
"""Pre-fetch all GPU prices in parallel on startup"""
|
| 112 |
-
print("
|
| 113 |
start_time = time.time()
|
| 114 |
|
| 115 |
# Get list of GPUs to price
|
|
@@ -133,8 +139,8 @@ def preload_gpu_prices():
|
|
| 133 |
|
| 134 |
end_time = time.time()
|
| 135 |
total_time = end_time - start_time
|
| 136 |
-
print(f"
|
| 137 |
-
print(f"
|
| 138 |
|
| 139 |
def start_price_preloading():
|
| 140 |
"""Start price preloading in background thread"""
|
|
@@ -144,10 +150,10 @@ def start_price_preloading():
|
|
| 144 |
# Start preloading in background
|
| 145 |
preload_thread = threading.Thread(target=preload_worker, daemon=True)
|
| 146 |
preload_thread.start()
|
| 147 |
-
print("
|
| 148 |
|
| 149 |
-
def
|
| 150 |
-
"""
|
| 151 |
current_time = datetime.now()
|
| 152 |
|
| 153 |
# Check cache first
|
|
@@ -157,103 +163,19 @@ def get_gpu_price_from_multiple_sources(gpu_name):
|
|
| 157 |
if current_time - cached_data["timestamp"] < PRICE_CACHE_DURATION:
|
| 158 |
return cached_data["price"]
|
| 159 |
|
| 160 |
-
price =
|
| 161 |
-
|
| 162 |
-
try:
|
| 163 |
-
gpu_specs = GPU_SPECS.get(gpu_name, {})
|
| 164 |
-
gpu_category = gpu_specs.get("category", "Consumer")
|
| 165 |
-
|
| 166 |
-
if gpu_category == "Datacenter":
|
| 167 |
-
price = get_fallback_price(gpu_name)
|
| 168 |
-
else:
|
| 169 |
-
price = fetch_newegg_price(gpu_name)
|
| 170 |
-
if not price:
|
| 171 |
-
price = fetch_amazon_price(gpu_name)
|
| 172 |
-
if not price:
|
| 173 |
-
price = get_fallback_price(gpu_name)
|
| 174 |
-
|
| 175 |
-
except Exception as e:
|
| 176 |
-
print(f"Error fetching price for {gpu_name}: {e}")
|
| 177 |
-
price = get_fallback_price(gpu_name)
|
| 178 |
|
| 179 |
# Cache the result
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
}
|
| 185 |
|
| 186 |
return price
|
| 187 |
|
| 188 |
-
def fetch_newegg_price(gpu_name):
|
| 189 |
-
"""Fetch price from Newegg search (simplified approach)"""
|
| 190 |
-
try:
|
| 191 |
-
# Simple approach: search for GPU and extract price patterns
|
| 192 |
-
search_term = gpu_name.replace(" ", "+")
|
| 193 |
-
url = f"https://www.newegg.com/p/pl?d={search_term}"
|
| 194 |
-
|
| 195 |
-
headers = {
|
| 196 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
| 197 |
-
}
|
| 198 |
-
|
| 199 |
-
response = requests.get(url, headers=headers, timeout=2)
|
| 200 |
-
if response.status_code == 200:
|
| 201 |
-
# Look for price patterns in the HTML
|
| 202 |
-
price_patterns = [
|
| 203 |
-
r'\$([0-9,]+\.?\d*)',
|
| 204 |
-
r'price.*?(\d+[,.]?\d*)',
|
| 205 |
-
r'(\d{3,4})\.\d{2}'
|
| 206 |
-
]
|
| 207 |
-
|
| 208 |
-
for pattern in price_patterns:
|
| 209 |
-
matches = re.findall(pattern, response.text)
|
| 210 |
-
if matches:
|
| 211 |
-
# Get the first reasonable price (between $200-$3000)
|
| 212 |
-
for match in matches:
|
| 213 |
-
try:
|
| 214 |
-
price = float(match.replace(',', ''))
|
| 215 |
-
if 200 <= price <= 3000:
|
| 216 |
-
return price
|
| 217 |
-
except:
|
| 218 |
-
continue
|
| 219 |
-
except:
|
| 220 |
-
pass
|
| 221 |
-
return None
|
| 222 |
-
|
| 223 |
-
def fetch_amazon_price(gpu_name):
|
| 224 |
-
"""Fetch price from Amazon search (simplified approach)"""
|
| 225 |
-
try:
|
| 226 |
-
search_term = gpu_name.replace(" ", "+")
|
| 227 |
-
url = f"https://www.amazon.com/s?k={search_term}+graphics+card"
|
| 228 |
-
|
| 229 |
-
headers = {
|
| 230 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
| 231 |
-
}
|
| 232 |
-
|
| 233 |
-
response = requests.get(url, headers=headers, timeout=2)
|
| 234 |
-
if response.status_code == 200:
|
| 235 |
-
# Look for Amazon price patterns
|
| 236 |
-
price_patterns = [
|
| 237 |
-
r'\$([0-9,]+\.?\d*)',
|
| 238 |
-
r'a-price-whole.*?(\d+)',
|
| 239 |
-
]
|
| 240 |
-
|
| 241 |
-
for pattern in price_patterns:
|
| 242 |
-
matches = re.findall(pattern, response.text)
|
| 243 |
-
if matches:
|
| 244 |
-
for match in matches:
|
| 245 |
-
try:
|
| 246 |
-
price = float(match.replace(',', ''))
|
| 247 |
-
if 200 <= price <= 3000:
|
| 248 |
-
return price
|
| 249 |
-
except:
|
| 250 |
-
continue
|
| 251 |
-
except:
|
| 252 |
-
pass
|
| 253 |
-
return None
|
| 254 |
|
| 255 |
def get_fallback_price(gpu_name):
|
| 256 |
-
"""
|
| 257 |
fallback_prices = {
|
| 258 |
# Consumer RTX 30 Series
|
| 259 |
"RTX 3060": 280,
|
|
@@ -277,6 +199,14 @@ def get_fallback_price(gpu_name):
|
|
| 277 |
"RTX 4080 Super": 880,
|
| 278 |
"RTX 4090": 1500,
|
| 279 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
# Professional/Workstation GPUs
|
| 281 |
"RTX A2000": 650,
|
| 282 |
"RTX A4000": 1200,
|
|
@@ -285,15 +215,15 @@ def get_fallback_price(gpu_name):
|
|
| 285 |
"RTX A6000": 4500,
|
| 286 |
"RTX 6000 Ada": 6800,
|
| 287 |
|
| 288 |
-
# Datacenter GPUs (
|
| 289 |
"A100 40GB": 12000,
|
| 290 |
"A100 80GB": 15000,
|
| 291 |
-
"H100 80GB":
|
| 292 |
-
"H100 94GB":
|
| 293 |
-
"H200 141GB":
|
| 294 |
-
"B200
|
| 295 |
-
"L40":
|
| 296 |
-
"L40S":
|
| 297 |
}
|
| 298 |
return fallback_prices.get(gpu_name, 1000)
|
| 299 |
|
|
@@ -677,11 +607,6 @@ def calculate_multi_gpu_configs(total_memory_needed, suitable_gpus):
|
|
| 677 |
else:
|
| 678 |
config_name = f"{count}x {gpu['name']} (TP={count})"
|
| 679 |
|
| 680 |
-
category_emoji = {
|
| 681 |
-
"Consumer": "🎮",
|
| 682 |
-
"Workstation": "🏢",
|
| 683 |
-
"Datacenter": "🏭"
|
| 684 |
-
}.get(gpu.get("category", "Consumer"), "🎮")
|
| 685 |
|
| 686 |
multi_gpu_configs.append({
|
| 687 |
"config": config_name,
|
|
@@ -691,7 +616,6 @@ def calculate_multi_gpu_configs(total_memory_needed, suitable_gpus):
|
|
| 691 |
"utilization": utilization,
|
| 692 |
"total_cost": total_cost,
|
| 693 |
"cost_per_tflop": cost_per_tflop_total,
|
| 694 |
-
"category_emoji": category_emoji,
|
| 695 |
"base_gpu": gpu
|
| 696 |
})
|
| 697 |
|
|
@@ -724,7 +648,7 @@ def recommend_gpus(kv_cache_size_gb, config=None, dtype="fp16/bf16", ctx_len=128
|
|
| 724 |
|
| 725 |
for gpu_name, specs in GPU_SPECS.items():
|
| 726 |
# Get real-time price (will use cache if available)
|
| 727 |
-
current_price =
|
| 728 |
if current_price:
|
| 729 |
cost_per_tflop = current_price / specs["tflops_fp32"]
|
| 730 |
all_gpus.append({
|
|
@@ -758,23 +682,22 @@ def recommend_gpus(kv_cache_size_gb, config=None, dtype="fp16/bf16", ctx_len=128
|
|
| 758 |
# Format recommendations
|
| 759 |
recommendations = []
|
| 760 |
for i, config in enumerate(multi_gpu_configs):
|
| 761 |
-
|
| 762 |
-
rank = rank_icons[i] if i < len(rank_icons) else "💎"
|
| 763 |
|
| 764 |
-
price_source = "
|
| 765 |
|
| 766 |
# Format configuration display
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
memory_display = f"{config['total_memory_gb']:.0f} GB ({config['utilization']:.0f}% util)"
|
| 773 |
|
| 774 |
recommendations.append([
|
| 775 |
config_display,
|
| 776 |
-
f"{
|
| 777 |
-
f"{
|
|
|
|
| 778 |
])
|
| 779 |
|
| 780 |
return recommendations
|
|
@@ -826,9 +749,9 @@ with gr.Blocks(title="KV Cache Calculator", theme=gr.themes.Soft()) as demo:
|
|
| 826 |
)
|
| 827 |
|
| 828 |
gpu_recommendations = gr.Dataframe(
|
| 829 |
-
label="
|
| 830 |
-
headers=["Configuration", "
|
| 831 |
-
datatype=["str", "str", "str"],
|
| 832 |
wrap=False,
|
| 833 |
visible=False
|
| 834 |
)
|
|
|
|
| 5 |
from typing import List
|
| 6 |
import time
|
| 7 |
from functools import lru_cache
|
|
|
|
| 8 |
import json
|
|
|
|
| 9 |
from datetime import datetime, timedelta
|
| 10 |
import threading
|
| 11 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
| 57 |
"RTX 4080 Super": {"memory_gb": 16, "compute_capability": "8.9", "tflops_fp32": 52.2, "category": "Consumer"},
|
| 58 |
"RTX 4090": {"memory_gb": 24, "compute_capability": "8.9", "tflops_fp32": 83.0, "category": "Consumer"},
|
| 59 |
|
| 60 |
+
# Consumer RTX 50 Series (Blackwell - GB202/GB203/GB205/GB206/GB207) - SM_10.0
|
| 61 |
+
"RTX 5060": {"memory_gb": 8, "compute_capability": "10.0", "tflops_fp32": 18.5, "category": "Consumer"},
|
| 62 |
+
"RTX 5060 Ti": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 28.2, "category": "Consumer"},
|
| 63 |
+
"RTX 5070": {"memory_gb": 12, "compute_capability": "10.0", "tflops_fp32": 35.1, "category": "Consumer"},
|
| 64 |
+
"RTX 5070 Ti": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 48.3, "category": "Consumer"},
|
| 65 |
+
"RTX 5080": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 60.5, "category": "Consumer"},
|
| 66 |
+
"RTX 5090": {"memory_gb": 32, "compute_capability": "10.0", "tflops_fp32": 125.0, "category": "Consumer"},
|
| 67 |
+
|
| 68 |
# Professional/Workstation RTX A Series (Ampere) - SM_8.6
|
| 69 |
"RTX A2000": {"memory_gb": 12, "compute_capability": "8.6", "tflops_fp32": 8.0, "category": "Workstation"},
|
| 70 |
"RTX A4000": {"memory_gb": 16, "compute_capability": "8.6", "tflops_fp32": 19.2, "category": "Workstation"},
|
|
|
|
| 87 |
"H200 141GB": {"memory_gb": 141, "compute_capability": "9.0", "tflops_fp32": 67.0, "category": "Datacenter"},
|
| 88 |
|
| 89 |
# Datacenter B200 (Blackwell) - SM_10.0
|
| 90 |
+
"B200 180GB": {"memory_gb": 180, "compute_capability": "10.0", "tflops_fp32": 80.0, "category": "Datacenter"},
|
| 91 |
|
| 92 |
# Datacenter L40/L40S (Ada Lovelace) - SM_8.9
|
| 93 |
"L40": {"memory_gb": 48, "compute_capability": "8.9", "tflops_fp32": 91.6, "category": "Datacenter"},
|
|
|
|
| 102 |
"""Fetch price for a single GPU (used in parallel)"""
|
| 103 |
try:
|
| 104 |
print(f"Fetching price for {gpu_name}...")
|
| 105 |
+
price = get_gpu_price(gpu_name)
|
| 106 |
if price:
|
| 107 |
+
print(f"Found price for {gpu_name}: ${price}")
|
| 108 |
return gpu_name, price
|
| 109 |
else:
|
| 110 |
print(f"✗ No price found for {gpu_name}, using fallback")
|
|
|
|
| 115 |
|
| 116 |
def preload_gpu_prices():
|
| 117 |
"""Pre-fetch all GPU prices in parallel on startup"""
|
| 118 |
+
print("Pre-loading GPU prices...")
|
| 119 |
start_time = time.time()
|
| 120 |
|
| 121 |
# Get list of GPUs to price
|
|
|
|
| 139 |
|
| 140 |
end_time = time.time()
|
| 141 |
total_time = end_time - start_time
|
| 142 |
+
print(f"Loaded prices for {len(gpu_names)} GPUs in {total_time:.1f} seconds")
|
| 143 |
+
print(f"Cache contains {len(price_cache)} price entries")
|
| 144 |
|
| 145 |
def start_price_preloading():
|
| 146 |
"""Start price preloading in background thread"""
|
|
|
|
| 150 |
# Start preloading in background
|
| 151 |
preload_thread = threading.Thread(target=preload_worker, daemon=True)
|
| 152 |
preload_thread.start()
|
| 153 |
+
print("Price preloading started in background...")
|
| 154 |
|
| 155 |
+
def get_gpu_price(gpu_name):
|
| 156 |
+
"""Get GPU price from curated pricing data"""
|
| 157 |
current_time = datetime.now()
|
| 158 |
|
| 159 |
# Check cache first
|
|
|
|
| 163 |
if current_time - cached_data["timestamp"] < PRICE_CACHE_DURATION:
|
| 164 |
return cached_data["price"]
|
| 165 |
|
| 166 |
+
price = get_fallback_price(gpu_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
# Cache the result
|
| 169 |
+
price_cache[cache_key] = {
|
| 170 |
+
"price": price,
|
| 171 |
+
"timestamp": current_time
|
| 172 |
+
}
|
|
|
|
| 173 |
|
| 174 |
return price
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
def get_fallback_price(gpu_name):
|
| 178 |
+
"""Curated GPU pricing data"""
|
| 179 |
fallback_prices = {
|
| 180 |
# Consumer RTX 30 Series
|
| 181 |
"RTX 3060": 280,
|
|
|
|
| 199 |
"RTX 4080 Super": 880,
|
| 200 |
"RTX 4090": 1500,
|
| 201 |
|
| 202 |
+
# Consumer RTX 50 Series (Expected pricing)
|
| 203 |
+
"RTX 5060": 400,
|
| 204 |
+
"RTX 5060 Ti": 600,
|
| 205 |
+
"RTX 5070": 800,
|
| 206 |
+
"RTX 5070 Ti": 1000,
|
| 207 |
+
"RTX 5080": 1200,
|
| 208 |
+
"RTX 5090": 2000,
|
| 209 |
+
|
| 210 |
# Professional/Workstation GPUs
|
| 211 |
"RTX A2000": 650,
|
| 212 |
"RTX A4000": 1200,
|
|
|
|
| 215 |
"RTX A6000": 4500,
|
| 216 |
"RTX 6000 Ada": 6800,
|
| 217 |
|
| 218 |
+
# Datacenter GPUs (current enterprise pricing)
|
| 219 |
"A100 40GB": 12000,
|
| 220 |
"A100 80GB": 15000,
|
| 221 |
+
"H100 80GB": 30000,
|
| 222 |
+
"H100 94GB": 35000,
|
| 223 |
+
"H200 141GB": 40000,
|
| 224 |
+
"B200 180GB": 50000,
|
| 225 |
+
"L40": 9000,
|
| 226 |
+
"L40S": 10000,
|
| 227 |
}
|
| 228 |
return fallback_prices.get(gpu_name, 1000)
|
| 229 |
|
|
|
|
| 607 |
else:
|
| 608 |
config_name = f"{count}x {gpu['name']} (TP={count})"
|
| 609 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
|
| 611 |
multi_gpu_configs.append({
|
| 612 |
"config": config_name,
|
|
|
|
| 616 |
"utilization": utilization,
|
| 617 |
"total_cost": total_cost,
|
| 618 |
"cost_per_tflop": cost_per_tflop_total,
|
|
|
|
| 619 |
"base_gpu": gpu
|
| 620 |
})
|
| 621 |
|
|
|
|
| 648 |
|
| 649 |
for gpu_name, specs in GPU_SPECS.items():
|
| 650 |
# Get real-time price (will use cache if available)
|
| 651 |
+
current_price = get_gpu_price(gpu_name)
|
| 652 |
if current_price:
|
| 653 |
cost_per_tflop = current_price / specs["tflops_fp32"]
|
| 654 |
all_gpus.append({
|
|
|
|
| 682 |
# Format recommendations
|
| 683 |
recommendations = []
|
| 684 |
for i, config in enumerate(multi_gpu_configs):
|
| 685 |
+
rank = f"#{i+1}"
|
|
|
|
| 686 |
|
| 687 |
+
price_source = "Live" if config["base_gpu"]["name"].lower().replace(" ", "_") in price_cache else "Est"
|
| 688 |
|
| 689 |
# Format configuration display
|
| 690 |
+
config_display = f"{rank} {config['config']}"
|
| 691 |
+
|
| 692 |
+
# Calculate FLOP/dollar (TFLOPS per dollar)
|
| 693 |
+
total_tflops = config["base_gpu"]["tflops_fp32"] * config["gpu_count"]
|
| 694 |
+
flops_per_dollar = total_tflops / config['total_cost']
|
|
|
|
| 695 |
|
| 696 |
recommendations.append([
|
| 697 |
config_display,
|
| 698 |
+
f"{flops_per_dollar:.3f}",
|
| 699 |
+
f"{total_memory_needed:.1f}GB",
|
| 700 |
+
f"${config['total_cost']:.0f}"
|
| 701 |
])
|
| 702 |
|
| 703 |
return recommendations
|
|
|
|
| 749 |
)
|
| 750 |
|
| 751 |
gpu_recommendations = gr.Dataframe(
|
| 752 |
+
label="GPU Recommendations",
|
| 753 |
+
headers=["Configuration", "TFLOPS/$", "Memory", "Price"],
|
| 754 |
+
datatype=["str", "str", "str", "str"],
|
| 755 |
wrap=False,
|
| 756 |
visible=False
|
| 757 |
)
|