AndreHathora committed on
Commit
a38c422
·
1 Parent(s): 160a197
Files changed (2) hide show
  1. __pycache__/app.cpython-310.pyc +0 -0
  2. app.py +52 -129
__pycache__/app.cpython-310.pyc ADDED
Binary file (21 kB). View file
 
app.py CHANGED
@@ -5,9 +5,7 @@ import asyncio
5
  from typing import List
6
  import time
7
  from functools import lru_cache
8
- import requests
9
  import json
10
- import re
11
  from datetime import datetime, timedelta
12
  import threading
13
  from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -59,6 +57,14 @@ GPU_SPECS = {
59
  "RTX 4080 Super": {"memory_gb": 16, "compute_capability": "8.9", "tflops_fp32": 52.2, "category": "Consumer"},
60
  "RTX 4090": {"memory_gb": 24, "compute_capability": "8.9", "tflops_fp32": 83.0, "category": "Consumer"},
61
 
 
 
 
 
 
 
 
 
62
  # Professional/Workstation RTX A Series (Ampere) - SM_8.6
63
  "RTX A2000": {"memory_gb": 12, "compute_capability": "8.6", "tflops_fp32": 8.0, "category": "Workstation"},
64
  "RTX A4000": {"memory_gb": 16, "compute_capability": "8.6", "tflops_fp32": 19.2, "category": "Workstation"},
@@ -81,7 +87,7 @@ GPU_SPECS = {
81
  "H200 141GB": {"memory_gb": 141, "compute_capability": "9.0", "tflops_fp32": 67.0, "category": "Datacenter"},
82
 
83
  # Datacenter B200 (Blackwell) - SM_10.0
84
- "B200 192GB": {"memory_gb": 180, "compute_capability": "10.0", "tflops_fp32": 80.0, "category": "Datacenter"},
85
 
86
  # Datacenter L40/L40S (Ada Lovelace) - SM_8.9
87
  "L40": {"memory_gb": 48, "compute_capability": "8.9", "tflops_fp32": 91.6, "category": "Datacenter"},
@@ -96,9 +102,9 @@ def fetch_single_gpu_price(gpu_name):
96
  """Fetch price for a single GPU (used in parallel)"""
97
  try:
98
  print(f"Fetching price for {gpu_name}...")
99
- price = get_gpu_price_from_multiple_sources(gpu_name)
100
  if price:
101
- print(f"Found price for {gpu_name}: ${price}")
102
  return gpu_name, price
103
  else:
104
  print(f"✗ No price found for {gpu_name}, using fallback")
@@ -109,7 +115,7 @@ def fetch_single_gpu_price(gpu_name):
109
 
110
  def preload_gpu_prices():
111
  """Pre-fetch all GPU prices in parallel on startup"""
112
- print("🚀 Pre-loading GPU prices...")
113
  start_time = time.time()
114
 
115
  # Get list of GPUs to price
@@ -133,8 +139,8 @@ def preload_gpu_prices():
133
 
134
  end_time = time.time()
135
  total_time = end_time - start_time
136
- print(f"Loaded prices for {len(gpu_names)} GPUs in {total_time:.1f} seconds")
137
- print(f"💰 Cache contains {len(price_cache)} price entries")
138
 
139
  def start_price_preloading():
140
  """Start price preloading in background thread"""
@@ -144,10 +150,10 @@ def start_price_preloading():
144
  # Start preloading in background
145
  preload_thread = threading.Thread(target=preload_worker, daemon=True)
146
  preload_thread.start()
147
- print("🔄 Price preloading started in background...")
148
 
149
- def get_gpu_price_from_multiple_sources(gpu_name):
150
- """Fetch GPU price from multiple sources with fallbacks"""
151
  current_time = datetime.now()
152
 
153
  # Check cache first
@@ -157,103 +163,19 @@ def get_gpu_price_from_multiple_sources(gpu_name):
157
  if current_time - cached_data["timestamp"] < PRICE_CACHE_DURATION:
158
  return cached_data["price"]
159
 
160
- price = None
161
-
162
- try:
163
- gpu_specs = GPU_SPECS.get(gpu_name, {})
164
- gpu_category = gpu_specs.get("category", "Consumer")
165
-
166
- if gpu_category == "Datacenter":
167
- price = get_fallback_price(gpu_name)
168
- else:
169
- price = fetch_newegg_price(gpu_name)
170
- if not price:
171
- price = fetch_amazon_price(gpu_name)
172
- if not price:
173
- price = get_fallback_price(gpu_name)
174
-
175
- except Exception as e:
176
- print(f"Error fetching price for {gpu_name}: {e}")
177
- price = get_fallback_price(gpu_name)
178
 
179
  # Cache the result
180
- if price:
181
- price_cache[cache_key] = {
182
- "price": price,
183
- "timestamp": current_time
184
- }
185
 
186
  return price
187
 
188
- def fetch_newegg_price(gpu_name):
189
- """Fetch price from Newegg search (simplified approach)"""
190
- try:
191
- # Simple approach: search for GPU and extract price patterns
192
- search_term = gpu_name.replace(" ", "+")
193
- url = f"https://www.newegg.com/p/pl?d={search_term}"
194
-
195
- headers = {
196
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
197
- }
198
-
199
- response = requests.get(url, headers=headers, timeout=2)
200
- if response.status_code == 200:
201
- # Look for price patterns in the HTML
202
- price_patterns = [
203
- r'\$([0-9,]+\.?\d*)',
204
- r'price.*?(\d+[,.]?\d*)',
205
- r'(\d{3,4})\.\d{2}'
206
- ]
207
-
208
- for pattern in price_patterns:
209
- matches = re.findall(pattern, response.text)
210
- if matches:
211
- # Get the first reasonable price (between $200-$3000)
212
- for match in matches:
213
- try:
214
- price = float(match.replace(',', ''))
215
- if 200 <= price <= 3000:
216
- return price
217
- except:
218
- continue
219
- except:
220
- pass
221
- return None
222
-
223
- def fetch_amazon_price(gpu_name):
224
- """Fetch price from Amazon search (simplified approach)"""
225
- try:
226
- search_term = gpu_name.replace(" ", "+")
227
- url = f"https://www.amazon.com/s?k={search_term}+graphics+card"
228
-
229
- headers = {
230
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
231
- }
232
-
233
- response = requests.get(url, headers=headers, timeout=2)
234
- if response.status_code == 200:
235
- # Look for Amazon price patterns
236
- price_patterns = [
237
- r'\$([0-9,]+\.?\d*)',
238
- r'a-price-whole.*?(\d+)',
239
- ]
240
-
241
- for pattern in price_patterns:
242
- matches = re.findall(pattern, response.text)
243
- if matches:
244
- for match in matches:
245
- try:
246
- price = float(match.replace(',', ''))
247
- if 200 <= price <= 3000:
248
- return price
249
- except:
250
- continue
251
- except:
252
- pass
253
- return None
254
 
255
  def get_fallback_price(gpu_name):
256
- """Fallback prices based on typical market values (updated periodically)"""
257
  fallback_prices = {
258
  # Consumer RTX 30 Series
259
  "RTX 3060": 280,
@@ -277,6 +199,14 @@ def get_fallback_price(gpu_name):
277
  "RTX 4080 Super": 880,
278
  "RTX 4090": 1500,
279
 
 
 
 
 
 
 
 
 
280
  # Professional/Workstation GPUs
281
  "RTX A2000": 650,
282
  "RTX A4000": 1200,
@@ -285,15 +215,15 @@ def get_fallback_price(gpu_name):
285
  "RTX A6000": 4500,
286
  "RTX 6000 Ada": 6800,
287
 
288
- # Datacenter GPUs (estimated enterprise pricing)
289
  "A100 40GB": 12000,
290
  "A100 80GB": 15000,
291
- "H100 80GB": 28000,
292
- "H100 94GB": 32000,
293
- "H200 141GB": 35000,
294
- "B200 192GB": 45000,
295
- "L40": 8500,
296
- "L40S": 9500,
297
  }
298
  return fallback_prices.get(gpu_name, 1000)
299
 
@@ -677,11 +607,6 @@ def calculate_multi_gpu_configs(total_memory_needed, suitable_gpus):
677
  else:
678
  config_name = f"{count}x {gpu['name']} (TP={count})"
679
 
680
- category_emoji = {
681
- "Consumer": "🎮",
682
- "Workstation": "🏢",
683
- "Datacenter": "🏭"
684
- }.get(gpu.get("category", "Consumer"), "🎮")
685
 
686
  multi_gpu_configs.append({
687
  "config": config_name,
@@ -691,7 +616,6 @@ def calculate_multi_gpu_configs(total_memory_needed, suitable_gpus):
691
  "utilization": utilization,
692
  "total_cost": total_cost,
693
  "cost_per_tflop": cost_per_tflop_total,
694
- "category_emoji": category_emoji,
695
  "base_gpu": gpu
696
  })
697
 
@@ -724,7 +648,7 @@ def recommend_gpus(kv_cache_size_gb, config=None, dtype="fp16/bf16", ctx_len=128
724
 
725
  for gpu_name, specs in GPU_SPECS.items():
726
  # Get real-time price (will use cache if available)
727
- current_price = get_gpu_price_from_multiple_sources(gpu_name)
728
  if current_price:
729
  cost_per_tflop = current_price / specs["tflops_fp32"]
730
  all_gpus.append({
@@ -758,23 +682,22 @@ def recommend_gpus(kv_cache_size_gb, config=None, dtype="fp16/bf16", ctx_len=128
758
  # Format recommendations
759
  recommendations = []
760
  for i, config in enumerate(multi_gpu_configs):
761
- rank_icons = ["🥇", "🥈", "🥉", "🏅", "⭐", "💫", "🌟", "✨"]
762
- rank = rank_icons[i] if i < len(rank_icons) else "💎"
763
 
764
- price_source = "💲 Live" if config["base_gpu"]["name"].lower().replace(" ", "_") in price_cache else "📊 Est"
765
 
766
  # Format configuration display
767
- if config["gpu_count"] == 1:
768
- config_display = f"{rank} {config['category_emoji']} {config['config']}"
769
- memory_display = f"{config['total_memory_gb']:.0f} GB"
770
- else:
771
- config_display = f"{rank} {config['category_emoji']} {config['config']}"
772
- memory_display = f"{config['total_memory_gb']:.0f} GB ({config['utilization']:.0f}% util)"
773
 
774
  recommendations.append([
775
  config_display,
776
- f"{total_memory_needed:.1f}GB required",
777
- f"{price_source} ${config['total_cost']:.0f}"
 
778
  ])
779
 
780
  return recommendations
@@ -826,9 +749,9 @@ with gr.Blocks(title="KV Cache Calculator", theme=gr.themes.Soft()) as demo:
826
  )
827
 
828
  gpu_recommendations = gr.Dataframe(
829
- label="💡 GPU Recommendations",
830
- headers=["Configuration", "Memory Required", "Total Price"],
831
- datatype=["str", "str", "str"],
832
  wrap=False,
833
  visible=False
834
  )
 
5
  from typing import List
6
  import time
7
  from functools import lru_cache
 
8
  import json
 
9
  from datetime import datetime, timedelta
10
  import threading
11
  from concurrent.futures import ThreadPoolExecutor, as_completed
 
57
  "RTX 4080 Super": {"memory_gb": 16, "compute_capability": "8.9", "tflops_fp32": 52.2, "category": "Consumer"},
58
  "RTX 4090": {"memory_gb": 24, "compute_capability": "8.9", "tflops_fp32": 83.0, "category": "Consumer"},
59
 
60
+ # Consumer RTX 50 Series (Blackwell - GB202/GB203/GB205/GB206/GB207) - SM_10.0
61
+ "RTX 5060": {"memory_gb": 8, "compute_capability": "10.0", "tflops_fp32": 18.5, "category": "Consumer"},
62
+ "RTX 5060 Ti": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 28.2, "category": "Consumer"},
63
+ "RTX 5070": {"memory_gb": 12, "compute_capability": "10.0", "tflops_fp32": 35.1, "category": "Consumer"},
64
+ "RTX 5070 Ti": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 48.3, "category": "Consumer"},
65
+ "RTX 5080": {"memory_gb": 16, "compute_capability": "10.0", "tflops_fp32": 60.5, "category": "Consumer"},
66
+ "RTX 5090": {"memory_gb": 32, "compute_capability": "10.0", "tflops_fp32": 125.0, "category": "Consumer"},
67
+
68
  # Professional/Workstation RTX A Series (Ampere) - SM_8.6
69
  "RTX A2000": {"memory_gb": 12, "compute_capability": "8.6", "tflops_fp32": 8.0, "category": "Workstation"},
70
  "RTX A4000": {"memory_gb": 16, "compute_capability": "8.6", "tflops_fp32": 19.2, "category": "Workstation"},
 
87
  "H200 141GB": {"memory_gb": 141, "compute_capability": "9.0", "tflops_fp32": 67.0, "category": "Datacenter"},
88
 
89
  # Datacenter B200 (Blackwell) - SM_10.0
90
+ "B200 180GB": {"memory_gb": 180, "compute_capability": "10.0", "tflops_fp32": 80.0, "category": "Datacenter"},
91
 
92
  # Datacenter L40/L40S (Ada Lovelace) - SM_8.9
93
  "L40": {"memory_gb": 48, "compute_capability": "8.9", "tflops_fp32": 91.6, "category": "Datacenter"},
 
102
  """Fetch price for a single GPU (used in parallel)"""
103
  try:
104
  print(f"Fetching price for {gpu_name}...")
105
+ price = get_gpu_price(gpu_name)
106
  if price:
107
+ print(f"Found price for {gpu_name}: ${price}")
108
  return gpu_name, price
109
  else:
110
  print(f"✗ No price found for {gpu_name}, using fallback")
 
115
 
116
  def preload_gpu_prices():
117
  """Pre-fetch all GPU prices in parallel on startup"""
118
+ print("Pre-loading GPU prices...")
119
  start_time = time.time()
120
 
121
  # Get list of GPUs to price
 
139
 
140
  end_time = time.time()
141
  total_time = end_time - start_time
142
+ print(f"Loaded prices for {len(gpu_names)} GPUs in {total_time:.1f} seconds")
143
+ print(f"Cache contains {len(price_cache)} price entries")
144
 
145
  def start_price_preloading():
146
  """Start price preloading in background thread"""
 
150
  # Start preloading in background
151
  preload_thread = threading.Thread(target=preload_worker, daemon=True)
152
  preload_thread.start()
153
+ print("Price preloading started in background...")
154
 
155
+ def get_gpu_price(gpu_name):
156
+ """Get GPU price from curated pricing data"""
157
  current_time = datetime.now()
158
 
159
  # Check cache first
 
163
  if current_time - cached_data["timestamp"] < PRICE_CACHE_DURATION:
164
  return cached_data["price"]
165
 
166
+ price = get_fallback_price(gpu_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
  # Cache the result
169
+ price_cache[cache_key] = {
170
+ "price": price,
171
+ "timestamp": current_time
172
+ }
 
173
 
174
  return price
175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
  def get_fallback_price(gpu_name):
178
+ """Curated GPU pricing data"""
179
  fallback_prices = {
180
  # Consumer RTX 30 Series
181
  "RTX 3060": 280,
 
199
  "RTX 4080 Super": 880,
200
  "RTX 4090": 1500,
201
 
202
+ # Consumer RTX 50 Series (Expected pricing)
203
+ "RTX 5060": 400,
204
+ "RTX 5060 Ti": 600,
205
+ "RTX 5070": 800,
206
+ "RTX 5070 Ti": 1000,
207
+ "RTX 5080": 1200,
208
+ "RTX 5090": 2000,
209
+
210
  # Professional/Workstation GPUs
211
  "RTX A2000": 650,
212
  "RTX A4000": 1200,
 
215
  "RTX A6000": 4500,
216
  "RTX 6000 Ada": 6800,
217
 
218
+ # Datacenter GPUs (current enterprise pricing)
219
  "A100 40GB": 12000,
220
  "A100 80GB": 15000,
221
+ "H100 80GB": 30000,
222
+ "H100 94GB": 35000,
223
+ "H200 141GB": 40000,
224
+ "B200 180GB": 50000,
225
+ "L40": 9000,
226
+ "L40S": 10000,
227
  }
228
  return fallback_prices.get(gpu_name, 1000)
229
 
 
607
  else:
608
  config_name = f"{count}x {gpu['name']} (TP={count})"
609
 
 
 
 
 
 
610
 
611
  multi_gpu_configs.append({
612
  "config": config_name,
 
616
  "utilization": utilization,
617
  "total_cost": total_cost,
618
  "cost_per_tflop": cost_per_tflop_total,
 
619
  "base_gpu": gpu
620
  })
621
 
 
648
 
649
  for gpu_name, specs in GPU_SPECS.items():
650
  # Get curated price (served from the cache when available)
651
+ current_price = get_gpu_price(gpu_name)
652
  if current_price:
653
  cost_per_tflop = current_price / specs["tflops_fp32"]
654
  all_gpus.append({
 
682
  # Format recommendations
683
  recommendations = []
684
  for i, config in enumerate(multi_gpu_configs):
685
+ rank = f"#{i+1}"
 
686
 
687
+ price_source = "Live" if config["base_gpu"]["name"].lower().replace(" ", "_") in price_cache else "Est"
688
 
689
  # Format configuration display
690
+ config_display = f"{rank} {config['config']}"
691
+
692
+ # Calculate TFLOPS per dollar (aggregate FP32 TFLOPS / total cost)
693
+ total_tflops = config["base_gpu"]["tflops_fp32"] * config["gpu_count"]
694
+ flops_per_dollar = total_tflops / config['total_cost']
 
695
 
696
  recommendations.append([
697
  config_display,
698
+ f"{flops_per_dollar:.3f}",
699
+ f"{total_memory_needed:.1f}GB",
700
+ f"${config['total_cost']:.0f}"
701
  ])
702
 
703
  return recommendations
 
749
  )
750
 
751
  gpu_recommendations = gr.Dataframe(
752
+ label="GPU Recommendations",
753
+ headers=["Configuration", "TFLOPS/$", "Memory", "Price"],
754
+ datatype=["str", "str", "str", "str"],
755
  wrap=False,
756
  visible=False
757
  )