# /// script
# dependencies = [
#     "matplotlib",
# ]
# ///
import json
import matplotlib.pyplot as plt
from pathlib import Path
import os

# Get result directories from environment variables
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')

print("Loading benchmark results from:")
print(f"  GPT-OSS dir: {gptoss_dir}")
print(f"  MegaBlocks dir: {megablocks_dir}")

# Load benchmark results
gptoss_file = Path(gptoss_dir) / 'gptoss_results.json'
megablocks_file = Path(megablocks_dir) / 'megablocks_results.json'

print("Loading results from:")
print(f"  GPT-OSS: {gptoss_file}")
print(f"  MegaBlocks: {megablocks_file}")

if not gptoss_file.exists():
    print(f"Warning: {gptoss_file} not found")
if not megablocks_file.exists():
    print(f"Warning: {megablocks_file} not found")

with open(gptoss_file, 'r') as f:
    gptoss_results = json.load(f)
with open(megablocks_file, 'r') as f:
    megablocks_results = json.load(f)

print(f"GPT-OSS results keys: {list(gptoss_results.keys())}")
print(f"MegaBlocks results keys: {list(megablocks_results.keys())}")


# Helper function to extract metrics from either old or new JSON format
def get_metric(results, metric_name, default=0):
    """Extract metric from results, handling both old and new JSON formats."""
    # New format (with stats dict)
    if 'stats' in results:
        return results['stats'].get(metric_name, default)
    # Old format (direct keys)
    elif metric_name in results:
        return results[metric_name]
    else:
        return default


# Create comparison plots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

# Performance comparison
implementations = ['GPT-OSS', 'MegaBlocks']

# Extract timing metrics (handle both avg_ms and avg_time_ms)
gpt_time = get_metric(gptoss_results, 'avg_ms', get_metric(gptoss_results, 'avg_time_ms', 0))
mega_time = get_metric(megablocks_results, 'avg_ms', get_metric(megablocks_results, 'avg_time_ms', 0))
times = [gpt_time, mega_time]

# Extract throughput metrics
gpt_throughput = get_metric(gptoss_results, 'tokens_per_s', get_metric(gptoss_results, 'throughput_tokens_per_sec', 0))
mega_throughput = get_metric(megablocks_results, 'tokens_per_s', get_metric(megablocks_results, 'throughput_tokens_per_sec', 0))
throughputs = [gpt_throughput, mega_throughput]

# Extract memory metrics
gpt_memory = get_metric(gptoss_results, 'memory_allocated_gb', 0)
mega_memory = get_metric(megablocks_results, 'memory_allocated_gb', 0)
memory_usage = [gpt_memory, mega_memory]

gpt_mem_inc = get_metric(gptoss_results, 'memory_increase_gb', 0)
mega_mem_inc = get_metric(megablocks_results, 'memory_increase_gb', 0)
memory_increase = [gpt_mem_inc, mega_mem_inc]

print("Extracted metrics:")
print(f"  Times (ms): {times}")
print(f"  Throughputs: {throughputs}")
print(f"  Memory usage (GB): {memory_usage}")
print(f"  Memory increase (GB): {memory_increase}")

colors = ['#2E8B57', '#4169E1']

# Latency comparison
bars1 = ax1.bar(implementations, times, color=colors)
ax1.set_ylabel('Average Time (ms)')
ax1.set_title('Latency Comparison')
ax1.grid(True, alpha=0.3)
# Add values on bars
for bar, time in zip(bars1, times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{time:.2f}ms', ha='center', va='bottom')

# Throughput comparison
bars2 = ax2.bar(implementations, throughputs, color=colors)
ax2.set_ylabel('Tokens per Second')
ax2.set_title('Throughput Comparison')
ax2.grid(True, alpha=0.3)
# Add values on bars
for bar, throughput in zip(bars2, throughputs):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{throughput:.0f}', ha='center', va='bottom')

# Memory usage comparison
bars3 = ax3.bar(implementations, memory_usage, color=colors)
ax3.set_ylabel('Memory Allocated (GB)')
ax3.set_title('Memory Usage Comparison')
ax3.grid(True, alpha=0.3)
# Add values on bars
for bar, mem in zip(bars3, memory_usage):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{mem:.2f}GB', ha='center', va='bottom')

# Memory increase comparison
bars4 = ax4.bar(implementations, memory_increase, color=colors)
ax4.set_ylabel('Memory Increase (GB)')
ax4.set_title('Memory Increase Comparison')
ax4.grid(True, alpha=0.3)
# Add values on bars
for bar, mem_inc in zip(bars4, memory_increase):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{mem_inc:.3f}GB', ha='center', va='bottom')

plt.tight_layout()
plt.savefig('small_moe_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

# Print summary table
print("\n" + "="*60)
print("PERFORMANCE COMPARISON SUMMARY")
print("="*60)
print(f"{'Metric':<25} {'GPT-OSS':<15} {'MegaBlocks':<15} {'Winner':<10}")
print("-" * 60)

# Determine winners
latency_winner = "GPT-OSS" if times[0] < times[1] else "MegaBlocks"
throughput_winner = "GPT-OSS" if throughputs[0] > throughputs[1] else "MegaBlocks"
memory_winner = "GPT-OSS" if memory_usage[0] < memory_usage[1] else "MegaBlocks"
mem_inc_winner = "GPT-OSS" if memory_increase[0] < memory_increase[1] else "MegaBlocks"

print(f"{'Latency (ms)':<25} {times[0]:<15.2f} {times[1]:<15.2f} {latency_winner:<10}")
print(f"{'Throughput (tok/s)':<25} {throughputs[0]:<15.0f} {throughputs[1]:<15.0f} {throughput_winner:<10}")
print(f"{'Memory Usage (GB)':<25} {memory_usage[0]:<15.3f} {memory_usage[1]:<15.3f} {memory_winner:<10}")
print(f"{'Memory Increase (GB)':<25} {memory_increase[0]:<15.3f} {memory_increase[1]:<15.3f} {mem_inc_winner:<10}")

# Speed ratio
speed_ratio = times[1] / times[0] if times[0] < times[1] else times[0] / times[1]
faster_impl = latency_winner
print(f"\n{faster_impl} is {speed_ratio:.2f}x faster")

# Throughput ratio
throughput_ratio = max(throughputs) / min(throughputs)
higher_throughput = throughput_winner
print(f"{higher_throughput} has {throughput_ratio:.2f}x higher throughput")
print("="*60)
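
# --- Illustrative sketch (assumption, not part of the benchmark output) ---
# get_metric() above accepts two result layouts: an older flat layout with
# metrics as top-level keys, and a newer layout that nests them under 'stats'.
# The dicts below use hypothetical values purely to show those two shapes;
# the real numbers come from the *_results.json files loaded earlier.
_example_flat = {'avg_ms': 12.3, 'tokens_per_s': 4096.0}                # old format: direct keys
_example_nested = {'stats': {'avg_ms': 12.3, 'tokens_per_s': 4096.0}}   # new format: nested under 'stats'
assert get_metric(_example_flat, 'avg_ms') == get_metric(_example_nested, 'avg_ms') == 12.3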