# /// script
# dependencies = [
#     "matplotlib",
# ]
# ///
import json
import matplotlib.pyplot as plt
from pathlib import Path
import os

# Get result directories from environment variables
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')

print("Loading benchmark results from:")
print(f"  GPT-OSS dir: {gptoss_dir}")
print(f"  MegaBlocks dir: {megablocks_dir}")

# Load benchmark results
gptoss_file = Path(gptoss_dir) / 'gptoss_results.json'
megablocks_file = Path(megablocks_dir) / 'megablocks_results.json'

print("Loading results from:")
print(f"  GPT-OSS: {gptoss_file}")
print(f"  MegaBlocks: {megablocks_file}")

if not gptoss_file.exists():
    print(f"Warning: {gptoss_file} not found")
if not megablocks_file.exists():
    print(f"Warning: {megablocks_file} not found")

with open(gptoss_file, 'r') as f:
    gptoss_results = json.load(f)
with open(megablocks_file, 'r') as f:
    megablocks_results = json.load(f)

print(f"GPT-OSS results keys: {list(gptoss_results.keys())}")
print(f"MegaBlocks results keys: {list(megablocks_results.keys())}")


# Helper function to extract metrics from either old or new JSON format
def get_metric(results, metric_name, default=0):
    """Extract metric from results, handling both old and new JSON formats."""
    # New format (with stats dict)
    if 'stats' in results:
        return results['stats'].get(metric_name, default)
    # Old format (direct keys)
    elif metric_name in results:
        return results[metric_name]
    else:
        return default


# Create comparison plots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

# Performance comparison
implementations = ['GPT-OSS', 'MegaBlocks']

# Extract timing metrics (handle both avg_ms and avg_time_ms)
gpt_time = get_metric(gptoss_results, 'avg_ms', get_metric(gptoss_results, 'avg_time_ms', 0))
mega_time = get_metric(megablocks_results, 'avg_ms', get_metric(megablocks_results, 'avg_time_ms', 0))
times = [gpt_time, mega_time]

# Extract throughput metrics
gpt_throughput = get_metric(gptoss_results, 'tokens_per_s', get_metric(gptoss_results, 'throughput_tokens_per_sec', 0))
mega_throughput = get_metric(megablocks_results, 'tokens_per_s', get_metric(megablocks_results, 'throughput_tokens_per_sec', 0))
throughputs = [gpt_throughput, mega_throughput]

# Extract memory metrics
gpt_memory = get_metric(gptoss_results, 'memory_allocated_gb', 0)
mega_memory = get_metric(megablocks_results, 'memory_allocated_gb', 0)
memory_usage = [gpt_memory, mega_memory]

gpt_mem_inc = get_metric(gptoss_results, 'memory_increase_gb', 0)
mega_mem_inc = get_metric(megablocks_results, 'memory_increase_gb', 0)
memory_increase = [gpt_mem_inc, mega_mem_inc]

print("Extracted metrics:")
print(f"  Times (ms): {times}")
print(f"  Throughputs: {throughputs}")
print(f"  Memory usage (GB): {memory_usage}")
print(f"  Memory increase (GB): {memory_increase}")

colors = ['#2E8B57', '#4169E1']

# Latency comparison
bars1 = ax1.bar(implementations, times, color=colors)
ax1.set_ylabel('Average Time (ms)')
ax1.set_title('Latency Comparison')
ax1.grid(True, alpha=0.3)
# Add values on bars
for bar, time in zip(bars1, times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{time:.2f}ms', ha='center', va='bottom')

# Throughput comparison
bars2 = ax2.bar(implementations, throughputs, color=colors)
ax2.set_ylabel('Tokens per Second')
ax2.set_title('Throughput Comparison')
ax2.grid(True, alpha=0.3)
# Add values on bars
for bar, throughput in zip(bars2, throughputs):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{throughput:.0f}', ha='center', va='bottom')

# Memory usage comparison
bars3 = ax3.bar(implementations, memory_usage, color=colors)
ax3.set_ylabel('Memory Allocated (GB)')
ax3.set_title('Memory Usage Comparison')
ax3.grid(True, alpha=0.3)
# Add values on bars
for bar, mem in zip(bars3, memory_usage):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{mem:.2f}GB', ha='center', va='bottom')

# Memory increase comparison
bars4 = ax4.bar(implementations, memory_increase, color=colors)
ax4.set_ylabel('Memory Increase (GB)')
ax4.set_title('Memory Increase Comparison')
ax4.grid(True, alpha=0.3)
# Add values on bars
for bar, mem_inc in zip(bars4, memory_increase):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width()/2., height + height*0.01,
             f'{mem_inc:.3f}GB', ha='center', va='bottom')

plt.tight_layout()
plt.savefig('small_moe_comparison.png', dpi=150, bbox_inches='tight')
plt.show()

# Print summary table
print("\n" + "="*60)
print("PERFORMANCE COMPARISON SUMMARY")
print("="*60)
print(f"{'Metric':<25} {'GPT-OSS':<15} {'MegaBlocks':<15} {'Winner':<10}")
print("-" * 60)

# Determine winners
latency_winner = "GPT-OSS" if times[0] < times[1] else "MegaBlocks"
throughput_winner = "GPT-OSS" if throughputs[0] > throughputs[1] else "MegaBlocks"
memory_winner = "GPT-OSS" if memory_usage[0] < memory_usage[1] else "MegaBlocks"
mem_inc_winner = "GPT-OSS" if memory_increase[0] < memory_increase[1] else "MegaBlocks"

print(f"{'Latency (ms)':<25} {times[0]:<15.2f} {times[1]:<15.2f} {latency_winner:<10}")
print(f"{'Throughput (tok/s)':<25} {throughputs[0]:<15.0f} {throughputs[1]:<15.0f} {throughput_winner:<10}")
print(f"{'Memory Usage (GB)':<25} {memory_usage[0]:<15.3f} {memory_usage[1]:<15.3f} {memory_winner:<10}")
print(f"{'Memory Increase (GB)':<25} {memory_increase[0]:<15.3f} {memory_increase[1]:<15.3f} {mem_inc_winner:<10}")

# Speed ratio
speed_ratio = times[1] / times[0] if times[0] < times[1] else times[0] / times[1]
faster_impl = latency_winner
print(f"\n{faster_impl} is {speed_ratio:.2f}x faster")

# Throughput ratio
throughput_ratio = max(throughputs) / min(throughputs)
higher_throughput = throughput_winner
print(f"{higher_throughput} has {throughput_ratio:.2f}x higher throughput")
print("="*60)
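
# --- Illustrative sketch (assumption, not part of the benchmark output) ---
# get_metric() above accepts two result layouts: an older flat layout with
# metrics as top-level keys, and a newer layout that nests them under 'stats'.
# The dicts below use hypothetical values purely to show those two shapes;
# the real numbers come from the *_results.json files loaded earlier.
_example_flat = {'avg_ms': 12.3, 'tokens_per_s': 4096.0}                # old format: direct keys
_example_nested = {'stats': {'avg_ms': 12.3, 'tokens_per_s': 4096.0}}   # new format: nested under 'stats'
assert get_metric(_example_flat, 'avg_ms') == get_metric(_example_nested, 'avg_ms') == 12.3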