|
import json |
|
import pickle |
|
from datetime import datetime, date |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
import plotly.graph_objects as go |
|
|
|
|
|
def create_big_five_capex_plot():
    """Build a stacked bar chart of Big Five quarterly capital expenditure.

    Reads ``big_five_capex.jsonl`` (one JSON object per line, keyed by
    'Quarter' plus one column per company, values in millions of USD) and
    returns a Plotly figure with one stacked bar trace per company.
    """
    with open("big_five_capex.jsonl", 'r') as file:
        records = [json.loads(line) for line in file]
    df = pd.DataFrame(records)

    fig = go.Figure()

    # Fixed company -> brand color mapping; dict order fixes the trace order.
    company_colors = {
        'Microsoft': '#80bb00',
        'Google': '#ee161f',
        'Meta': '#0065e3',
        'Apple': '#000000',
        'Amazon': '#ff6200',
    }
    for company, color in company_colors.items():
        fig.add_trace(go.Bar(
            x=df['Quarter'],
            y=df[company],
            name=company,
            marker_color=color,
        ))

    fig.update_layout(
        title='Capital Expenditure of the Big Five Tech Companies in Millions of U.S. Dollars per Quarter',
        xaxis_title='Quarter',
        yaxis_title='Capex (Millions of U.S. Dollars)',
        barmode='stack',
        legend_title='Companies',
        height=800,
    )

    return fig
|
|
|
|
|
def create_chip_designers_data_center_revenue_plot():
    """Build a stacked bar chart of quarterly data-center revenue for chip designers.

    Reads ``chip_designers_data_center_revenue.jsonl`` (one JSON object per
    line, keyed by 'Quarter' plus one column per company, values in millions
    of USD) and returns a Plotly figure with one stacked bar trace per company.
    """
    with open("chip_designers_data_center_revenue.jsonl", 'r') as file:
        records = [json.loads(line) for line in file]
    df = pd.DataFrame(records)

    fig = go.Figure()

    # Fixed company -> color mapping; dict order fixes the trace order.
    company_colors = {
        'NVIDIA': '#80bb00',
        'AMD': '#ee161f',
        'Intel': '#0065e3',
    }
    for company, color in company_colors.items():
        fig.add_trace(go.Bar(
            x=df['Quarter'],
            y=df[company],
            name=company,
            marker_color=color,
        ))

    fig.update_layout(
        title='Data Center Revenue of NVIDIA, AMD and Intel in Millions of U.S. Dollars per Quarter',
        xaxis_title='Quarter',
        yaxis_title='Data Center Revenue (Millions of U.S. Dollars)',
        barmode='stack',
        legend_title='Companies',
        height=800,
    )

    return fig
|
|
|
|
|
def create_size_for_performance_plot(category_to_display: str,
                                     parameter_type_to_display: str,
                                     model_to_compare: str) -> tuple[go.Figure, gr.Dropdown, gr.Dropdown]:
    """Plot the smallest open-weights model reaching a reference model's ELO over time.

    For every model in the chosen leaderboard category whose ELO rating is at
    least that of ``model_to_compare``, this traces the running minimum of
    ``parameter_type_to_display`` over release dates as an hv step chart and
    marks each model that set a new size minimum.

    Args:
        category_to_display: Category key inside the ELO results pickle.
        parameter_type_to_display: Column of models.jsonl to plot
            ("Total Parameters" or "Active Parameters").
        model_to_compare: Model name whose ELO rating sets the performance bar.

    Returns:
        Tuple of (figure, refreshed category dropdown, refreshed
        comparison-model dropdown).

    Raises:
        gr.Error: If the category or the comparison model is not present in
            the loaded ELO results.
    """
    # NOTE(security): the pickle is a trusted artifact shipped with the app;
    # pickle.load must never be pointed at untrusted data.
    with open('elo_results_20240915.pkl', 'rb') as file:
        elo_results = pickle.load(file)
    categories: list[str] = list(elo_results["text"].keys())
    if category_to_display not in categories:
        raise gr.Error(message=f"Category '{category_to_display}' not found.")
    elo_ratings_for_category: dict = dict(elo_results["text"][category_to_display]["elo_rating_final"])
    # Fail with a friendly UI error (consistent with the category check above)
    # instead of a bare KeyError when the comparison model is missing.
    if model_to_compare not in elo_ratings_for_category:
        raise gr.Error(message=f"Model '{model_to_compare}' not found.")

    models: list[dict] = []
    with open("models.jsonl", 'r') as file:
        for line in file:
            models.append(json.loads(line))

    # Join leaderboard ratings with model metadata (release date, parameter count).
    size_for_performance_data: list[dict] = []
    for model_name, model_elo_rating in elo_ratings_for_category.items():
        model_entries_found = [model for model in models if model["Name"] == model_name]
        if model_entries_found:
            size_for_performance_data.append({
                "Name": model_name,
                "Release Date": model_entries_found[0]["Release Date"],
                "ELO Rating": model_elo_rating,
                parameter_type_to_display: model_entries_found[0][parameter_type_to_display]
            })
        else:
            print(f"[WARNING] Model '{model_name}' not found in models.jsonl")

    # Keep only models with a known positive parameter count (open weights)
    # that match or beat the comparison model's rating, ordered by release date.
    comparison_model_elo_score = elo_ratings_for_category[model_to_compare]
    filtered_models = [model for model in size_for_performance_data
                       if model[parameter_type_to_display] > 0 and
                       model['ELO Rating'] >= comparison_model_elo_score]
    filtered_models.sort(key=lambda x: datetime.strptime(x['Release Date'], "%Y-%m-%d"))

    # Running minimum: smallest qualifying model size seen so far at each date.
    x_dates = [datetime.strptime(model['Release Date'], "%Y-%m-%d") for model in filtered_models]
    y_params = []
    min_param = float('inf')
    for model in filtered_models:
        param = model[parameter_type_to_display]
        if param <= min_param:
            min_param = param
        y_params.append(min_param)

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_params,
        mode='lines',
        # hv step: size level holds until a smaller qualifying model appears.
        line=dict(shape='hv', width=2),
        name='Model Parameters'
    ))

    fig.update_layout(
        title=f'Model Size Progression for Open-Weights Models Reaching Performance of "{model_to_compare}" in "{category_to_display}" Category',
        xaxis_title='Release Date',
        yaxis_title=parameter_type_to_display,
        yaxis_type='log',
        hovermode='x unified',
        xaxis=dict(
            range=[date(2023, 2, 27), date(2024, 9, 15)],
            type='date'
        ),
        height=800
    )

    # Mark every model that established a new size minimum.
    for i, model in enumerate(filtered_models):
        if i == 0 or y_params[i] < y_params[i - 1]:
            fig.add_trace(go.Scatter(
                x=[x_dates[i]],
                y=[y_params[i]],
                mode='markers+text',
                marker=dict(size=10),
                text=[model['Name']],
                textposition="top center",
                name=model['Name']
            ))

    return (fig,
            gr.Dropdown(choices=categories, value=category_to_display, interactive=True),
            gr.Dropdown(choices=list(elo_ratings_for_category.keys()), value=model_to_compare, interactive=True))
|
|
|
|
|
def create_arc_agi_plot() -> go.Figure:
    """Step chart of the best ARC-AGI score achieved over time.

    Joins ``arc_agi_leaderboard.jsonl`` with release dates from
    ``models.jsonl``, plots the running maximum score as an hv step line,
    and marks every entry that set a new record.
    """
    with open("arc_agi_leaderboard.jsonl", 'r') as file:
        arc_agi_leaderboard = [json.loads(line) for line in file]

    with open("models.jsonl", 'r') as file:
        models = [json.loads(line) for line in file]

    # Release-date lookup; setdefault keeps the FIRST entry per name, matching
    # a first-match search over models.jsonl.
    release_date_by_name = {}
    for model in models:
        release_date_by_name.setdefault(model['Name'], model['Release Date'])

    # Attach a parsed release date to each leaderboard entry; skip unknowns.
    data = []
    for entry in arc_agi_leaderboard:
        date_string = release_date_by_name.get(entry['model'])
        if date_string is None:
            print(f"[WARNING] Model '{entry['model']}' not found in models.jsonl")
            continue
        data.append({
            'model': entry['model'],
            'score': entry['score'],
            'release_date': datetime.strptime(date_string, "%Y-%m-%d"),
        })

    data.sort(key=lambda item: item['release_date'])

    # Running maximum: best score seen up to each release date.
    x_dates = [item['release_date'] for item in data]
    y_scores = []
    best_so_far = 0
    for item in data:
        best_so_far = max(best_so_far, item['score'])
        y_scores.append(best_so_far)

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_scores,
        mode='lines',
        # hv step: the record holds until a higher score appears.
        line=dict(shape='hv', width=2),
        name='ARC-AGI Score'
    ))

    # One labelled marker per record-setting entry.
    for i, item in enumerate(data):
        sets_new_record = i == 0 or y_scores[i] > y_scores[i - 1]
        if not sets_new_record:
            continue
        fig.add_trace(go.Scatter(
            x=[item['release_date']],
            y=[item['score']],
            mode='markers+text',
            marker=dict(size=10),
            text=[item['model']],
            textposition="top center",
            name=item['model']
        ))

    fig.update_layout(
        title='ARC-AGI Score Progression Over Time',
        xaxis_title='Release Date',
        yaxis_title='ARC-AGI Score',
        hovermode='x unified',
        xaxis=dict(
            range=[date(2024, 5, 13), date(2024, 9, 17)],
            type='date'
        ),
        yaxis=dict(
            range=[0, 100]
        ),
        height=800
    )

    return fig
|
|
|
|
|
def create_simple_bench_plot() -> go.Figure:
    """Step chart of the best Simple Bench score achieved over time.

    Joins ``simple_bench_leaderboard.jsonl`` with release dates from
    ``models.jsonl``, plots the running maximum score as an hv step line,
    and marks every entry that set a new record.
    """
    with open("simple_bench_leaderboard.jsonl", 'r') as file:
        simple_bench_leaderboard = [json.loads(line) for line in file]

    with open("models.jsonl", 'r') as file:
        models = [json.loads(line) for line in file]

    # Release-date lookup; setdefault keeps the FIRST entry per name, matching
    # a first-match search over models.jsonl.
    release_date_by_name = {}
    for model in models:
        release_date_by_name.setdefault(model['Name'], model['Release Date'])

    # Attach a parsed release date to each leaderboard entry; skip unknowns.
    data = []
    for entry in simple_bench_leaderboard:
        date_string = release_date_by_name.get(entry['model'])
        if date_string is None:
            print(f"[WARNING] Model '{entry['model']}' not found in models.jsonl")
            continue
        data.append({
            'model': entry['model'],
            'score': entry['score'],
            'release_date': datetime.strptime(date_string, "%Y-%m-%d"),
        })

    data.sort(key=lambda item: item['release_date'])

    # Running maximum: best score seen up to each release date.
    x_dates = [item['release_date'] for item in data]
    y_scores = []
    best_so_far = 0
    for item in data:
        best_so_far = max(best_so_far, item['score'])
        y_scores.append(best_so_far)

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=x_dates,
        y=y_scores,
        mode='lines',
        # hv step: the record holds until a higher score appears.
        line=dict(shape='hv', width=2),
        name='Simple Bench Score'
    ))

    # One labelled marker per record-setting entry.
    for i, item in enumerate(data):
        sets_new_record = i == 0 or y_scores[i] > y_scores[i - 1]
        if not sets_new_record:
            continue
        fig.add_trace(go.Scatter(
            x=[item['release_date']],
            y=[item['score']],
            mode='markers+text',
            marker=dict(size=10),
            text=[item['model']],
            textposition="top center",
            name=item['model']
        ))

    fig.update_layout(
        title='Simple Bench Score Progression Over Time',
        xaxis_title='Release Date',
        yaxis_title='Simple Bench Score',
        hovermode='x unified',
        xaxis=dict(
            range=[date(2023, 6, 13), date(2024, 8, 14)],
            type='date'
        ),
        yaxis=dict(
            range=[0, 100]
        ),
        height=800
    )

    return fig
|
|
|
|
|
# UI layout: nested tabs for finance charts, model-efficiency charts, and
# benchmark-progression charts. Plots are rendered lazily — each tab's figure
# is built only when the tab is selected (see the .select(...) wiring below).
# Tabs marked interactive=False are placeholders with no data source yet.
with gr.Blocks() as demo:
    with gr.Tab("Finance"):
        with gr.Tab("Big Five Capex") as big_five_capex_tab:
            big_five_capex_plot: gr.Plot = gr.Plot()
        with gr.Tab("Chip Designers Data Center Revenue") as chip_designers_data_center_revenue_tab:
            chip_designers_data_center_revenue_plot: gr.Plot = gr.Plot()
    with gr.Tab("Model Efficiency Over Time"):
        with gr.Tab("Parameters Necessary for Specific Performance Level") as size_for_performance_tab:
            with gr.Row():
                # Category and comparison-model dropdowns start non-interactive
                # with a single choice; create_size_for_performance_plot
                # re-populates them from the loaded ELO results.
                size_for_performance_category_dropdown: gr.Dropdown = gr.Dropdown(label="Category",
                                                                                  value="full",
                                                                                  choices=["full"],
                                                                                  interactive=False)
                size_for_performance_parameter_number_dropdown: gr.Dropdown = gr.Dropdown(label="Parameter Number",
                                                                                          choices=["Total Parameters",
                                                                                                   "Active Parameters"],
                                                                                          value="Total Parameters",
                                                                                          interactive=True)
                size_for_performance_comparison_model_dropdown: gr.Dropdown = gr.Dropdown(label="Model for Comparison",
                                                                                          value="gpt-4-0314",
                                                                                          choices=["gpt-4-0314"],
                                                                                          interactive=False)
            size_for_performance_plot: gr.Plot = gr.Plot()
            size_for_performance_button: gr.Button = gr.Button("Show")
            size_for_performance_markdown: gr.Markdown = gr.Markdown(
                value="""Model performance as reported on [LMSYS Chatbot Arena Leaderboard](https://lmarena.ai/?leaderboard)."""
            )
        with gr.Tab("API Cost for Specific Performance Level", interactive=False):
            api_cost_for_performance_plot: gr.Plot = gr.Plot()
    with gr.Tab("System Performance Over Time"):
        with gr.Tab("ARC-AGI") as arc_agi_tab:
            arc_agi_plot: gr.Plot = gr.Plot()
        with gr.Tab("Simple Bench") as simple_bench_tab:
            simple_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("BigCodeBench", interactive=False):
            bigcodebench_plot: gr.Plot = gr.Plot()
        with gr.Tab("Codeforces", interactive=False):
            codeforces_plot: gr.Plot = gr.Plot()
        with gr.Tab("GAIA", interactive=False):
            gaia_plot: gr.Plot = gr.Plot()
        with gr.Tab("GPQA", interactive=False):
            gpqa_plot: gr.Plot = gr.Plot()
        with gr.Tab("HumanEval", interactive=False):
            humaneval_plot: gr.Plot = gr.Plot()
        with gr.Tab("LMSYS", interactive=False):
            lmsys_plot: gr.Plot = gr.Plot()
        with gr.Tab("MATH", interactive=False):
            math_plot: gr.Plot = gr.Plot()
        with gr.Tab("OpenCompass", interactive=False):
            opencompass_plot: gr.Plot = gr.Plot()
        with gr.Tab("SWE-bench", interactive=False):
            swe_bench_plot: gr.Plot = gr.Plot()
        with gr.Tab("WebArena", interactive=False):
            webarena_plot: gr.Plot = gr.Plot()
        with gr.Tab("ZeroEval", interactive=False):
            zeroeval_plot: gr.Plot = gr.Plot()
    with gr.Tab("Frontier Language Model Training Runs", interactive=False):
        with gr.Tab("Street Price of GPUs Used"):
            gpu_street_price_plot: gr.Plot = gr.Plot()
        with gr.Tab("TDP of GPUs Used"):
            tdp_gpus_plot: gr.Plot = gr.Plot()
    # Event wiring: build each figure on tab selection; the size-for-performance
    # chart is rebuilt on button click and also refreshes its two dropdowns.
    big_five_capex_tab.select(fn=create_big_five_capex_plot, outputs=big_five_capex_plot)
    chip_designers_data_center_revenue_tab.select(fn=create_chip_designers_data_center_revenue_plot,
                                                  outputs=chip_designers_data_center_revenue_plot)
    size_for_performance_button.click(fn=create_size_for_performance_plot,
                                      inputs=[size_for_performance_category_dropdown,
                                              size_for_performance_parameter_number_dropdown,
                                              size_for_performance_comparison_model_dropdown],
                                      outputs=[size_for_performance_plot,
                                               size_for_performance_category_dropdown,
                                               size_for_performance_comparison_model_dropdown])
    arc_agi_tab.select(fn=create_arc_agi_plot, outputs=arc_agi_plot)
    simple_bench_tab.select(fn=create_simple_bench_plot, outputs=simple_bench_plot)


# Launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
|
|