Spaces:

textgeflecht
/

inference-benchmarking-results-phi4-200-tokens

Running

App Files Files Community

loghugging25 committed on May 21

Commit

f7cde70

1 Parent(s): 1e69227

initial commit

Browse files

Files changed (7) hide show

app.py +209 -0
parse_results.py +70 -0
requirements.txt +12 -0
results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-09-15-05.json +296 -0
results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json +296 -0
results/microsoft_phi-4_2025-05-21-12-47-52.json +296 -0
results/microsoft_phi-4_2025-05-21-13-17-26.json +296 -0

app.py ADDED Viewed

	@@ -0,0 +1,209 @@

+from contextlib import ExitStack
+from dataclasses import dataclass
+from typing import List, Optional
+
+import click
+import gradio as gr
+import pandas as pd
+
+from parse_results import build_results
+@dataclass
+class PlotConfig:
+    x_title: str
+    y_title: str
+    title: str
+    percentiles: List[float] = None
+def run(from_results_dir, datasource, port):
+    css = '''
+    .summary span {
+        font-size: 10px;
+        padding-top:0;
+        padding-bottom:0;
+    }
+    '''
+    summary_desc = '''
+    ## Summary
+    This table shows the average of the metrics for each model and QPS rate.
+    The metrics are:
+    * Inter token latency: Time to generate a new output token for each user querying the system.
+      It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
+    * Time to First Token: Time the user has to wait before seeing the first token of its answer.
+      Lower waiting time are essential for real-time interactions, less so for offline workloads.
+    * End-to-end latency: The overall time the system took to generate the full response to the user.
+    * Throughput: The number of tokens per second the system can generate across all requests
+    * Successful requests: The number of requests the system was able to honor in the benchmark timeframe
+    * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.
+    '''
+    df_bench = pd.DataFrame()
+    line_plots_bench = []
+    column_mappings = {'inter_token_latency_ms_p90': 'ITL P90 (ms)', 'time_to_first_token_ms_p90': 'TTFT P90 (ms)',
+                       'e2e_latency_ms_p90': 'E2E P90 (ms)', 'token_throughput_secs': 'Throughput (tokens/s)',
+                       'successful_requests': 'Successful requests', 'error_rate': 'Error rate (%)', 'model': 'Model',
+                       'rate': 'QPS', 'run_id': 'Run ID'}
+    default_df = pd.DataFrame.from_dict(
+        {"rate": [1, 2], "inter_token_latency_ms_p90": [10, 20],
+         "version": ["default", "default"],
+         "model": ["default", "default"]})
+    def load_demo(model_bench, percentiles):
+        return update_bench(model_bench, percentiles)
+    def update_bench(model, percentiles):
+        res = []
+        for plot in line_plots_bench:
+            if plot['config'].percentiles:
+                k = plot['metric'] + '_' + str(percentiles)
+                df_bench[plot['metric']] = df_bench[k] if k in df_bench.columns else 0
+            res.append(df_bench[(df_bench['model'] == model)])
+        return res + [summary_table()]
+    def summary_table() -> pd.DataFrame:
+        data = df_bench.groupby(['model', 'run_id', 'rate']).agg(
+            {'inter_token_latency_ms_p90': 'mean', 'time_to_first_token_ms_p90': 'mean',
+             'e2e_latency_ms_p90': 'mean', 'token_throughput_secs': 'mean',
+             'successful_requests': 'mean', 'error_rate': 'mean'}).reset_index()
+        data = data[
+            ['run_id', 'model', 'rate', 'inter_token_latency_ms_p90', 'time_to_first_token_ms_p90',
+             'e2e_latency_ms_p90',
+             'token_throughput_secs']]
+        for metric in ['inter_token_latency_ms_p90', 'time_to_first_token_ms_p90', 'e2e_latency_ms_p90',
+                       'token_throughput_secs']:
+            data[metric] = data[metric].apply(lambda x: f"{x:.2f}")
+        data = data.rename(
+            columns=column_mappings)
+        return data
+    def load_bench_results(source) -> pd.DataFrame:
+        data = pd.read_parquet(source)
+        # remove warmup and throughput
+        data = data[(data['id'] != 'warmup') & (data['id'] != 'throughput')]
+        # only keep constant rate
+        data = data[data['executor_type'] == 'ConstantArrivalRate']
+        return data
+    def select_region(selection: gr.SelectData, model):
+        min_w, max_w = selection.index
+        data = df_bench[(df_bench['model'] == model) & (df_bench['rate'] >= min_w) & (
+                df_bench['rate'] <= max_w)]
+        res = []
+        for plot in line_plots_bench:
+            # find the y values for the selected region
+            metric = plot["metric"]
+            y_min = data[metric].min()
+            y_max = data[metric].max()
+            res.append(gr.LinePlot(x_lim=[min_w, max_w], y_lim=[y_min, y_max]))
+        return res
+    def reset_region():
+        res = []
+        for _ in line_plots_bench:
+            res.append(gr.LinePlot(x_lim=None, y_lim=None))
+        return res
+    def load_datasource(datasource, fn):
+        if datasource.startswith('file://'):
+            return fn(datasource)
+        elif datasource.startswith('s3://'):
+            return fn(datasource)
+        else:
+            raise ValueError(f"Unknown datasource: {datasource}")
+    if from_results_dir is not None:
+        build_results(from_results_dir, 'benchmarks.parquet', None)
+    # Load data
+    df_bench = load_datasource(datasource, load_bench_results)
+    # Define metrics
+    metrics = {
+        "inter_token_latency_ms": PlotConfig(title="Inter Token Latency (lower is better)", x_title="QPS",
+                                             y_title="Time (ms)", percentiles=[0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]),
+        "time_to_first_token_ms": PlotConfig(title="TTFT (lower is better)", x_title="QPS",
+                                             y_title="Time (ms)", percentiles=[0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]),
+        "e2e_latency_ms": PlotConfig(title="End to End Latency (lower is better)", x_title="QPS",
+                                     y_title="Time (ms)", percentiles=[0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]),
+        "token_throughput_secs": PlotConfig(title="Request Output Throughput (higher is better)", x_title="QPS",
+                                            y_title="Tokens/s"),
+        "successful_requests": PlotConfig(title="Successful requests (higher is better)", x_title="QPS",
+                                          y_title="Count"),
+        "error_rate": PlotConfig(title="Error rate", x_title="QPS", y_title="%"),
+        "prompt_tokens": PlotConfig(title="Prompt tokens", x_title="QPS", y_title="Count"),
+        "decoded_tokens": PlotConfig(title="Decoded tokens", x_title="QPS", y_title="Count")
+    }
+    models = df_bench["model"].unique()
+    run_ids = df_bench["run_id"].unique()
+    # get all available percentiles
+    percentiles = set()
+    for k, v in metrics.items():
+        if v.percentiles:
+            percentiles.update(v.percentiles)
+    percentiles = map(lambda p: f'p{int(float(p) * 100)}', percentiles)
+    percentiles = sorted(list(percentiles))
+    percentiles.append('avg')
+    with gr.Blocks(css=css, title="Inference Benchmarker") as demo:
+        with gr.Row():
+            gr.Markdown("# Inference-benchmarker 🤗\n## Benchmarks results")
+        with gr.Row():
+            gr.Markdown(summary_desc)
+        with gr.Row():
+            table = gr.DataFrame(
+                pd.DataFrame(),
+                elem_classes=["summary"],
+            )
+        with gr.Row():
+            details_desc = gr.Markdown("## Details")
+        with gr.Row():
+            model = gr.Dropdown(list(models), label="Select model", value=models[0])
+        with gr.Row():
+            percentiles_bench = gr.Radio(percentiles, label="", value="avg")
+        i = 0
+        with ExitStack() as stack:
+            for k, v in metrics.items():
+                if i % 2 == 0:
+                    stack.close()
+                    gs = stack.enter_context(gr.Row())
+                line_plots_bench.append(
+                    {"component": gr.LinePlot(default_df, label=f'{v.title}', x="rate", y=k,
+                                              y_title=v.y_title, x_title=v.x_title,
+                                              color="run_id"
+                                              ),
+                     "model": model.value,
+                     "metric": k,
+                     "config": v
+                     },
+                )
+                i += 1
+        for component in [model, percentiles_bench]:
+            component.change(update_bench, [model, percentiles_bench],
+                             [item["component"] for item in line_plots_bench] + [table])
+        gr.on([plot["component"].select for plot in line_plots_bench], select_region, [model],
+              outputs=[item["component"] for item in line_plots_bench])
+        gr.on([plot["component"].double_click for plot in line_plots_bench], reset_region, None,
+              outputs=[item["component"] for item in line_plots_bench])
+        demo.load(load_demo, [model, percentiles_bench],
+                  [item["component"] for item in line_plots_bench] + [table])
+    demo.launch(server_port=port, server_name="0.0.0.0")
+# Command-line entry point: parses the three options below and forwards them
+# unchanged to run(). Documented with comments rather than a docstring because
+# click would display a docstring as the command's help text.
+@click.command()
+@click.option('--from-results-dir', default=None, help='Load inference-benchmarker results from a directory')
+@click.option('--datasource', default='file://benchmarks.parquet', help='Load a Parquet file already generated')
+@click.option('--port', default=7860, help='Port to run the dashboard')
+def main(from_results_dir, datasource, port):
+    run(from_results_dir, datasource, port)
+if __name__ == '__main__':
+    # auto_envvar_prefix lets click also read each option from an environment
+    # variable named DASHBOARD_<OPTION_NAME>.
+    main(auto_envvar_prefix='DASHBOARD')

parse_results.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import argparse
+import json
+import os
+import pandas as pd
+def build_df(model: str, data_files: dict[str, str]) -> pd.DataFrame:
+    df = pd.DataFrame()
+    # Load the results
+    for key, filename in data_files.items():
+        with open(filename, 'r') as f:
+            data = json.load(f)
+            if data['config']['meta'] is None:
+                data['config']['meta'] = {}
+            for result in data['results']:
+                entry = pd.json_normalize(result).to_dict(orient='records')[0]
+                if 'engine' in data['config']['meta']:
+                    entry['engine'] = data['config']['meta']['engine']
+                if 'tp' in data['config']['meta']:
+                    entry['tp'] = data['config']['meta']['tp']
+                if 'version' in data['config']['meta']:
+                    entry['version'] = data['config']['meta']['version']
+                if 'device' in data['config']['meta']:
+                    entry['device'] = data['config']['meta']['device']
+                entry['model'] = data['config']['model_name']
+                entry['run_id'] = data['config']['run_id']
+                df_tmp = pd.DataFrame(entry, index=[0])
+                # rename columns that start with 'config.'
+                df_tmp = df_tmp.rename(columns={c: c.split('config.')[-1] for c in df_tmp.columns})
+                # replace . with _ in column names
+                df_tmp.columns = [c.replace('.', '_') for c in df_tmp.columns]
+                df = pd.concat([df, df_tmp])
+    return df
+def build_results_df(results_dir) -> pd.DataFrame:
+    df = pd.DataFrame()
+    # list directories
+    directories = [f'{results_dir}/{d}' for d in os.listdir(results_dir) if os.path.isdir(f'{results_dir}/{d}')] + [results_dir]
+    for directory in directories:
+        # list json files in results directory
+        data_files = {}
+        for filename in os.listdir(directory):
+            if filename.endswith('.json'):
+                data_files[filename.split('.')[-2]] = f'{directory}/{filename}'
+        df = pd.concat([df, build_df(directory.split('/')[-1], data_files)])
+    return df
+def build_results(results_dir, results_file, device):
+    df = build_results_df(results_dir)
+    if 'device' not in df.columns:
+        df['device'] = df['model'].apply(lambda x: device)
+    df['error_rate'] = df['failed_requests'] / (df['failed_requests'] + df['successful_requests']) * 100.0
+    df['prompt_tokens'] = df['total_tokens_sent'] / df['successful_requests']
+    df['decoded_tokens'] = df['total_tokens'] / df['successful_requests']
+    df.to_parquet(results_file)
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--results-dir', default='results', type=str, required=True,
+                        help='Path to the source directory containing the results')
+    parser.add_argument('--results-file', type=str, required=True,
+                        help='Path to the results file to write to. Can be a S3 path')
+    parser.add_argument('--device', type=str, required=True, help='GPU name used for benchmarking')
+    args = parser.parse_args()
+    build_results(args.results_dir, args.results_file, args.device)

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+gradio==5.23.1
+pandas==2.2.3
+numpy==2.2.4
+matplotlib==3.10.1
+python-dateutil==2.9.0
+pyyaml==6.0.2
+fastapi==0.115.12
+uvicorn==0.34.0
+aiofiles==23.2.1
+orjson==3.10.16
+typing-extensions==4.13.0
+anyio==4.9.0

results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-09-15-05.json ADDED Viewed

	@@ -0,0 +1,296 @@

+{
+	"config": {
+		"max_vus": 800,
+		"duration_secs": 120,
+		"benchmark_kind": "Rate",
+		"warmup_duration_secs": 30,
+		"rates": [
+			1.0,
+			10.0,
+			30.0,
+			100.0
+		],
+		"num_rates": 10,
+		"prompt_options": {
+			"num_tokens": 200,
+			"min_tokens": 180,
+			"max_tokens": 220,
+			"variance": 10
+		},
+		"decode_options": {
+			"num_tokens": 200,
+			"min_tokens": 180,
+			"max_tokens": 220,
+			"variance": 10
+		},
+		"tokenizer": "RedHatAI/phi-4-FP8-dynamic",
+		"model_name": "phi-4",
+		"profile": null,
+		"meta": null,
+		"run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (200 tokens)"
+	},
+	"results": [
+		{
+			"id": "warmup",
+			"executor_type": "ConstantVUs",
+			"config": {
+				"max_vus": 1,
+				"duration_secs": 30,
+				"rate": null
+			},
+			"total_requests": 7,
+			"total_tokens": 1401,
+			"token_throughput_secs": 41.207311909734074,
+			"duration_ms": 33998,
+			"time_to_first_token_ms": {
+				"p50": 30.74,
+				"p60": 30.848,
+				"p70": 31.032,
+				"p80": 31.367,
+				"p90": 600.369,
+				"p95": 1027.036,
+				"p99": 1368.37,
+				"avg": 233.964
+			},
+			"inter_token_latency_ms": {
+				"p50": 23.217,
+				"p60": 23.222,
+				"p70": 23.228,
+				"p80": 23.236,
+				"p90": 23.248,
+				"p95": 23.254,
+				"p99": 23.26,
+				"avg": 23.213
+			},
+			"failed_requests": 0,
+			"successful_requests": 7,
+			"request_rate": 0.2058894956232252,
+			"total_tokens_sent": 1400,
+			"e2e_latency_ms": {
+				"p50": 4743.409,
+				"p60": 4751.971,
+				"p70": 4775.205,
+				"p80": 4827.785,
+				"p90": 5318.839,
+				"p95": 5673.985,
+				"p99": 5958.102,
+				"avg": 4856.823
+			}
+		},
+		{
+			"id": "constant@1.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 1.0
+			},
+			"total_requests": 115,
+			"total_tokens": 22163,
+			"token_throughput_secs": 186.64991064360598,
+			"duration_ms": 118741,
+			"time_to_first_token_ms": {
+				"p50": 43.445,
+				"p60": 45.341,
+				"p70": 47.407,
+				"p80": 50.324,
+				"p90": 53.509,
+				"p95": 54.94,
+				"p99": 57.022,
+				"avg": 43.314
+			},
+			"inter_token_latency_ms": {
+				"p50": 24.082,
+				"p60": 24.1,
+				"p70": 24.124,
+				"p80": 24.146,
+				"p90": 24.21,
+				"p95": 24.288,
+				"p99": 24.376,
+				"avg": 24.09
+			},
+			"failed_requests": 0,
+			"successful_requests": 115,
+			"request_rate": 0.9684943249566704,
+			"total_tokens_sent": 23000,
+			"e2e_latency_ms": {
+				"p50": 4814.201,
+				"p60": 4873.26,
+				"p70": 4947.365,
+				"p80": 5011.934,
+				"p90": 5104.903,
+				"p95": 5182.844,
+				"p99": 5309.301,
+				"avg": 4665.197
+			}
+		},
+		{
+			"id": "constant@10.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 10.0
+			},
+			"total_requests": 1149,
+			"total_tokens": 217686,
+			"token_throughput_secs": 1837.4411468828155,
+			"duration_ms": 118472,
+			"time_to_first_token_ms": {
+				"p50": 55.249,
+				"p60": 57.796,
+				"p70": 60.296,
+				"p80": 63.162,
+				"p90": 66.14,
+				"p95": 67.799,
+				"p99": 70.85,
+				"avg": 55.52
+			},
+			"inter_token_latency_ms": {
+				"p50": 28.914,
+				"p60": 28.973,
+				"p70": 29.029,
+				"p80": 29.089,
+				"p90": 29.168,
+				"p95": 29.211,
+				"p99": 29.331,
+				"avg": 28.737
+			},
+			"failed_requests": 0,
+			"successful_requests": 1149,
+			"request_rate": 9.698464199665366,
+			"total_tokens_sent": 229800,
+			"e2e_latency_ms": {
+				"p50": 5707.118,
+				"p60": 5793.95,
+				"p70": 5885.254,
+				"p80": 5983.201,
+				"p90": 6126.889,
+				"p95": 6219.476,
+				"p99": 6386.803,
+				"avg": 5477.946
+			}
+		},
+		{
+			"id": "constant@30.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 30.0
+			},
+			"total_requests": 1889,
+			"total_tokens": 348708,
+			"token_throughput_secs": 2911.7479692043544,
+			"duration_ms": 119758,
+			"time_to_first_token_ms": {
+				"p50": 22192.744,
+				"p60": 26837.194,
+				"p70": 29205.612,
+				"p80": 33069.312,
+				"p90": 35968.562,
+				"p95": 36825.858,
+				"p99": 37298.867,
+				"avg": 19829.052
+			},
+			"inter_token_latency_ms": {
+				"p50": 64.987,
+				"p60": 66.093,
+				"p70": 67.344,
+				"p80": 72.108,
+				"p90": 90.713,
+				"p95": 98.38,
+				"p99": 177.348,
+				"avg": 69.926
+			},
+			"failed_requests": 0,
+			"successful_requests": 1889,
+			"request_rate": 15.77334593363796,
+			"total_tokens_sent": 377800,
+			"e2e_latency_ms": {
+				"p50": 33837.749,
+				"p60": 38364.805,
+				"p70": 42612.972,
+				"p80": 45779.935,
+				"p90": 48249.655,
+				"p95": 49268.594,
+				"p99": 50884.661,
+				"avg": 32263.266
+			}
+		},
+		{
+			"id": "constant@100.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 100.0
+			},
+			"total_requests": 1923,
+			"total_tokens": 355495,
+			"token_throughput_secs": 2963.3510051149824,
+			"duration_ms": 119963,
+			"time_to_first_token_ms": {
+				"p50": 30849.07,
+				"p60": 32647.17,
+				"p70": 35695.762,
+				"p80": 36657.309,
+				"p90": 37063.893,
+				"p95": 37265.804,
+				"p99": 37693.244,
+				"avg": 25983.203
+			},
+			"inter_token_latency_ms": {
+				"p50": 64.756,
+				"p60": 66.434,
+				"p70": 68.803,
+				"p80": 83.204,
+				"p90": 96.295,
+				"p95": 103.874,
+				"p99": 163.895,
+				"avg": 73.033
+			},
+			"failed_requests": 0,
+			"successful_requests": 1923,
+			"request_rate": 16.02982878194099,
+			"total_tokens_sent": 384600,
+			"e2e_latency_ms": {
+				"p50": 44432.763,
+				"p60": 46273.082,
+				"p70": 47729.904,
+				"p80": 48714.768,
+				"p90": 49917.33,
+				"p95": 50686.527,
+				"p99": 51992.951,
+				"avg": 38685.294
+			}
+		}
+	],
+	"start_time": "2025-05-21T09:04:59.479961191+00:00",
+	"end_time": "2025-05-21T09:15:05.115323148+00:00",
+	"system": {
+		"cpu": [
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
+		],
+		"memory": "83.47 GB",
+		"os_name": "Debian GNU/Linux",
+		"os_version": "11",
+		"kernel": "5.15.167.4-microsoft-standard-WSL2",
+		"hostname": "computer"
+	}
+}

results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json ADDED Viewed

	@@ -0,0 +1,296 @@

+{
+	"config": {
+		"max_vus": 800,
+		"duration_secs": 120,
+		"benchmark_kind": "Rate",
+		"warmup_duration_secs": 30,
+		"rates": [
+			1.0,
+			10.0,
+			30.0,
+			100.0
+		],
+		"num_rates": 10,
+		"prompt_options": {
+			"num_tokens": 8000,
+			"min_tokens": 7980,
+			"max_tokens": 8020,
+			"variance": 10
+		},
+		"decode_options": {
+			"num_tokens": 8000,
+			"min_tokens": 7980,
+			"max_tokens": 8020,
+			"variance": 10
+		},
+		"tokenizer": "RedHatAI/phi-4-FP8-dynamic",
+		"model_name": "phi-4",
+		"profile": null,
+		"meta": null,
+		"run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (8000 tokens)"
+	},
+	"results": [
+		{
+			"id": "warmup",
+			"executor_type": "ConstantVUs",
+			"config": {
+				"max_vus": 1,
+				"duration_secs": 30,
+				"rate": null
+			},
+			"total_requests": 2,
+			"total_tokens": 1643,
+			"token_throughput_secs": 38.490013255851395,
+			"duration_ms": 42686,
+			"time_to_first_token_ms": {
+				"p50": 1276.801,
+				"p60": 1388.913,
+				"p70": 1501.026,
+				"p80": 1613.139,
+				"p90": 1725.252,
+				"p95": 1781.309,
+				"p99": 1826.154,
+				"avg": 1276.801
+			},
+			"inter_token_latency_ms": {
+				"p50": 24.424,
+				"p60": 24.432,
+				"p70": 24.44,
+				"p80": 24.448,
+				"p90": 24.456,
+				"p95": 24.46,
+				"p99": 24.463,
+				"avg": 24.424
+			},
+			"failed_requests": 0,
+			"successful_requests": 2,
+			"request_rate": 0.0468533332390157,
+			"total_tokens_sent": 16000,
+			"e2e_latency_ms": {
+				"p50": 21343.075,
+				"p60": 21391.438,
+				"p70": 21439.801,
+				"p80": 21488.164,
+				"p90": 21536.527,
+				"p95": 21560.709,
+				"p99": 21580.054,
+				"avg": 21343.075
+			}
+		},
+		{
+			"id": "constant@1.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 1.0
+			},
+			"total_requests": 90,
+			"total_tokens": 55892,
+			"token_throughput_secs": 478.696852515677,
+			"duration_ms": 116758,
+			"time_to_first_token_ms": {
+				"p50": 118.856,
+				"p60": 124.707,
+				"p70": 131.654,
+				"p80": 135.562,
+				"p90": 145.529,
+				"p95": 150.366,
+				"p99": 715.649,
+				"avg": 128.611
+			},
+			"inter_token_latency_ms": {
+				"p50": 45.758,
+				"p60": 46.229,
+				"p70": 46.314,
+				"p80": 46.373,
+				"p90": 46.483,
+				"p95": 46.581,
+				"p99": 46.871,
+				"avg": 43.271
+			},
+			"failed_requests": 0,
+			"successful_requests": 90,
+			"request_rate": 0.7708208102485317,
+			"total_tokens_sent": 720000,
+			"e2e_latency_ms": {
+				"p50": 27887.256,
+				"p60": 30188.411,
+				"p70": 31661.903,
+				"p80": 35685.812,
+				"p90": 45661.636,
+				"p95": 50093.628,
+				"p99": 59727.184,
+				"avg": 27093.895
+			}
+		},
+		{
+			"id": "constant@10.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 10.0
+			},
+			"total_requests": 97,
+			"total_tokens": 45779,
+			"token_throughput_secs": 385.8671945353039,
+			"duration_ms": 118639,
+			"time_to_first_token_ms": {
+				"p50": 264.625,
+				"p60": 314.639,
+				"p70": 341.786,
+				"p80": 416.021,
+				"p90": 502.604,
+				"p95": 608.336,
+				"p99": 712.908,
+				"avg": 278.878
+			},
+			"inter_token_latency_ms": {
+				"p50": 152.068,
+				"p60": 183.639,
+				"p70": 208.294,
+				"p80": 210.057,
+				"p90": 211.894,
+				"p95": 421.244,
+				"p99": 436.578,
+				"avg": 190.502
+			},
+			"failed_requests": 0,
+			"successful_requests": 97,
+			"request_rate": 0.8176045319890011,
+			"total_tokens_sent": 776000,
+			"e2e_latency_ms": {
+				"p50": 89809.719,
+				"p60": 90599.198,
+				"p70": 97086.861,
+				"p80": 97763.592,
+				"p90": 102705.608,
+				"p95": 105891.319,
+				"p99": 109209.372,
+				"avg": 80168.287
+			}
+		},
+		{
+			"id": "constant@30.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 30.0
+			},
+			"total_requests": 108,
+			"total_tokens": 48755,
+			"token_throughput_secs": 408.5182278415837,
+			"duration_ms": 119345,
+			"time_to_first_token_ms": {
+				"p50": 315.639,
+				"p60": 364.113,
+				"p70": 440.936,
+				"p80": 517.15,
+				"p90": 635.496,
+				"p95": 743.467,
+				"p99": 886.077,
+				"avg": 348.945
+			},
+			"inter_token_latency_ms": {
+				"p50": 172.827,
+				"p60": 189.057,
+				"p70": 196.538,
+				"p80": 201.266,
+				"p90": 442.975,
+				"p95": 465.991,
+				"p99": 473.842,
+				"avg": 207.845
+			},
+			"failed_requests": 0,
+			"successful_requests": 108,
+			"request_rate": 0.9049321835071489,
+			"total_tokens_sent": 864000,
+			"e2e_latency_ms": {
+				"p50": 89868.756,
+				"p60": 96902.23,
+				"p70": 98937.333,
+				"p80": 102789.849,
+				"p90": 109541.9,
+				"p95": 111388.456,
+				"p99": 114281.927,
+				"avg": 82072.638
+			}
+		},
+		{
+			"id": "constant@100.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 100.0
+			},
+			"total_requests": 125,
+			"total_tokens": 57918,
+			"token_throughput_secs": 485.359321343381,
+			"duration_ms": 119330,
+			"time_to_first_token_ms": {
+				"p50": 1154.434,
+				"p60": 1276.393,
+				"p70": 1440.368,
+				"p80": 1604.069,
+				"p90": 1768.54,
+				"p95": 1850.13,
+				"p99": 1919.678,
+				"avg": 1208.132
+			},
+			"inter_token_latency_ms": {
+				"p50": 166.875,
+				"p60": 166.884,
+				"p70": 167.245,
+				"p80": 188.28,
+				"p90": 350.172,
+				"p95": 417.485,
+				"p99": 437.566,
+				"avg": 186.06
+			},
+			"failed_requests": 0,
+			"successful_requests": 125,
+			"request_rate": 1.047513988188864,
+			"total_tokens_sent": 1000000,
+			"e2e_latency_ms": {
+				"p50": 82803.004,
+				"p60": 89976.229,
+				"p70": 90374.914,
+				"p80": 99727.225,
+				"p90": 108866.194,
+				"p95": 113444.528,
+				"p99": 116545.189,
+				"avg": 77917.015
+			}
+		}
+	],
+	"start_time": "2025-05-21T13:41:44.260015742+00:00",
+	"end_time": "2025-05-21T13:56:47.150683889+00:00",
+	"system": {
+		"cpu": [
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
+		],
+		"memory": "83.47 GB",
+		"os_name": "Debian GNU/Linux",
+		"os_version": "11",
+		"kernel": "5.15.167.4-microsoft-standard-WSL2",
+		"hostname": "computer"
+	}
+}

results/microsoft_phi-4_2025-05-21-12-47-52.json ADDED Viewed

	@@ -0,0 +1,296 @@

+{
+	"config": {
+		"max_vus": 800,
+		"duration_secs": 120,
+		"benchmark_kind": "Rate",
+		"warmup_duration_secs": 30,
+		"rates": [
+			1.0,
+			10.0,
+			30.0,
+			100.0
+		],
+		"num_rates": 10,
+		"prompt_options": {
+			"num_tokens": 200,
+			"min_tokens": 180,
+			"max_tokens": 220,
+			"variance": 10
+		},
+		"decode_options": {
+			"num_tokens": 200,
+			"min_tokens": 180,
+			"max_tokens": 220,
+			"variance": 10
+		},
+		"tokenizer": "microsoft/phi-4",
+		"model_name": "phi-4",
+		"profile": null,
+		"meta": null,
+		"run_id": "Ollama: unsloth/phi-4-GGUF:Q8_0 (200 tokens)"
+	},
+	"results": [
+		{
+			"id": "warmup",
+			"executor_type": "ConstantVUs",
+			"config": {
+				"max_vus": 1,
+				"duration_secs": 30,
+				"rate": null
+			},
+			"total_requests": 17,
+			"total_tokens": 2560,
+			"token_throughput_secs": 81.92346820970964,
+			"duration_ms": 31248,
+			"time_to_first_token_ms": {
+				"p50": 48.023,
+				"p60": 48.316,
+				"p70": 48.704,
+				"p80": 49.172,
+				"p90": 50.133,
+				"p95": 79.141,
+				"p99": 171.884,
+				"avg": 56.904
+			},
+			"inter_token_latency_ms": {
+				"p50": 11.835,
+				"p60": 11.849,
+				"p70": 11.866,
+				"p80": 11.888,
+				"p90": 11.999,
+				"p95": 12.031,
+				"p99": 12.057,
+				"avg": 11.863
+			},
+			"failed_requests": 0,
+			"successful_requests": 17,
+			"request_rate": 0.5440230310801031,
+			"total_tokens_sent": 3400,
+			"e2e_latency_ms": {
+				"p50": 2193.161,
+				"p60": 2256.189,
+				"p70": 2409.636,
+				"p80": 2503.287,
+				"p90": 2558.373,
+				"p95": 2565.267,
+				"p99": 2582.093,
+				"avg": 1837.986
+			}
+		},
+		{
+			"id": "constant@1.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 1.0
+			},
+			"total_requests": 68,
+			"total_tokens": 13393,
+			"token_throughput_secs": 113.50678834081126,
+			"duration_ms": 117992,
+			"time_to_first_token_ms": {
+				"p50": 23628.355,
+				"p60": 28364.866,
+				"p70": 33468.314,
+				"p80": 37116.28,
+				"p90": 42197.075,
+				"p95": 44792.584,
+				"p99": 46808.871,
+				"avg": 23527.531
+			},
+			"inter_token_latency_ms": {
+				"p50": 17.148,
+				"p60": 17.164,
+				"p70": 17.183,
+				"p80": 17.199,
+				"p90": 17.22,
+				"p95": 17.235,
+				"p99": 17.256,
+				"avg": 17.123
+			},
+			"failed_requests": 0,
+			"successful_requests": 68,
+			"request_rate": 0.5763056527421164,
+			"total_tokens_sent": 13600,
+			"e2e_latency_ms": {
+				"p50": 26918.292,
+				"p60": 31837.746,
+				"p70": 36426.629,
+				"p80": 40565.391,
+				"p90": 45507.834,
+				"p95": 48259.487,
+				"p99": 50280.92,
+				"avg": 26884.974
+			}
+		},
+		{
+			"id": "constant@10.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 10.0
+			},
+			"total_requests": 69,
+			"total_tokens": 13411,
+			"token_throughput_secs": 112.91469560470007,
+			"duration_ms": 118771,
+			"time_to_first_token_ms": {
+				"p50": 54889.419,
+				"p60": 66226.724,
+				"p70": 77657.43,
+				"p80": 87194.269,
+				"p90": 97361.153,
+				"p95": 102660.303,
+				"p99": 106894.626,
+				"avg": 54527.075
+			},
+			"inter_token_latency_ms": {
+				"p50": 17.284,
+				"p60": 17.295,
+				"p70": 17.305,
+				"p80": 17.328,
+				"p90": 17.385,
+				"p95": 17.394,
+				"p99": 17.447,
+				"avg": 17.279
+			},
+			"failed_requests": 0,
+			"successful_requests": 69,
+			"request_rate": 0.5809495188072705,
+			"total_tokens_sent": 13800,
+			"e2e_latency_ms": {
+				"p50": 58021.804,
+				"p60": 69751.13,
+				"p70": 80116.293,
+				"p80": 90587.03,
+				"p90": 100535.513,
+				"p95": 105903.68,
+				"p99": 110535.65,
+				"avg": 57868.946
+			}
+		},
+		{
+			"id": "constant@30.00req/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 30.0
+			},
+			"total_requests": 70,
+			"total_tokens": 13581,
+			"token_throughput_secs": 113.61611267427078,
+			"duration_ms": 119534,
+			"time_to_first_token_ms": {
+				"p50": 56313.526,
+				"p60": 68465.8,
+				"p70": 78580.113,
+				"p80": 90639.114,
+				"p90": 102040.301,
+				"p95": 108031.928,
+				"p99": 112499.04,
+				"avg": 56639.341
+			},
+			"inter_token_latency_ms": {
+				"p50": 17.172,
+				"p60": 17.182,
+				"p70": 17.217,
+				"p80": 17.235,
+				"p90": 17.256,
+				"p95": 17.31,
+				"p99": 17.346,
+				"avg": 17.18
+			},
+			"failed_requests": 0,
+			"successful_requests": 70,
+			"request_rate": 0.5856069425814708,
+			"total_tokens_sent": 14000,
+			"e2e_latency_ms": {
+				"p50": 59683.651,
+				"p60": 71746.875,
+				"p70": 81953.181,
+				"p80": 94277.653,
+				"p90": 105378.271,
+				"p95": 111453.36,
+				"p99": 115949.496,
+				"avg": 59958.385
+			}
+		},
+		{
+			"id": "[email protected]/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 100.0
+			},
+			"total_requests": 70,
+			"total_tokens": 13359,
+			"token_throughput_secs": 114.42379660997986,
+			"duration_ms": 116750,
+			"time_to_first_token_ms": {
+				"p50": 57218.949,
+				"p60": 67960.841,
+				"p70": 79764.715,
+				"p80": 91579.471,
+				"p90": 102620.956,
+				"p95": 107961.016,
+				"p99": 112866.279,
+				"avg": 56772.876
+			},
+			"inter_token_latency_ms": {
+				"p50": 17.171,
+				"p60": 17.189,
+				"p70": 17.201,
+				"p80": 17.215,
+				"p90": 17.245,
+				"p95": 17.299,
+				"p99": 17.353,
+				"avg": 17.179
+			},
+			"failed_requests": 0,
+			"successful_requests": 70,
+			"request_rate": 0.5995707584922966,
+			"total_tokens_sent": 14000,
+			"e2e_latency_ms": {
+				"p50": 60551.916,
+				"p60": 71380.408,
+				"p70": 83198.203,
+				"p80": 93909.886,
+				"p90": 105788.774,
+				"p95": 111364.807,
+				"p99": 115968.729,
+				"avg": 60037.39
+			}
+		}
+	],
+	"start_time": "2025-05-21T12:32:04.299141299+00:00",
+	"end_time": "2025-05-21T12:47:52.695866821+00:00",
+	"system": {
+		"cpu": [
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
+		],
+		"memory": "83.47 GB",
+		"os_name": "Debian GNU/Linux",
+		"os_version": "11",
+		"kernel": "5.15.167.4-microsoft-standard-WSL2",
+		"hostname": "computer"
+	}
+}

results/microsoft_phi-4_2025-05-21-13-17-26.json ADDED Viewed

	@@ -0,0 +1,296 @@

+{
+	"config": {
+		"max_vus": 800,
+		"duration_secs": 120,
+		"benchmark_kind": "Rate",
+		"warmup_duration_secs": 30,
+		"rates": [
+			1.0,
+			10.0,
+			30.0,
+			100.0
+		],
+		"num_rates": 10,
+		"prompt_options": {
+			"num_tokens": 200,
+			"min_tokens": 180,
+			"max_tokens": 220,
+			"variance": 10
+		},
+		"decode_options": {
+			"num_tokens": 200,
+			"min_tokens": 180,
+			"max_tokens": 220,
+			"variance": 10
+		},
+		"tokenizer": "microsoft/phi-4",
+		"model_name": "phi-4",
+		"profile": null,
+		"meta": null,
+		"run_id": "LM Studio: lmstudio-community/phi-4-GGUF:Q8_0 (200 tokens)"
+	},
+	"results": [
+		{
+			"id": "warmup",
+			"executor_type": "ConstantVUs",
+			"config": {
+				"max_vus": 1,
+				"duration_secs": 30,
+				"rate": null
+			},
+			"total_requests": 13,
+			"total_tokens": 2610,
+			"token_throughput_secs": 83.60700961692694,
+			"duration_ms": 31217,
+			"time_to_first_token_ms": {
+				"p50": 90.517,
+				"p60": 93.25,
+				"p70": 102.443,
+				"p80": 109.227,
+				"p90": 130.959,
+				"p95": 207.294,
+				"p99": 293.629,
+				"avg": 108.58
+			},
+			"inter_token_latency_ms": {
+				"p50": 11.513,
+				"p60": 11.519,
+				"p70": 11.534,
+				"p80": 11.548,
+				"p90": 11.559,
+				"p95": 11.574,
+				"p99": 11.589,
+				"avg": 11.472
+			},
+			"failed_requests": 0,
+			"successful_requests": 13,
+			"request_rate": 0.41643338123373574,
+			"total_tokens_sent": 2600,
+			"e2e_latency_ms": {
+				"p50": 2419.372,
+				"p60": 2423.796,
+				"p70": 2432.426,
+				"p80": 2458.236,
+				"p90": 2525.006,
+				"p95": 2596.86,
+				"p99": 2667.757,
+				"avg": 2401.195
+			}
+		},
+		{
+			"id": "[email protected]/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 1.0
+			},
+			"total_requests": 52,
+			"total_tokens": 9915,
+			"token_throughput_secs": 84.1224984364473,
+			"duration_ms": 117863,
+			"time_to_first_token_ms": {
+				"p50": 31149.018,
+				"p60": 38159.307,
+				"p70": 44798.95,
+				"p80": 51599.01,
+				"p90": 58334.517,
+				"p95": 61414.588,
+				"p99": 63915.289,
+				"avg": 32379.62
+			},
+			"inter_token_latency_ms": {
+				"p50": 11.473,
+				"p60": 11.501,
+				"p70": 11.517,
+				"p80": 11.529,
+				"p90": 11.563,
+				"p95": 11.598,
+				"p99": 11.775,
+				"avg": 11.254
+			},
+			"failed_requests": 0,
+			"successful_requests": 52,
+			"request_rate": 0.4411870820670963,
+			"total_tokens_sent": 10400,
+			"e2e_latency_ms": {
+				"p50": 33388.263,
+				"p60": 40395.415,
+				"p70": 47230.795,
+				"p80": 53979.194,
+				"p90": 60382.07,
+				"p95": 63519.032,
+				"p99": 66184.234,
+				"avg": 34556.301
+			}
+		},
+		{
+			"id": "[email protected]/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 10.0
+			},
+			"total_requests": 51,
+			"total_tokens": 10041,
+			"token_throughput_secs": 84.04049965954646,
+			"duration_ms": 119478,
+			"time_to_first_token_ms": {
+				"p50": 55889.645,
+				"p60": 67098.347,
+				"p70": 78905.359,
+				"p80": 90289.182,
+				"p90": 101201.112,
+				"p95": 106805.272,
+				"p99": 111193.127,
+				"avg": 56139.066
+			},
+			"inter_token_latency_ms": {
+				"p50": 11.487,
+				"p60": 11.498,
+				"p70": 11.51,
+				"p80": 11.536,
+				"p90": 11.584,
+				"p95": 11.638,
+				"p99": 11.883,
+				"avg": 11.474
+			},
+			"failed_requests": 0,
+			"successful_requests": 51,
+			"request_rate": 0.4268564368725096,
+			"total_tokens_sent": 10200,
+			"e2e_latency_ms": {
+				"p50": 58084.912,
+				"p60": 69432.711,
+				"p70": 81080.254,
+				"p80": 92442.614,
+				"p90": 103527.041,
+				"p95": 108999.672,
+				"p99": 113397.637,
+				"avg": 58387.662
+			}
+		},
+		{
+			"id": "[email protected]/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 30.0
+			},
+			"total_requests": 51,
+			"total_tokens": 9889,
+			"token_throughput_secs": 84.08188681268076,
+			"duration_ms": 117611,
+			"time_to_first_token_ms": {
+				"p50": 55982.506,
+				"p60": 68000.692,
+				"p70": 79600.152,
+				"p80": 91108.706,
+				"p90": 101995.453,
+				"p95": 107929.312,
+				"p99": 112340.212,
+				"avg": 56754.648
+			},
+			"inter_token_latency_ms": {
+				"p50": 11.503,
+				"p60": 11.515,
+				"p70": 11.531,
+				"p80": 11.564,
+				"p90": 11.589,
+				"p95": 11.633,
+				"p99": 11.795,
+				"avg": 11.477
+			},
+			"failed_requests": 0,
+			"successful_requests": 51,
+			"request_rate": 0.43363092602353315,
+			"total_tokens_sent": 10200,
+			"e2e_latency_ms": {
+				"p50": 58352.067,
+				"p60": 70321.743,
+				"p70": 81960.377,
+				"p80": 93288.338,
+				"p90": 104277.554,
+				"p95": 110084.734,
+				"p99": 114675.842,
+				"avg": 58969.412
+			}
+		},
+		{
+			"id": "[email protected]/s",
+			"executor_type": "ConstantArrivalRate",
+			"config": {
+				"max_vus": 800,
+				"duration_secs": 120,
+				"rate": 100.0
+			},
+			"total_requests": 57,
+			"total_tokens": 9983,
+			"token_throughput_secs": 83.83914212119033,
+			"duration_ms": 119073,
+			"time_to_first_token_ms": {
+				"p50": 60425.652,
+				"p60": 73426.16,
+				"p70": 83375.468,
+				"p80": 96034.495,
+				"p90": 104082.959,
+				"p95": 110616.366,
+				"p99": 114826.821,
+				"avg": 59050.64
+			},
+			"inter_token_latency_ms": {
+				"p50": 11.528,
+				"p60": 11.552,
+				"p70": 11.577,
+				"p80": 11.595,
+				"p90": 11.625,
+				"p95": 11.656,
+				"p99": 11.7,
+				"avg": 11.281
+			},
+			"failed_requests": 0,
+			"successful_requests": 57,
+			"request_rate": 0.4786968948119652,
+			"total_tokens_sent": 11400,
+			"e2e_latency_ms": {
+				"p50": 62519.008,
+				"p60": 74991.853,
+				"p70": 85562.76,
+				"p80": 96625.366,
+				"p90": 106351.421,
+				"p95": 112531.399,
+				"p99": 117196.304,
+				"avg": 61050.657
+			}
+		}
+	],
+	"start_time": "2025-05-21T13:01:17.074891817+00:00",
+	"end_time": "2025-05-21T13:17:26.396424745+00:00",
+	"system": {
+		"cpu": [
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
+			"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
+		],
+		"memory": "83.47 GB",
+		"os_name": "Debian GNU/Linux",
+		"os_version": "11",
+		"kernel": "5.15.167.4-microsoft-standard-WSL2",
+		"hostname": "computer"
+	}
+}