Commit
·
f7cde70
1
Parent(s):
1e69227
initial commit
Browse files- app.py +209 -0
- parse_results.py +70 -0
- requirements.txt +12 -0
- results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-09-15-05.json +296 -0
- results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json +296 -0
- results/microsoft_phi-4_2025-05-21-12-47-52.json +296 -0
- results/microsoft_phi-4_2025-05-21-13-17-26.json +296 -0
app.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from contextlib import ExitStack
from dataclasses import dataclass
from typing import List, Optional

import click
import gradio as gr
import pandas as pd

from parse_results import build_results
|
10 |
+
|
11 |
+
|
12 |
+
@dataclass
class PlotConfig:
    """Display settings for one metric plot of the dashboard."""

    # Label of the horizontal axis.
    x_title: str
    # Label of the vertical axis.
    y_title: str
    # Title shown above the plot.
    title: str
    # Percentiles (as fractions, e.g. 0.9) available for this metric, or None
    # when the metric is a single value with no percentile breakdown.
    percentiles: Optional[List[float]] = None
|
18 |
+
|
19 |
+
|
20 |
+
def run(from_results_dir, datasource, port):
    """Build and launch the Gradio benchmark dashboard.

    Args:
        from_results_dir: Optional directory of raw benchmark JSON results.
            When not None it is first converted with ``build_results`` into
            ``benchmarks.parquet`` (always that file name, whatever
            ``datasource`` is).
        datasource: ``file://`` or ``s3://`` URL of the Parquet file to show.
        port: Port the Gradio server listens on.

    Raises:
        ValueError: If ``datasource`` uses an unsupported scheme, or if no
            constant-arrival-rate rows are left after filtering.
    """
    css = '''
    .summary span {
        font-size: 10px;
        padding-top:0;
        padding-bottom:0;
    }
    '''

    summary_desc = '''
    ## Summary
    This table shows the average of the metrics for each model and QPS rate.

    The metrics are:
    * Inter token latency: Time to generate a new output token for each user querying the system.
      It translates as the “speed” perceived by the end-user. We aim for at least 300 words per minute (average reading speed), so ITL<150ms
    * Time to First Token: Time the user has to wait before seeing the first token of its answer.
      Lower waiting time are essential for real-time interactions, less so for offline workloads.
    * End-to-end latency: The overall time the system took to generate the full response to the user.
    * Throughput: The number of tokens per second the system can generate across all requests
    * Successful requests: The number of requests the system was able to honor in the benchmark timeframe
    * Error rate: The percentage of requests that ended up in error, as the system could not process them in time or failed to process them.

    '''

    # Filled once the datasource is loaded; the nested callbacks below read it
    # through the closure and never modify it.
    df_bench = pd.DataFrame()
    # One dict per plot: the Gradio component, its metric name and PlotConfig.
    line_plots_bench = []
    column_mappings = {'inter_token_latency_ms_p90': 'ITL P90 (ms)', 'time_to_first_token_ms_p90': 'TTFT P90 (ms)',
                       'e2e_latency_ms_p90': 'E2E P90 (ms)', 'token_throughput_secs': 'Throughput (tokens/s)',
                       'successful_requests': 'Successful requests', 'error_rate': 'Error rate (%)', 'model': 'Model',
                       'rate': 'QPS', 'run_id': 'Run ID'}
    # Placeholder data shown by the plots until the first ``load`` event fires.
    default_df = pd.DataFrame.from_dict(
        {"rate": [1, 2], "inter_token_latency_ms_p90": [10, 20],
         "version": ["default", "default"],
         "model": ["default", "default"]})

    def metric_frame(model, percentile):
        """Return a private copy of the rows of *model* with percentile metrics resolved.

        For every plot whose config declares percentiles, the column
        ``<metric>_<percentile>`` is copied into ``<metric>`` (0 when that
        column does not exist). Working on a copy keeps the shared
        ``df_bench`` untouched, so one session's percentile choice cannot
        leak into another session.
        """
        frame = df_bench[df_bench['model'] == model].copy()
        for plot in line_plots_bench:
            if plot['config'].percentiles:
                source = f"{plot['metric']}_{percentile}"
                frame[plot['metric']] = frame[source] if source in frame.columns else 0
        return frame

    def load_demo(model_bench, percentiles):
        """Initial page load: same output as a model/percentile change."""
        return update_bench(model_bench, percentiles)

    def update_bench(model, percentiles):
        """Return one data frame per plot followed by the summary table."""
        frame = metric_frame(model, percentiles)
        return [frame] * len(line_plots_bench) + [summary_table()]

    def summary_table() -> pd.DataFrame:
        """Average the P90 latencies and throughput per (model, run, rate)."""
        value_columns = ['inter_token_latency_ms_p90', 'time_to_first_token_ms_p90',
                         'e2e_latency_ms_p90', 'token_throughput_secs']
        data = df_bench.groupby(['model', 'run_id', 'rate'])[value_columns].mean().reset_index()
        data = data[['run_id', 'model', 'rate'] + value_columns]
        for metric in value_columns:
            # Values are rendered as text with two decimals for display.
            data[metric] = data[metric].map('{:.2f}'.format)
        return data.rename(columns=column_mappings)

    def load_bench_results(source) -> pd.DataFrame:
        """Read the Parquet file and keep only the constant-rate benchmark rows."""
        data = pd.read_parquet(source)
        # remove warmup and throughput
        data = data[(data['id'] != 'warmup') & (data['id'] != 'throughput')]
        # only keep constant rate; copy so the result is not a view of ``data``
        return data[data['executor_type'] == 'ConstantArrivalRate'].copy()

    def select_region(selection: gr.SelectData, model, percentiles):
        """Zoom every plot on the x-range selected in one of them."""
        min_w, max_w = selection.index
        frame = metric_frame(model, percentiles)
        data = frame[(frame['rate'] >= min_w) & (frame['rate'] <= max_w)]
        res = []
        for plot in line_plots_bench:
            if data.empty:
                # No point in the selection: min()/max() would be NaN, so let
                # the plot pick its own y-range.
                y_lim = None
            else:
                values = data[plot["metric"]]
                y_lim = [values.min(), values.max()]
            res.append(gr.LinePlot(x_lim=[min_w, max_w], y_lim=y_lim))
        return res

    def reset_region():
        """Clear the zoom of every plot."""
        return [gr.LinePlot(x_lim=None, y_lim=None) for _ in line_plots_bench]

    def load_datasource(datasource, fn):
        """Call ``fn(datasource)`` when the URL scheme is supported."""
        if datasource.startswith(('file://', 's3://')):
            return fn(datasource)
        raise ValueError(f"Unknown datasource: {datasource}")

    if from_results_dir is not None:
        build_results(from_results_dir, 'benchmarks.parquet', None)
    # Load data
    df_bench = load_datasource(datasource, load_bench_results)
    if df_bench.empty:
        raise ValueError(f"No constant-rate benchmark results found in {datasource}")

    # Define metrics
    latency_percentiles = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]
    metrics = {
        "inter_token_latency_ms": PlotConfig(title="Inter Token Latency (lower is better)", x_title="QPS",
                                             y_title="Time (ms)", percentiles=list(latency_percentiles)),
        "time_to_first_token_ms": PlotConfig(title="TTFT (lower is better)", x_title="QPS",
                                             y_title="Time (ms)", percentiles=list(latency_percentiles)),
        "e2e_latency_ms": PlotConfig(title="End to End Latency (lower is better)", x_title="QPS",
                                     y_title="Time (ms)", percentiles=list(latency_percentiles)),
        "token_throughput_secs": PlotConfig(title="Request Output Throughput (higher is better)", x_title="QPS",
                                            y_title="Tokens/s"),
        "successful_requests": PlotConfig(title="Successful requests (higher is better)", x_title="QPS",
                                          y_title="Count"),
        "error_rate": PlotConfig(title="Error rate", x_title="QPS", y_title="%"),
        "prompt_tokens": PlotConfig(title="Prompt tokens", x_title="QPS", y_title="Count"),
        "decoded_tokens": PlotConfig(title="Decoded tokens", x_title="QPS", y_title="Count")
    }

    models = df_bench["model"].unique()

    # get all available percentiles
    available = set()
    for config in metrics.values():
        if config.percentiles:
            available.update(config.percentiles)
    # Sort numerically before formatting, and round instead of truncating:
    # int(0.29 * 100) is 28 because of binary floating point.
    percentiles = [f'p{round(float(p) * 100)}' for p in sorted(available)]
    percentiles.append('avg')

    with gr.Blocks(css=css, title="Inference Benchmarker") as demo:
        with gr.Row():
            gr.Markdown("# Inference-benchmarker 🤗\n## Benchmarks results")
        with gr.Row():
            gr.Markdown(summary_desc)
        with gr.Row():
            table = gr.DataFrame(
                pd.DataFrame(),
                elem_classes=["summary"],
            )
        with gr.Row():
            gr.Markdown("## Details")
        with gr.Row():
            model = gr.Dropdown(list(models), label="Select model", value=models[0])
        with gr.Row():
            percentiles_bench = gr.Radio(percentiles, label="", value="avg")
        with ExitStack() as stack:
            for position, (metric, config) in enumerate(metrics.items()):
                if position % 2 == 0:
                    # Two plots per row: leave the previous row, open a new one.
                    stack.close()
                    stack.enter_context(gr.Row())
                line_plots_bench.append(
                    {"component": gr.LinePlot(default_df, label=f'{config.title}', x="rate", y=metric,
                                              y_title=config.y_title, x_title=config.x_title,
                                              color="run_id"
                                              ),
                     "model": model.value,
                     "metric": metric,
                     "config": config
                     },
                )

        plot_components = [item["component"] for item in line_plots_bench]

        for component in [model, percentiles_bench]:
            component.change(update_bench, [model, percentiles_bench],
                             plot_components + [table])
        # The percentile is passed too, so the zoom limits are computed for the
        # percentile this session is actually looking at.
        gr.on([plot.select for plot in plot_components], select_region, [model, percentiles_bench],
              outputs=plot_components)
        gr.on([plot.double_click for plot in plot_components], reset_region, None,
              outputs=plot_components)
        demo.load(load_demo, [model, percentiles_bench],
                  plot_components + [table])

    demo.launch(server_port=port, server_name="0.0.0.0")
|
198 |
+
|
199 |
+
|
200 |
+
@click.command()
@click.option('--from-results-dir', default=None, help='Load inference-benchmarker results from a directory')
@click.option('--datasource', default='file://benchmarks.parquet', help='Load a Parquet file already generated')
@click.option('--port', default=7860, help='Port to run the dashboard')
def main(from_results_dir, datasource, port):
    # Command-line entry point: hand the parsed options straight to run().
    # (A comment rather than a docstring, because click would display a
    # docstring as the command's help text.)
    run(from_results_dir=from_results_dir, datasource=datasource, port=port)
|
206 |
+
|
207 |
+
|
208 |
+
if __name__ == '__main__':
|
209 |
+
main(auto_envvar_prefix='DASHBOARD')
|
parse_results.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
|
8 |
+
def build_df(model: str, data_files: dict[str, str]) -> pd.DataFrame:
    """Flatten benchmark result files into one row per benchmark step.

    Args:
        model: Not used by this function; kept so existing callers keep working.
        data_files: Mapping whose values are paths of JSON result files. Each
            file must hold a ``config`` object (with ``model_name`` and
            ``run_id``, optionally ``meta``) and a ``results`` list.

    Returns:
        A DataFrame with one row per entry of ``results``. Nested keys are
        flattened with ``_`` and the ``config.`` part of a column name is
        dropped. ``engine``, ``tp``, ``version`` and ``device`` are copied
        from ``config.meta`` when present. Every row keeps index 0. An empty
        DataFrame is returned when there is nothing to load.
    """
    meta_keys = ('engine', 'tp', 'version', 'device')
    frames = []
    # Load the results
    for filename in data_files.values():
        with open(filename, 'r', encoding='utf-8') as f:
            data = json.load(f)
        config = data['config']
        # ``meta`` may be missing or null in the file.
        meta = config.get('meta') or {}
        for result in data['results']:
            entry = pd.json_normalize(result).to_dict(orient='records')[0]
            for key in meta_keys:
                if key in meta:
                    entry[key] = meta[key]
            entry['model'] = config['model_name']
            entry['run_id'] = config['run_id']
            df_tmp = pd.DataFrame(entry, index=[0])
            # drop the 'config.' part of column names
            df_tmp = df_tmp.rename(columns={c: c.split('config.')[-1] for c in df_tmp.columns})
            # replace . with _ in column names
            df_tmp.columns = [c.replace('.', '_') for c in df_tmp.columns]
            frames.append(df_tmp)
    if not frames:
        # pd.concat refuses an empty list.
        return pd.DataFrame()
    # A single concat instead of growing the frame row by row.
    return pd.concat(frames)
|
36 |
+
|
37 |
+
|
38 |
+
def build_results_df(results_dir) -> pd.DataFrame:
    """Load every JSON result file below ``results_dir`` into one DataFrame.

    The direct sub-directories of ``results_dir`` are scanned first, then
    ``results_dir`` itself; there is no deeper recursion. Directories and
    files are visited in sorted order so the row order is reproducible.

    Args:
        results_dir: Directory holding ``*.json`` result files, either
            directly or in its immediate sub-directories.

    Returns:
        The concatenation of ``build_df`` for every scanned directory, or an
        empty DataFrame when no result was found.
    """
    # list directories
    subdirs = sorted(
        path for path in (os.path.join(results_dir, d) for d in os.listdir(results_dir))
        if os.path.isdir(path)
    )
    frames = []
    for directory in subdirs + [results_dir]:
        # list json files in results directory; keyed by the full stem so that
        # 'a.x.json' and 'b.x.json' do not overwrite each other
        data_files = {
            os.path.splitext(filename)[0]: os.path.join(directory, filename)
            for filename in sorted(os.listdir(directory))
            if filename.endswith('.json')
        }
        frame = build_df(os.path.basename(os.path.normpath(directory)), data_files)
        if not frame.empty:
            frames.append(frame)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
|
50 |
+
|
51 |
+
|
52 |
+
def build_results(results_dir, results_file, device):
    """Aggregate raw JSON results into a Parquet file with derived metrics.

    Args:
        results_dir: Directory passed to ``build_results_df``.
        results_file: Destination handed to ``DataFrame.to_parquet``.
        device: Value stored in the ``device`` column when the results do not
            already provide one (may be None).

    Derived columns:
        error_rate: failed requests as a percentage of all requests.
        prompt_tokens: ``total_tokens_sent`` per successful request.
        decoded_tokens: ``total_tokens`` per successful request.
    A zero denominator yields NaN rather than ``inf``, so a step with no
    successful request cannot distort later averages.
    """
    df = build_results_df(results_dir)
    if 'device' not in df.columns:
        df['device'] = device
    total_requests = df['failed_requests'] + df['successful_requests']
    # ``where`` turns zero denominators into NaN before dividing.
    df['error_rate'] = df['failed_requests'] / total_requests.where(total_requests != 0) * 100.0
    successful = df['successful_requests'].where(df['successful_requests'] != 0)
    df['prompt_tokens'] = df['total_tokens_sent'] / successful
    df['decoded_tokens'] = df['total_tokens'] / successful
    df.to_parquet(results_file)
|
60 |
+
|
61 |
+
|
62 |
+
if __name__ == '__main__':
    # Command-line entry point: convert a directory of JSON results to Parquet.
    parser = argparse.ArgumentParser()
    # Not marked required: argparse never applies ``default`` to a required
    # option, so the 'results' default was unreachable before.
    parser.add_argument('--results-dir', default='results', type=str,
                        help='Path to the source directory containing the results')
    parser.add_argument('--results-file', type=str, required=True,
                        help='Path to the results file to write to. Can be a S3 path')
    parser.add_argument('--device', type=str, required=True, help='GPU name used for benchmarking')
    args = parser.parse_args()
    build_results(args.results_dir, args.results_file, args.device)
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==5.23.1
|
2 |
+
pandas==2.2.3
|
3 |
+
numpy==2.2.4
|
4 |
+
matplotlib==3.10.1
|
5 |
+
python-dateutil==2.9.0
|
6 |
+
pyyaml==6.0.2
|
7 |
+
fastapi==0.115.12
|
8 |
+
uvicorn==0.34.0
|
9 |
+
aiofiles==23.2.1
|
10 |
+
orjson==3.10.16
|
11 |
+
typing-extensions==4.13.0
|
12 |
+
anyio==4.9.0
|
results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-09-15-05.json
ADDED
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"config": {
|
3 |
+
"max_vus": 800,
|
4 |
+
"duration_secs": 120,
|
5 |
+
"benchmark_kind": "Rate",
|
6 |
+
"warmup_duration_secs": 30,
|
7 |
+
"rates": [
|
8 |
+
1.0,
|
9 |
+
10.0,
|
10 |
+
30.0,
|
11 |
+
100.0
|
12 |
+
],
|
13 |
+
"num_rates": 10,
|
14 |
+
"prompt_options": {
|
15 |
+
"num_tokens": 200,
|
16 |
+
"min_tokens": 180,
|
17 |
+
"max_tokens": 220,
|
18 |
+
"variance": 10
|
19 |
+
},
|
20 |
+
"decode_options": {
|
21 |
+
"num_tokens": 200,
|
22 |
+
"min_tokens": 180,
|
23 |
+
"max_tokens": 220,
|
24 |
+
"variance": 10
|
25 |
+
},
|
26 |
+
"tokenizer": "RedHatAI/phi-4-FP8-dynamic",
|
27 |
+
"model_name": "phi-4",
|
28 |
+
"profile": null,
|
29 |
+
"meta": null,
|
30 |
+
"run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (200 tokens)"
|
31 |
+
},
|
32 |
+
"results": [
|
33 |
+
{
|
34 |
+
"id": "warmup",
|
35 |
+
"executor_type": "ConstantVUs",
|
36 |
+
"config": {
|
37 |
+
"max_vus": 1,
|
38 |
+
"duration_secs": 30,
|
39 |
+
"rate": null
|
40 |
+
},
|
41 |
+
"total_requests": 7,
|
42 |
+
"total_tokens": 1401,
|
43 |
+
"token_throughput_secs": 41.207311909734074,
|
44 |
+
"duration_ms": 33998,
|
45 |
+
"time_to_first_token_ms": {
|
46 |
+
"p50": 30.74,
|
47 |
+
"p60": 30.848,
|
48 |
+
"p70": 31.032,
|
49 |
+
"p80": 31.367,
|
50 |
+
"p90": 600.369,
|
51 |
+
"p95": 1027.036,
|
52 |
+
"p99": 1368.37,
|
53 |
+
"avg": 233.964
|
54 |
+
},
|
55 |
+
"inter_token_latency_ms": {
|
56 |
+
"p50": 23.217,
|
57 |
+
"p60": 23.222,
|
58 |
+
"p70": 23.228,
|
59 |
+
"p80": 23.236,
|
60 |
+
"p90": 23.248,
|
61 |
+
"p95": 23.254,
|
62 |
+
"p99": 23.26,
|
63 |
+
"avg": 23.213
|
64 |
+
},
|
65 |
+
"failed_requests": 0,
|
66 |
+
"successful_requests": 7,
|
67 |
+
"request_rate": 0.2058894956232252,
|
68 |
+
"total_tokens_sent": 1400,
|
69 |
+
"e2e_latency_ms": {
|
70 |
+
"p50": 4743.409,
|
71 |
+
"p60": 4751.971,
|
72 |
+
"p70": 4775.205,
|
73 |
+
"p80": 4827.785,
|
74 |
+
"p90": 5318.839,
|
75 |
+
"p95": 5673.985,
|
76 |
+
"p99": 5958.102,
|
77 |
+
"avg": 4856.823
|
78 |
+
}
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"id": "[email protected]/s",
|
82 |
+
"executor_type": "ConstantArrivalRate",
|
83 |
+
"config": {
|
84 |
+
"max_vus": 800,
|
85 |
+
"duration_secs": 120,
|
86 |
+
"rate": 1.0
|
87 |
+
},
|
88 |
+
"total_requests": 115,
|
89 |
+
"total_tokens": 22163,
|
90 |
+
"token_throughput_secs": 186.64991064360598,
|
91 |
+
"duration_ms": 118741,
|
92 |
+
"time_to_first_token_ms": {
|
93 |
+
"p50": 43.445,
|
94 |
+
"p60": 45.341,
|
95 |
+
"p70": 47.407,
|
96 |
+
"p80": 50.324,
|
97 |
+
"p90": 53.509,
|
98 |
+
"p95": 54.94,
|
99 |
+
"p99": 57.022,
|
100 |
+
"avg": 43.314
|
101 |
+
},
|
102 |
+
"inter_token_latency_ms": {
|
103 |
+
"p50": 24.082,
|
104 |
+
"p60": 24.1,
|
105 |
+
"p70": 24.124,
|
106 |
+
"p80": 24.146,
|
107 |
+
"p90": 24.21,
|
108 |
+
"p95": 24.288,
|
109 |
+
"p99": 24.376,
|
110 |
+
"avg": 24.09
|
111 |
+
},
|
112 |
+
"failed_requests": 0,
|
113 |
+
"successful_requests": 115,
|
114 |
+
"request_rate": 0.9684943249566704,
|
115 |
+
"total_tokens_sent": 23000,
|
116 |
+
"e2e_latency_ms": {
|
117 |
+
"p50": 4814.201,
|
118 |
+
"p60": 4873.26,
|
119 |
+
"p70": 4947.365,
|
120 |
+
"p80": 5011.934,
|
121 |
+
"p90": 5104.903,
|
122 |
+
"p95": 5182.844,
|
123 |
+
"p99": 5309.301,
|
124 |
+
"avg": 4665.197
|
125 |
+
}
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"id": "[email protected]/s",
|
129 |
+
"executor_type": "ConstantArrivalRate",
|
130 |
+
"config": {
|
131 |
+
"max_vus": 800,
|
132 |
+
"duration_secs": 120,
|
133 |
+
"rate": 10.0
|
134 |
+
},
|
135 |
+
"total_requests": 1149,
|
136 |
+
"total_tokens": 217686,
|
137 |
+
"token_throughput_secs": 1837.4411468828155,
|
138 |
+
"duration_ms": 118472,
|
139 |
+
"time_to_first_token_ms": {
|
140 |
+
"p50": 55.249,
|
141 |
+
"p60": 57.796,
|
142 |
+
"p70": 60.296,
|
143 |
+
"p80": 63.162,
|
144 |
+
"p90": 66.14,
|
145 |
+
"p95": 67.799,
|
146 |
+
"p99": 70.85,
|
147 |
+
"avg": 55.52
|
148 |
+
},
|
149 |
+
"inter_token_latency_ms": {
|
150 |
+
"p50": 28.914,
|
151 |
+
"p60": 28.973,
|
152 |
+
"p70": 29.029,
|
153 |
+
"p80": 29.089,
|
154 |
+
"p90": 29.168,
|
155 |
+
"p95": 29.211,
|
156 |
+
"p99": 29.331,
|
157 |
+
"avg": 28.737
|
158 |
+
},
|
159 |
+
"failed_requests": 0,
|
160 |
+
"successful_requests": 1149,
|
161 |
+
"request_rate": 9.698464199665366,
|
162 |
+
"total_tokens_sent": 229800,
|
163 |
+
"e2e_latency_ms": {
|
164 |
+
"p50": 5707.118,
|
165 |
+
"p60": 5793.95,
|
166 |
+
"p70": 5885.254,
|
167 |
+
"p80": 5983.201,
|
168 |
+
"p90": 6126.889,
|
169 |
+
"p95": 6219.476,
|
170 |
+
"p99": 6386.803,
|
171 |
+
"avg": 5477.946
|
172 |
+
}
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"id": "[email protected]/s",
|
176 |
+
"executor_type": "ConstantArrivalRate",
|
177 |
+
"config": {
|
178 |
+
"max_vus": 800,
|
179 |
+
"duration_secs": 120,
|
180 |
+
"rate": 30.0
|
181 |
+
},
|
182 |
+
"total_requests": 1889,
|
183 |
+
"total_tokens": 348708,
|
184 |
+
"token_throughput_secs": 2911.7479692043544,
|
185 |
+
"duration_ms": 119758,
|
186 |
+
"time_to_first_token_ms": {
|
187 |
+
"p50": 22192.744,
|
188 |
+
"p60": 26837.194,
|
189 |
+
"p70": 29205.612,
|
190 |
+
"p80": 33069.312,
|
191 |
+
"p90": 35968.562,
|
192 |
+
"p95": 36825.858,
|
193 |
+
"p99": 37298.867,
|
194 |
+
"avg": 19829.052
|
195 |
+
},
|
196 |
+
"inter_token_latency_ms": {
|
197 |
+
"p50": 64.987,
|
198 |
+
"p60": 66.093,
|
199 |
+
"p70": 67.344,
|
200 |
+
"p80": 72.108,
|
201 |
+
"p90": 90.713,
|
202 |
+
"p95": 98.38,
|
203 |
+
"p99": 177.348,
|
204 |
+
"avg": 69.926
|
205 |
+
},
|
206 |
+
"failed_requests": 0,
|
207 |
+
"successful_requests": 1889,
|
208 |
+
"request_rate": 15.77334593363796,
|
209 |
+
"total_tokens_sent": 377800,
|
210 |
+
"e2e_latency_ms": {
|
211 |
+
"p50": 33837.749,
|
212 |
+
"p60": 38364.805,
|
213 |
+
"p70": 42612.972,
|
214 |
+
"p80": 45779.935,
|
215 |
+
"p90": 48249.655,
|
216 |
+
"p95": 49268.594,
|
217 |
+
"p99": 50884.661,
|
218 |
+
"avg": 32263.266
|
219 |
+
}
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"id": "[email protected]/s",
|
223 |
+
"executor_type": "ConstantArrivalRate",
|
224 |
+
"config": {
|
225 |
+
"max_vus": 800,
|
226 |
+
"duration_secs": 120,
|
227 |
+
"rate": 100.0
|
228 |
+
},
|
229 |
+
"total_requests": 1923,
|
230 |
+
"total_tokens": 355495,
|
231 |
+
"token_throughput_secs": 2963.3510051149824,
|
232 |
+
"duration_ms": 119963,
|
233 |
+
"time_to_first_token_ms": {
|
234 |
+
"p50": 30849.07,
|
235 |
+
"p60": 32647.17,
|
236 |
+
"p70": 35695.762,
|
237 |
+
"p80": 36657.309,
|
238 |
+
"p90": 37063.893,
|
239 |
+
"p95": 37265.804,
|
240 |
+
"p99": 37693.244,
|
241 |
+
"avg": 25983.203
|
242 |
+
},
|
243 |
+
"inter_token_latency_ms": {
|
244 |
+
"p50": 64.756,
|
245 |
+
"p60": 66.434,
|
246 |
+
"p70": 68.803,
|
247 |
+
"p80": 83.204,
|
248 |
+
"p90": 96.295,
|
249 |
+
"p95": 103.874,
|
250 |
+
"p99": 163.895,
|
251 |
+
"avg": 73.033
|
252 |
+
},
|
253 |
+
"failed_requests": 0,
|
254 |
+
"successful_requests": 1923,
|
255 |
+
"request_rate": 16.02982878194099,
|
256 |
+
"total_tokens_sent": 384600,
|
257 |
+
"e2e_latency_ms": {
|
258 |
+
"p50": 44432.763,
|
259 |
+
"p60": 46273.082,
|
260 |
+
"p70": 47729.904,
|
261 |
+
"p80": 48714.768,
|
262 |
+
"p90": 49917.33,
|
263 |
+
"p95": 50686.527,
|
264 |
+
"p99": 51992.951,
|
265 |
+
"avg": 38685.294
|
266 |
+
}
|
267 |
+
}
|
268 |
+
],
|
269 |
+
"start_time": "2025-05-21T09:04:59.479961191+00:00",
|
270 |
+
"end_time": "2025-05-21T09:15:05.115323148+00:00",
|
271 |
+
"system": {
|
272 |
+
"cpu": [
|
273 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
|
274 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
|
275 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
|
276 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
|
277 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
|
278 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
|
279 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
|
280 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
|
281 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
|
282 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
|
283 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
|
284 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
|
285 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
|
286 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
|
287 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
|
288 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
|
289 |
+
],
|
290 |
+
"memory": "83.47 GB",
|
291 |
+
"os_name": "Debian GNU/Linux",
|
292 |
+
"os_version": "11",
|
293 |
+
"kernel": "5.15.167.4-microsoft-standard-WSL2",
|
294 |
+
"hostname": "computer"
|
295 |
+
}
|
296 |
+
}
|
results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json
ADDED
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"config": {
|
3 |
+
"max_vus": 800,
|
4 |
+
"duration_secs": 120,
|
5 |
+
"benchmark_kind": "Rate",
|
6 |
+
"warmup_duration_secs": 30,
|
7 |
+
"rates": [
|
8 |
+
1.0,
|
9 |
+
10.0,
|
10 |
+
30.0,
|
11 |
+
100.0
|
12 |
+
],
|
13 |
+
"num_rates": 10,
|
14 |
+
"prompt_options": {
|
15 |
+
"num_tokens": 8000,
|
16 |
+
"min_tokens": 7980,
|
17 |
+
"max_tokens": 8020,
|
18 |
+
"variance": 10
|
19 |
+
},
|
20 |
+
"decode_options": {
|
21 |
+
"num_tokens": 8000,
|
22 |
+
"min_tokens": 7980,
|
23 |
+
"max_tokens": 8020,
|
24 |
+
"variance": 10
|
25 |
+
},
|
26 |
+
"tokenizer": "RedHatAI/phi-4-FP8-dynamic",
|
27 |
+
"model_name": "phi-4",
|
28 |
+
"profile": null,
|
29 |
+
"meta": null,
|
30 |
+
"run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (8000 tokens)"
|
31 |
+
},
|
32 |
+
"results": [
|
33 |
+
{
|
34 |
+
"id": "warmup",
|
35 |
+
"executor_type": "ConstantVUs",
|
36 |
+
"config": {
|
37 |
+
"max_vus": 1,
|
38 |
+
"duration_secs": 30,
|
39 |
+
"rate": null
|
40 |
+
},
|
41 |
+
"total_requests": 2,
|
42 |
+
"total_tokens": 1643,
|
43 |
+
"token_throughput_secs": 38.490013255851395,
|
44 |
+
"duration_ms": 42686,
|
45 |
+
"time_to_first_token_ms": {
|
46 |
+
"p50": 1276.801,
|
47 |
+
"p60": 1388.913,
|
48 |
+
"p70": 1501.026,
|
49 |
+
"p80": 1613.139,
|
50 |
+
"p90": 1725.252,
|
51 |
+
"p95": 1781.309,
|
52 |
+
"p99": 1826.154,
|
53 |
+
"avg": 1276.801
|
54 |
+
},
|
55 |
+
"inter_token_latency_ms": {
|
56 |
+
"p50": 24.424,
|
57 |
+
"p60": 24.432,
|
58 |
+
"p70": 24.44,
|
59 |
+
"p80": 24.448,
|
60 |
+
"p90": 24.456,
|
61 |
+
"p95": 24.46,
|
62 |
+
"p99": 24.463,
|
63 |
+
"avg": 24.424
|
64 |
+
},
|
65 |
+
"failed_requests": 0,
|
66 |
+
"successful_requests": 2,
|
67 |
+
"request_rate": 0.0468533332390157,
|
68 |
+
"total_tokens_sent": 16000,
|
69 |
+
"e2e_latency_ms": {
|
70 |
+
"p50": 21343.075,
|
71 |
+
"p60": 21391.438,
|
72 |
+
"p70": 21439.801,
|
73 |
+
"p80": 21488.164,
|
74 |
+
"p90": 21536.527,
|
75 |
+
"p95": 21560.709,
|
76 |
+
"p99": 21580.054,
|
77 |
+
"avg": 21343.075
|
78 |
+
}
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"id": "[email protected]/s",
|
82 |
+
"executor_type": "ConstantArrivalRate",
|
83 |
+
"config": {
|
84 |
+
"max_vus": 800,
|
85 |
+
"duration_secs": 120,
|
86 |
+
"rate": 1.0
|
87 |
+
},
|
88 |
+
"total_requests": 90,
|
89 |
+
"total_tokens": 55892,
|
90 |
+
"token_throughput_secs": 478.696852515677,
|
91 |
+
"duration_ms": 116758,
|
92 |
+
"time_to_first_token_ms": {
|
93 |
+
"p50": 118.856,
|
94 |
+
"p60": 124.707,
|
95 |
+
"p70": 131.654,
|
96 |
+
"p80": 135.562,
|
97 |
+
"p90": 145.529,
|
98 |
+
"p95": 150.366,
|
99 |
+
"p99": 715.649,
|
100 |
+
"avg": 128.611
|
101 |
+
},
|
102 |
+
"inter_token_latency_ms": {
|
103 |
+
"p50": 45.758,
|
104 |
+
"p60": 46.229,
|
105 |
+
"p70": 46.314,
|
106 |
+
"p80": 46.373,
|
107 |
+
"p90": 46.483,
|
108 |
+
"p95": 46.581,
|
109 |
+
"p99": 46.871,
|
110 |
+
"avg": 43.271
|
111 |
+
},
|
112 |
+
"failed_requests": 0,
|
113 |
+
"successful_requests": 90,
|
114 |
+
"request_rate": 0.7708208102485317,
|
115 |
+
"total_tokens_sent": 720000,
|
116 |
+
"e2e_latency_ms": {
|
117 |
+
"p50": 27887.256,
|
118 |
+
"p60": 30188.411,
|
119 |
+
"p70": 31661.903,
|
120 |
+
"p80": 35685.812,
|
121 |
+
"p90": 45661.636,
|
122 |
+
"p95": 50093.628,
|
123 |
+
"p99": 59727.184,
|
124 |
+
"avg": 27093.895
|
125 |
+
}
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"id": "[email protected]/s",
|
129 |
+
"executor_type": "ConstantArrivalRate",
|
130 |
+
"config": {
|
131 |
+
"max_vus": 800,
|
132 |
+
"duration_secs": 120,
|
133 |
+
"rate": 10.0
|
134 |
+
},
|
135 |
+
"total_requests": 97,
|
136 |
+
"total_tokens": 45779,
|
137 |
+
"token_throughput_secs": 385.8671945353039,
|
138 |
+
"duration_ms": 118639,
|
139 |
+
"time_to_first_token_ms": {
|
140 |
+
"p50": 264.625,
|
141 |
+
"p60": 314.639,
|
142 |
+
"p70": 341.786,
|
143 |
+
"p80": 416.021,
|
144 |
+
"p90": 502.604,
|
145 |
+
"p95": 608.336,
|
146 |
+
"p99": 712.908,
|
147 |
+
"avg": 278.878
|
148 |
+
},
|
149 |
+
"inter_token_latency_ms": {
|
150 |
+
"p50": 152.068,
|
151 |
+
"p60": 183.639,
|
152 |
+
"p70": 208.294,
|
153 |
+
"p80": 210.057,
|
154 |
+
"p90": 211.894,
|
155 |
+
"p95": 421.244,
|
156 |
+
"p99": 436.578,
|
157 |
+
"avg": 190.502
|
158 |
+
},
|
159 |
+
"failed_requests": 0,
|
160 |
+
"successful_requests": 97,
|
161 |
+
"request_rate": 0.8176045319890011,
|
162 |
+
"total_tokens_sent": 776000,
|
163 |
+
"e2e_latency_ms": {
|
164 |
+
"p50": 89809.719,
|
165 |
+
"p60": 90599.198,
|
166 |
+
"p70": 97086.861,
|
167 |
+
"p80": 97763.592,
|
168 |
+
"p90": 102705.608,
|
169 |
+
"p95": 105891.319,
|
170 |
+
"p99": 109209.372,
|
171 |
+
"avg": 80168.287
|
172 |
+
}
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"id": "[email protected]/s",
|
176 |
+
"executor_type": "ConstantArrivalRate",
|
177 |
+
"config": {
|
178 |
+
"max_vus": 800,
|
179 |
+
"duration_secs": 120,
|
180 |
+
"rate": 30.0
|
181 |
+
},
|
182 |
+
"total_requests": 108,
|
183 |
+
"total_tokens": 48755,
|
184 |
+
"token_throughput_secs": 408.5182278415837,
|
185 |
+
"duration_ms": 119345,
|
186 |
+
"time_to_first_token_ms": {
|
187 |
+
"p50": 315.639,
|
188 |
+
"p60": 364.113,
|
189 |
+
"p70": 440.936,
|
190 |
+
"p80": 517.15,
|
191 |
+
"p90": 635.496,
|
192 |
+
"p95": 743.467,
|
193 |
+
"p99": 886.077,
|
194 |
+
"avg": 348.945
|
195 |
+
},
|
196 |
+
"inter_token_latency_ms": {
|
197 |
+
"p50": 172.827,
|
198 |
+
"p60": 189.057,
|
199 |
+
"p70": 196.538,
|
200 |
+
"p80": 201.266,
|
201 |
+
"p90": 442.975,
|
202 |
+
"p95": 465.991,
|
203 |
+
"p99": 473.842,
|
204 |
+
"avg": 207.845
|
205 |
+
},
|
206 |
+
"failed_requests": 0,
|
207 |
+
"successful_requests": 108,
|
208 |
+
"request_rate": 0.9049321835071489,
|
209 |
+
"total_tokens_sent": 864000,
|
210 |
+
"e2e_latency_ms": {
|
211 |
+
"p50": 89868.756,
|
212 |
+
"p60": 96902.23,
|
213 |
+
"p70": 98937.333,
|
214 |
+
"p80": 102789.849,
|
215 |
+
"p90": 109541.9,
|
216 |
+
"p95": 111388.456,
|
217 |
+
"p99": 114281.927,
|
218 |
+
"avg": 82072.638
|
219 |
+
}
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"id": "[email protected]/s",
|
223 |
+
"executor_type": "ConstantArrivalRate",
|
224 |
+
"config": {
|
225 |
+
"max_vus": 800,
|
226 |
+
"duration_secs": 120,
|
227 |
+
"rate": 100.0
|
228 |
+
},
|
229 |
+
"total_requests": 125,
|
230 |
+
"total_tokens": 57918,
|
231 |
+
"token_throughput_secs": 485.359321343381,
|
232 |
+
"duration_ms": 119330,
|
233 |
+
"time_to_first_token_ms": {
|
234 |
+
"p50": 1154.434,
|
235 |
+
"p60": 1276.393,
|
236 |
+
"p70": 1440.368,
|
237 |
+
"p80": 1604.069,
|
238 |
+
"p90": 1768.54,
|
239 |
+
"p95": 1850.13,
|
240 |
+
"p99": 1919.678,
|
241 |
+
"avg": 1208.132
|
242 |
+
},
|
243 |
+
"inter_token_latency_ms": {
|
244 |
+
"p50": 166.875,
|
245 |
+
"p60": 166.884,
|
246 |
+
"p70": 167.245,
|
247 |
+
"p80": 188.28,
|
248 |
+
"p90": 350.172,
|
249 |
+
"p95": 417.485,
|
250 |
+
"p99": 437.566,
|
251 |
+
"avg": 186.06
|
252 |
+
},
|
253 |
+
"failed_requests": 0,
|
254 |
+
"successful_requests": 125,
|
255 |
+
"request_rate": 1.047513988188864,
|
256 |
+
"total_tokens_sent": 1000000,
|
257 |
+
"e2e_latency_ms": {
|
258 |
+
"p50": 82803.004,
|
259 |
+
"p60": 89976.229,
|
260 |
+
"p70": 90374.914,
|
261 |
+
"p80": 99727.225,
|
262 |
+
"p90": 108866.194,
|
263 |
+
"p95": 113444.528,
|
264 |
+
"p99": 116545.189,
|
265 |
+
"avg": 77917.015
|
266 |
+
}
|
267 |
+
}
|
268 |
+
],
|
269 |
+
"start_time": "2025-05-21T13:41:44.260015742+00:00",
|
270 |
+
"end_time": "2025-05-21T13:56:47.150683889+00:00",
|
271 |
+
"system": {
|
272 |
+
"cpu": [
|
273 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
|
274 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
|
275 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
|
276 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
|
277 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
|
278 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
|
279 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
|
280 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
|
281 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
|
282 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
|
283 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
|
284 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
|
285 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
|
286 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
|
287 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
|
288 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
|
289 |
+
],
|
290 |
+
"memory": "83.47 GB",
|
291 |
+
"os_name": "Debian GNU/Linux",
|
292 |
+
"os_version": "11",
|
293 |
+
"kernel": "5.15.167.4-microsoft-standard-WSL2",
|
294 |
+
"hostname": "computer"
|
295 |
+
}
|
296 |
+
}
|
results/microsoft_phi-4_2025-05-21-12-47-52.json
ADDED
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"config": {
|
3 |
+
"max_vus": 800,
|
4 |
+
"duration_secs": 120,
|
5 |
+
"benchmark_kind": "Rate",
|
6 |
+
"warmup_duration_secs": 30,
|
7 |
+
"rates": [
|
8 |
+
1.0,
|
9 |
+
10.0,
|
10 |
+
30.0,
|
11 |
+
100.0
|
12 |
+
],
|
13 |
+
"num_rates": 10,
|
14 |
+
"prompt_options": {
|
15 |
+
"num_tokens": 200,
|
16 |
+
"min_tokens": 180,
|
17 |
+
"max_tokens": 220,
|
18 |
+
"variance": 10
|
19 |
+
},
|
20 |
+
"decode_options": {
|
21 |
+
"num_tokens": 200,
|
22 |
+
"min_tokens": 180,
|
23 |
+
"max_tokens": 220,
|
24 |
+
"variance": 10
|
25 |
+
},
|
26 |
+
"tokenizer": "microsoft/phi-4",
|
27 |
+
"model_name": "phi-4",
|
28 |
+
"profile": null,
|
29 |
+
"meta": null,
|
30 |
+
"run_id": "Ollama: unsloth/phi-4-GGUF:Q8_0 (200 tokens)"
|
31 |
+
},
|
32 |
+
"results": [
|
33 |
+
{
|
34 |
+
"id": "warmup",
|
35 |
+
"executor_type": "ConstantVUs",
|
36 |
+
"config": {
|
37 |
+
"max_vus": 1,
|
38 |
+
"duration_secs": 30,
|
39 |
+
"rate": null
|
40 |
+
},
|
41 |
+
"total_requests": 17,
|
42 |
+
"total_tokens": 2560,
|
43 |
+
"token_throughput_secs": 81.92346820970964,
|
44 |
+
"duration_ms": 31248,
|
45 |
+
"time_to_first_token_ms": {
|
46 |
+
"p50": 48.023,
|
47 |
+
"p60": 48.316,
|
48 |
+
"p70": 48.704,
|
49 |
+
"p80": 49.172,
|
50 |
+
"p90": 50.133,
|
51 |
+
"p95": 79.141,
|
52 |
+
"p99": 171.884,
|
53 |
+
"avg": 56.904
|
54 |
+
},
|
55 |
+
"inter_token_latency_ms": {
|
56 |
+
"p50": 11.835,
|
57 |
+
"p60": 11.849,
|
58 |
+
"p70": 11.866,
|
59 |
+
"p80": 11.888,
|
60 |
+
"p90": 11.999,
|
61 |
+
"p95": 12.031,
|
62 |
+
"p99": 12.057,
|
63 |
+
"avg": 11.863
|
64 |
+
},
|
65 |
+
"failed_requests": 0,
|
66 |
+
"successful_requests": 17,
|
67 |
+
"request_rate": 0.5440230310801031,
|
68 |
+
"total_tokens_sent": 3400,
|
69 |
+
"e2e_latency_ms": {
|
70 |
+
"p50": 2193.161,
|
71 |
+
"p60": 2256.189,
|
72 |
+
"p70": 2409.636,
|
73 |
+
"p80": 2503.287,
|
74 |
+
"p90": 2558.373,
|
75 |
+
"p95": 2565.267,
|
76 |
+
"p99": 2582.093,
|
77 |
+
"avg": 1837.986
|
78 |
+
}
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"id": "[email protected]/s",
|
82 |
+
"executor_type": "ConstantArrivalRate",
|
83 |
+
"config": {
|
84 |
+
"max_vus": 800,
|
85 |
+
"duration_secs": 120,
|
86 |
+
"rate": 1.0
|
87 |
+
},
|
88 |
+
"total_requests": 68,
|
89 |
+
"total_tokens": 13393,
|
90 |
+
"token_throughput_secs": 113.50678834081126,
|
91 |
+
"duration_ms": 117992,
|
92 |
+
"time_to_first_token_ms": {
|
93 |
+
"p50": 23628.355,
|
94 |
+
"p60": 28364.866,
|
95 |
+
"p70": 33468.314,
|
96 |
+
"p80": 37116.28,
|
97 |
+
"p90": 42197.075,
|
98 |
+
"p95": 44792.584,
|
99 |
+
"p99": 46808.871,
|
100 |
+
"avg": 23527.531
|
101 |
+
},
|
102 |
+
"inter_token_latency_ms": {
|
103 |
+
"p50": 17.148,
|
104 |
+
"p60": 17.164,
|
105 |
+
"p70": 17.183,
|
106 |
+
"p80": 17.199,
|
107 |
+
"p90": 17.22,
|
108 |
+
"p95": 17.235,
|
109 |
+
"p99": 17.256,
|
110 |
+
"avg": 17.123
|
111 |
+
},
|
112 |
+
"failed_requests": 0,
|
113 |
+
"successful_requests": 68,
|
114 |
+
"request_rate": 0.5763056527421164,
|
115 |
+
"total_tokens_sent": 13600,
|
116 |
+
"e2e_latency_ms": {
|
117 |
+
"p50": 26918.292,
|
118 |
+
"p60": 31837.746,
|
119 |
+
"p70": 36426.629,
|
120 |
+
"p80": 40565.391,
|
121 |
+
"p90": 45507.834,
|
122 |
+
"p95": 48259.487,
|
123 |
+
"p99": 50280.92,
|
124 |
+
"avg": 26884.974
|
125 |
+
}
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"id": "[email protected]/s",
|
129 |
+
"executor_type": "ConstantArrivalRate",
|
130 |
+
"config": {
|
131 |
+
"max_vus": 800,
|
132 |
+
"duration_secs": 120,
|
133 |
+
"rate": 10.0
|
134 |
+
},
|
135 |
+
"total_requests": 69,
|
136 |
+
"total_tokens": 13411,
|
137 |
+
"token_throughput_secs": 112.91469560470007,
|
138 |
+
"duration_ms": 118771,
|
139 |
+
"time_to_first_token_ms": {
|
140 |
+
"p50": 54889.419,
|
141 |
+
"p60": 66226.724,
|
142 |
+
"p70": 77657.43,
|
143 |
+
"p80": 87194.269,
|
144 |
+
"p90": 97361.153,
|
145 |
+
"p95": 102660.303,
|
146 |
+
"p99": 106894.626,
|
147 |
+
"avg": 54527.075
|
148 |
+
},
|
149 |
+
"inter_token_latency_ms": {
|
150 |
+
"p50": 17.284,
|
151 |
+
"p60": 17.295,
|
152 |
+
"p70": 17.305,
|
153 |
+
"p80": 17.328,
|
154 |
+
"p90": 17.385,
|
155 |
+
"p95": 17.394,
|
156 |
+
"p99": 17.447,
|
157 |
+
"avg": 17.279
|
158 |
+
},
|
159 |
+
"failed_requests": 0,
|
160 |
+
"successful_requests": 69,
|
161 |
+
"request_rate": 0.5809495188072705,
|
162 |
+
"total_tokens_sent": 13800,
|
163 |
+
"e2e_latency_ms": {
|
164 |
+
"p50": 58021.804,
|
165 |
+
"p60": 69751.13,
|
166 |
+
"p70": 80116.293,
|
167 |
+
"p80": 90587.03,
|
168 |
+
"p90": 100535.513,
|
169 |
+
"p95": 105903.68,
|
170 |
+
"p99": 110535.65,
|
171 |
+
"avg": 57868.946
|
172 |
+
}
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"id": "[email protected]/s",
|
176 |
+
"executor_type": "ConstantArrivalRate",
|
177 |
+
"config": {
|
178 |
+
"max_vus": 800,
|
179 |
+
"duration_secs": 120,
|
180 |
+
"rate": 30.0
|
181 |
+
},
|
182 |
+
"total_requests": 70,
|
183 |
+
"total_tokens": 13581,
|
184 |
+
"token_throughput_secs": 113.61611267427078,
|
185 |
+
"duration_ms": 119534,
|
186 |
+
"time_to_first_token_ms": {
|
187 |
+
"p50": 56313.526,
|
188 |
+
"p60": 68465.8,
|
189 |
+
"p70": 78580.113,
|
190 |
+
"p80": 90639.114,
|
191 |
+
"p90": 102040.301,
|
192 |
+
"p95": 108031.928,
|
193 |
+
"p99": 112499.04,
|
194 |
+
"avg": 56639.341
|
195 |
+
},
|
196 |
+
"inter_token_latency_ms": {
|
197 |
+
"p50": 17.172,
|
198 |
+
"p60": 17.182,
|
199 |
+
"p70": 17.217,
|
200 |
+
"p80": 17.235,
|
201 |
+
"p90": 17.256,
|
202 |
+
"p95": 17.31,
|
203 |
+
"p99": 17.346,
|
204 |
+
"avg": 17.18
|
205 |
+
},
|
206 |
+
"failed_requests": 0,
|
207 |
+
"successful_requests": 70,
|
208 |
+
"request_rate": 0.5856069425814708,
|
209 |
+
"total_tokens_sent": 14000,
|
210 |
+
"e2e_latency_ms": {
|
211 |
+
"p50": 59683.651,
|
212 |
+
"p60": 71746.875,
|
213 |
+
"p70": 81953.181,
|
214 |
+
"p80": 94277.653,
|
215 |
+
"p90": 105378.271,
|
216 |
+
"p95": 111453.36,
|
217 |
+
"p99": 115949.496,
|
218 |
+
"avg": 59958.385
|
219 |
+
}
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"id": "[email protected]/s",
|
223 |
+
"executor_type": "ConstantArrivalRate",
|
224 |
+
"config": {
|
225 |
+
"max_vus": 800,
|
226 |
+
"duration_secs": 120,
|
227 |
+
"rate": 100.0
|
228 |
+
},
|
229 |
+
"total_requests": 70,
|
230 |
+
"total_tokens": 13359,
|
231 |
+
"token_throughput_secs": 114.42379660997986,
|
232 |
+
"duration_ms": 116750,
|
233 |
+
"time_to_first_token_ms": {
|
234 |
+
"p50": 57218.949,
|
235 |
+
"p60": 67960.841,
|
236 |
+
"p70": 79764.715,
|
237 |
+
"p80": 91579.471,
|
238 |
+
"p90": 102620.956,
|
239 |
+
"p95": 107961.016,
|
240 |
+
"p99": 112866.279,
|
241 |
+
"avg": 56772.876
|
242 |
+
},
|
243 |
+
"inter_token_latency_ms": {
|
244 |
+
"p50": 17.171,
|
245 |
+
"p60": 17.189,
|
246 |
+
"p70": 17.201,
|
247 |
+
"p80": 17.215,
|
248 |
+
"p90": 17.245,
|
249 |
+
"p95": 17.299,
|
250 |
+
"p99": 17.353,
|
251 |
+
"avg": 17.179
|
252 |
+
},
|
253 |
+
"failed_requests": 0,
|
254 |
+
"successful_requests": 70,
|
255 |
+
"request_rate": 0.5995707584922966,
|
256 |
+
"total_tokens_sent": 14000,
|
257 |
+
"e2e_latency_ms": {
|
258 |
+
"p50": 60551.916,
|
259 |
+
"p60": 71380.408,
|
260 |
+
"p70": 83198.203,
|
261 |
+
"p80": 93909.886,
|
262 |
+
"p90": 105788.774,
|
263 |
+
"p95": 111364.807,
|
264 |
+
"p99": 115968.729,
|
265 |
+
"avg": 60037.39
|
266 |
+
}
|
267 |
+
}
|
268 |
+
],
|
269 |
+
"start_time": "2025-05-21T12:32:04.299141299+00:00",
|
270 |
+
"end_time": "2025-05-21T12:47:52.695866821+00:00",
|
271 |
+
"system": {
|
272 |
+
"cpu": [
|
273 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
|
274 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
|
275 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
|
276 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
|
277 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
|
278 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
|
279 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
|
280 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
|
281 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
|
282 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
|
283 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
|
284 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
|
285 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
|
286 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
|
287 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
|
288 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
|
289 |
+
],
|
290 |
+
"memory": "83.47 GB",
|
291 |
+
"os_name": "Debian GNU/Linux",
|
292 |
+
"os_version": "11",
|
293 |
+
"kernel": "5.15.167.4-microsoft-standard-WSL2",
|
294 |
+
"hostname": "computer"
|
295 |
+
}
|
296 |
+
}
|
results/microsoft_phi-4_2025-05-21-13-17-26.json
ADDED
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"config": {
|
3 |
+
"max_vus": 800,
|
4 |
+
"duration_secs": 120,
|
5 |
+
"benchmark_kind": "Rate",
|
6 |
+
"warmup_duration_secs": 30,
|
7 |
+
"rates": [
|
8 |
+
1.0,
|
9 |
+
10.0,
|
10 |
+
30.0,
|
11 |
+
100.0
|
12 |
+
],
|
13 |
+
"num_rates": 10,
|
14 |
+
"prompt_options": {
|
15 |
+
"num_tokens": 200,
|
16 |
+
"min_tokens": 180,
|
17 |
+
"max_tokens": 220,
|
18 |
+
"variance": 10
|
19 |
+
},
|
20 |
+
"decode_options": {
|
21 |
+
"num_tokens": 200,
|
22 |
+
"min_tokens": 180,
|
23 |
+
"max_tokens": 220,
|
24 |
+
"variance": 10
|
25 |
+
},
|
26 |
+
"tokenizer": "microsoft/phi-4",
|
27 |
+
"model_name": "phi-4",
|
28 |
+
"profile": null,
|
29 |
+
"meta": null,
|
30 |
+
"run_id": "LM Studio: lmstudio-community/phi-4-GGUF:Q8_0 (200 tokens)"
|
31 |
+
},
|
32 |
+
"results": [
|
33 |
+
{
|
34 |
+
"id": "warmup",
|
35 |
+
"executor_type": "ConstantVUs",
|
36 |
+
"config": {
|
37 |
+
"max_vus": 1,
|
38 |
+
"duration_secs": 30,
|
39 |
+
"rate": null
|
40 |
+
},
|
41 |
+
"total_requests": 13,
|
42 |
+
"total_tokens": 2610,
|
43 |
+
"token_throughput_secs": 83.60700961692694,
|
44 |
+
"duration_ms": 31217,
|
45 |
+
"time_to_first_token_ms": {
|
46 |
+
"p50": 90.517,
|
47 |
+
"p60": 93.25,
|
48 |
+
"p70": 102.443,
|
49 |
+
"p80": 109.227,
|
50 |
+
"p90": 130.959,
|
51 |
+
"p95": 207.294,
|
52 |
+
"p99": 293.629,
|
53 |
+
"avg": 108.58
|
54 |
+
},
|
55 |
+
"inter_token_latency_ms": {
|
56 |
+
"p50": 11.513,
|
57 |
+
"p60": 11.519,
|
58 |
+
"p70": 11.534,
|
59 |
+
"p80": 11.548,
|
60 |
+
"p90": 11.559,
|
61 |
+
"p95": 11.574,
|
62 |
+
"p99": 11.589,
|
63 |
+
"avg": 11.472
|
64 |
+
},
|
65 |
+
"failed_requests": 0,
|
66 |
+
"successful_requests": 13,
|
67 |
+
"request_rate": 0.41643338123373574,
|
68 |
+
"total_tokens_sent": 2600,
|
69 |
+
"e2e_latency_ms": {
|
70 |
+
"p50": 2419.372,
|
71 |
+
"p60": 2423.796,
|
72 |
+
"p70": 2432.426,
|
73 |
+
"p80": 2458.236,
|
74 |
+
"p90": 2525.006,
|
75 |
+
"p95": 2596.86,
|
76 |
+
"p99": 2667.757,
|
77 |
+
"avg": 2401.195
|
78 |
+
}
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"id": "[email protected]/s",
|
82 |
+
"executor_type": "ConstantArrivalRate",
|
83 |
+
"config": {
|
84 |
+
"max_vus": 800,
|
85 |
+
"duration_secs": 120,
|
86 |
+
"rate": 1.0
|
87 |
+
},
|
88 |
+
"total_requests": 52,
|
89 |
+
"total_tokens": 9915,
|
90 |
+
"token_throughput_secs": 84.1224984364473,
|
91 |
+
"duration_ms": 117863,
|
92 |
+
"time_to_first_token_ms": {
|
93 |
+
"p50": 31149.018,
|
94 |
+
"p60": 38159.307,
|
95 |
+
"p70": 44798.95,
|
96 |
+
"p80": 51599.01,
|
97 |
+
"p90": 58334.517,
|
98 |
+
"p95": 61414.588,
|
99 |
+
"p99": 63915.289,
|
100 |
+
"avg": 32379.62
|
101 |
+
},
|
102 |
+
"inter_token_latency_ms": {
|
103 |
+
"p50": 11.473,
|
104 |
+
"p60": 11.501,
|
105 |
+
"p70": 11.517,
|
106 |
+
"p80": 11.529,
|
107 |
+
"p90": 11.563,
|
108 |
+
"p95": 11.598,
|
109 |
+
"p99": 11.775,
|
110 |
+
"avg": 11.254
|
111 |
+
},
|
112 |
+
"failed_requests": 0,
|
113 |
+
"successful_requests": 52,
|
114 |
+
"request_rate": 0.4411870820670963,
|
115 |
+
"total_tokens_sent": 10400,
|
116 |
+
"e2e_latency_ms": {
|
117 |
+
"p50": 33388.263,
|
118 |
+
"p60": 40395.415,
|
119 |
+
"p70": 47230.795,
|
120 |
+
"p80": 53979.194,
|
121 |
+
"p90": 60382.07,
|
122 |
+
"p95": 63519.032,
|
123 |
+
"p99": 66184.234,
|
124 |
+
"avg": 34556.301
|
125 |
+
}
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"id": "[email protected]/s",
|
129 |
+
"executor_type": "ConstantArrivalRate",
|
130 |
+
"config": {
|
131 |
+
"max_vus": 800,
|
132 |
+
"duration_secs": 120,
|
133 |
+
"rate": 10.0
|
134 |
+
},
|
135 |
+
"total_requests": 51,
|
136 |
+
"total_tokens": 10041,
|
137 |
+
"token_throughput_secs": 84.04049965954646,
|
138 |
+
"duration_ms": 119478,
|
139 |
+
"time_to_first_token_ms": {
|
140 |
+
"p50": 55889.645,
|
141 |
+
"p60": 67098.347,
|
142 |
+
"p70": 78905.359,
|
143 |
+
"p80": 90289.182,
|
144 |
+
"p90": 101201.112,
|
145 |
+
"p95": 106805.272,
|
146 |
+
"p99": 111193.127,
|
147 |
+
"avg": 56139.066
|
148 |
+
},
|
149 |
+
"inter_token_latency_ms": {
|
150 |
+
"p50": 11.487,
|
151 |
+
"p60": 11.498,
|
152 |
+
"p70": 11.51,
|
153 |
+
"p80": 11.536,
|
154 |
+
"p90": 11.584,
|
155 |
+
"p95": 11.638,
|
156 |
+
"p99": 11.883,
|
157 |
+
"avg": 11.474
|
158 |
+
},
|
159 |
+
"failed_requests": 0,
|
160 |
+
"successful_requests": 51,
|
161 |
+
"request_rate": 0.4268564368725096,
|
162 |
+
"total_tokens_sent": 10200,
|
163 |
+
"e2e_latency_ms": {
|
164 |
+
"p50": 58084.912,
|
165 |
+
"p60": 69432.711,
|
166 |
+
"p70": 81080.254,
|
167 |
+
"p80": 92442.614,
|
168 |
+
"p90": 103527.041,
|
169 |
+
"p95": 108999.672,
|
170 |
+
"p99": 113397.637,
|
171 |
+
"avg": 58387.662
|
172 |
+
}
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"id": "[email protected]/s",
|
176 |
+
"executor_type": "ConstantArrivalRate",
|
177 |
+
"config": {
|
178 |
+
"max_vus": 800,
|
179 |
+
"duration_secs": 120,
|
180 |
+
"rate": 30.0
|
181 |
+
},
|
182 |
+
"total_requests": 51,
|
183 |
+
"total_tokens": 9889,
|
184 |
+
"token_throughput_secs": 84.08188681268076,
|
185 |
+
"duration_ms": 117611,
|
186 |
+
"time_to_first_token_ms": {
|
187 |
+
"p50": 55982.506,
|
188 |
+
"p60": 68000.692,
|
189 |
+
"p70": 79600.152,
|
190 |
+
"p80": 91108.706,
|
191 |
+
"p90": 101995.453,
|
192 |
+
"p95": 107929.312,
|
193 |
+
"p99": 112340.212,
|
194 |
+
"avg": 56754.648
|
195 |
+
},
|
196 |
+
"inter_token_latency_ms": {
|
197 |
+
"p50": 11.503,
|
198 |
+
"p60": 11.515,
|
199 |
+
"p70": 11.531,
|
200 |
+
"p80": 11.564,
|
201 |
+
"p90": 11.589,
|
202 |
+
"p95": 11.633,
|
203 |
+
"p99": 11.795,
|
204 |
+
"avg": 11.477
|
205 |
+
},
|
206 |
+
"failed_requests": 0,
|
207 |
+
"successful_requests": 51,
|
208 |
+
"request_rate": 0.43363092602353315,
|
209 |
+
"total_tokens_sent": 10200,
|
210 |
+
"e2e_latency_ms": {
|
211 |
+
"p50": 58352.067,
|
212 |
+
"p60": 70321.743,
|
213 |
+
"p70": 81960.377,
|
214 |
+
"p80": 93288.338,
|
215 |
+
"p90": 104277.554,
|
216 |
+
"p95": 110084.734,
|
217 |
+
"p99": 114675.842,
|
218 |
+
"avg": 58969.412
|
219 |
+
}
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"id": "[email protected]/s",
|
223 |
+
"executor_type": "ConstantArrivalRate",
|
224 |
+
"config": {
|
225 |
+
"max_vus": 800,
|
226 |
+
"duration_secs": 120,
|
227 |
+
"rate": 100.0
|
228 |
+
},
|
229 |
+
"total_requests": 57,
|
230 |
+
"total_tokens": 9983,
|
231 |
+
"token_throughput_secs": 83.83914212119033,
|
232 |
+
"duration_ms": 119073,
|
233 |
+
"time_to_first_token_ms": {
|
234 |
+
"p50": 60425.652,
|
235 |
+
"p60": 73426.16,
|
236 |
+
"p70": 83375.468,
|
237 |
+
"p80": 96034.495,
|
238 |
+
"p90": 104082.959,
|
239 |
+
"p95": 110616.366,
|
240 |
+
"p99": 114826.821,
|
241 |
+
"avg": 59050.64
|
242 |
+
},
|
243 |
+
"inter_token_latency_ms": {
|
244 |
+
"p50": 11.528,
|
245 |
+
"p60": 11.552,
|
246 |
+
"p70": 11.577,
|
247 |
+
"p80": 11.595,
|
248 |
+
"p90": 11.625,
|
249 |
+
"p95": 11.656,
|
250 |
+
"p99": 11.7,
|
251 |
+
"avg": 11.281
|
252 |
+
},
|
253 |
+
"failed_requests": 0,
|
254 |
+
"successful_requests": 57,
|
255 |
+
"request_rate": 0.4786968948119652,
|
256 |
+
"total_tokens_sent": 11400,
|
257 |
+
"e2e_latency_ms": {
|
258 |
+
"p50": 62519.008,
|
259 |
+
"p60": 74991.853,
|
260 |
+
"p70": 85562.76,
|
261 |
+
"p80": 96625.366,
|
262 |
+
"p90": 106351.421,
|
263 |
+
"p95": 112531.399,
|
264 |
+
"p99": 117196.304,
|
265 |
+
"avg": 61050.657
|
266 |
+
}
|
267 |
+
}
|
268 |
+
],
|
269 |
+
"start_time": "2025-05-21T13:01:17.074891817+00:00",
|
270 |
+
"end_time": "2025-05-21T13:17:26.396424745+00:00",
|
271 |
+
"system": {
|
272 |
+
"cpu": [
|
273 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
|
274 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
|
275 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
|
276 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
|
277 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
|
278 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
|
279 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
|
280 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
|
281 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
|
282 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
|
283 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
|
284 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
|
285 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
|
286 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
|
287 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
|
288 |
+
"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
|
289 |
+
],
|
290 |
+
"memory": "83.47 GB",
|
291 |
+
"os_name": "Debian GNU/Linux",
|
292 |
+
"os_version": "11",
|
293 |
+
"kernel": "5.15.167.4-microsoft-standard-WSL2",
|
294 |
+
"hostname": "computer"
|
295 |
+
}
|
296 |
+
}
|