textgeflecht commited on
Commit
f0d353c
·
verified ·
1 Parent(s): b5bc669

Update results/microsoft_phi-4_2025-05-27-07-58-36.json

Browse files
results/microsoft_phi-4_2025-05-27-07-58-36.json CHANGED
@@ -1,296 +1,296 @@
1
- {
2
- "config": {
3
- "max_vus": 800,
4
- "duration_secs": 120,
5
- "benchmark_kind": "Rate",
6
- "warmup_duration_secs": 30,
7
- "rates": [
8
- 1.0,
9
- 10.0,
10
- 30.0,
11
- 100.0
12
- ],
13
- "num_rates": 10,
14
- "prompt_options": {
15
- "num_tokens": 200,
16
- "min_tokens": 180,
17
- "max_tokens": 220,
18
- "variance": 10
19
- },
20
- "decode_options": {
21
- "num_tokens": 200,
22
- "min_tokens": 180,
23
- "max_tokens": 220,
24
- "variance": 10
25
- },
26
- "tokenizer": "microsoft/phi-4",
27
- "model_name": "phi-4",
28
- "profile": null,
29
- "meta": null,
30
- "run_id": "llama.cpp -np 64 -fa & KV cache Q8: unsloth/phi-4-GGUF:Q8_0 (200 tokens)"
31
- },
32
- "results": [
33
- {
34
- "id": "warmup",
35
- "executor_type": "ConstantVUs",
36
- "config": {
37
- "max_vus": 1,
38
- "duration_secs": 30,
39
- "rate": null
40
- },
41
- "total_requests": 6,
42
- "total_tokens": 1008,
43
- "token_throughput_secs": 30.886902450373114,
44
- "duration_ms": 32635,
45
- "time_to_first_token_ms": {
46
- "p50": 65.229,
47
- "p60": 65.846,
48
- "p70": 66.369,
49
- "p80": 66.891,
50
- "p90": 165.906,
51
- "p95": 215.413,
52
- "p99": 255.019,
53
- "avg": 97.398
54
- },
55
- "inter_token_latency_ms": {
56
- "p50": 32.408,
57
- "p60": 32.874,
58
- "p70": 33.215,
59
- "p80": 33.556,
60
- "p90": 34.032,
61
- "p95": 34.271,
62
- "p99": 34.461,
63
- "avg": 32.032
64
- },
65
- "failed_requests": 0,
66
- "successful_requests": 6,
67
- "request_rate": 0.18385060982364948,
68
- "total_tokens_sent": 1200,
69
- "e2e_latency_ms": {
70
- "p50": 6436.385,
71
- "p60": 6471.967,
72
- "p70": 6485.143,
73
- "p80": 6498.318,
74
- "p90": 6629.543,
75
- "p95": 6695.155,
76
- "p99": 6747.645,
77
- "avg": 5439.044
78
- }
79
- },
80
- {
81
- "id": "[email protected]/s",
82
- "executor_type": "ConstantArrivalRate",
83
- "config": {
84
- "max_vus": 800,
85
- "duration_secs": 120,
86
- "rate": 1.0
87
- },
88
- "total_requests": 70,
89
- "total_tokens": 12789,
90
- "token_throughput_secs": 107.5610800290394,
91
- "duration_ms": 118899,
92
- "time_to_first_token_ms": {
93
- "p50": 223.039,
94
- "p60": 263.086,
95
- "p70": 301.726,
96
- "p80": 351.592,
97
- "p90": 446.277,
98
- "p95": 524.554,
99
- "p99": 687.403,
100
- "avg": 261.592
101
- },
102
- "inter_token_latency_ms": {
103
- "p50": 189.153,
104
- "p60": 203.627,
105
- "p70": 216.902,
106
- "p80": 225.796,
107
- "p90": 235.447,
108
- "p95": 247.066,
109
- "p99": 251.787,
110
- "avg": 178.878
111
- },
112
- "failed_requests": 0,
113
- "successful_requests": 70,
114
- "request_rate": 0.5887305967654045,
115
- "total_tokens_sent": 14000,
116
- "e2e_latency_ms": {
117
- "p50": 35454.912,
118
- "p60": 40607.238,
119
- "p70": 43046.937,
120
- "p80": 44549.685,
121
- "p90": 47388.399,
122
- "p95": 49372.99,
123
- "p99": 51724.399,
124
- "avg": 32736.03
125
- }
126
- },
127
- {
128
- "id": "[email protected]/s",
129
- "executor_type": "ConstantArrivalRate",
130
- "config": {
131
- "max_vus": 800,
132
- "duration_secs": 120,
133
- "rate": 10.0
134
- },
135
- "total_requests": 132,
136
- "total_tokens": 23773,
137
- "token_throughput_secs": 204.77591318879323,
138
- "duration_ms": 116092,
139
- "time_to_first_token_ms": {
140
- "p50": 15342.264,
141
- "p60": 39555.975,
142
- "p70": 40113.352,
143
- "p80": 41116.218,
144
- "p90": 41755.572,
145
- "p95": 42168.407,
146
- "p99": 43570.966,
147
- "avg": 20486.632
148
- },
149
- "inter_token_latency_ms": {
150
- "p50": 248.004,
151
- "p60": 282.314,
152
- "p70": 294.062,
153
- "p80": 296.481,
154
- "p90": 299.062,
155
- "p95": 302.627,
156
- "p99": 652.413,
157
- "avg": 269.253
158
- },
159
- "failed_requests": 3,
160
- "successful_requests": 129,
161
- "request_rate": 1.1111804484648269,
162
- "total_tokens_sent": 25800,
163
- "e2e_latency_ms": {
164
- "p50": 54235.782,
165
- "p60": 87726.185,
166
- "p70": 96299.098,
167
- "p80": 99127.207,
168
- "p90": 101184.305,
169
- "p95": 102359.562,
170
- "p99": 104110.646,
171
- "avg": 68791.408
172
- }
173
- },
174
- {
175
- "id": "[email protected]/s",
176
- "executor_type": "ConstantArrivalRate",
177
- "config": {
178
- "max_vus": 800,
179
- "duration_secs": 120,
180
- "rate": 30.0
181
- },
182
- "total_requests": 131,
183
- "total_tokens": 24522,
184
- "token_throughput_secs": 212.57832422707094,
185
- "duration_ms": 115355,
186
- "time_to_first_token_ms": {
187
- "p50": 12995.218,
188
- "p60": 40118.385,
189
- "p70": 41798.91,
190
- "p80": 43406.593,
191
- "p90": 45430.637,
192
- "p95": 46713.711,
193
- "p99": 47827.533,
194
- "avg": 22508.982
195
- },
196
- "inter_token_latency_ms": {
197
- "p50": 239.607,
198
- "p60": 292.625,
199
- "p70": 303.451,
200
- "p80": 305.91,
201
- "p90": 307.71,
202
- "p95": 309.457,
203
- "p99": 335.394,
204
- "avg": 254.648
205
- },
206
- "failed_requests": 3,
207
- "successful_requests": 128,
208
- "request_rate": 1.109616895076465,
209
- "total_tokens_sent": 25600,
210
- "e2e_latency_ms": {
211
- "p50": 57279.527,
212
- "p60": 92501.513,
213
- "p70": 100539.07,
214
- "p80": 104067.985,
215
- "p90": 106079.891,
216
- "p95": 108412.055,
217
- "p99": 110926.325,
218
- "avg": 71057.313
219
- }
220
- },
221
- {
222
- "id": "[email protected]/s",
223
- "executor_type": "ConstantArrivalRate",
224
- "config": {
225
- "max_vus": 800,
226
- "duration_secs": 120,
227
- "rate": 100.0
228
- },
229
- "total_requests": 7,
230
- "total_tokens": 833,
231
- "token_throughput_secs": 6.953252666463842,
232
- "duration_ms": 119800,
233
- "time_to_first_token_ms": {
234
- "p50": 910.482,
235
- "p60": 914.617,
236
- "p70": 918.391,
237
- "p80": 922.164,
238
- "p90": 2391.437,
239
- "p95": 3126.073,
240
- "p99": 3713.781,
241
- "avg": 1260.213
242
- },
243
- "inter_token_latency_ms": {
244
- "p50": 341.136,
245
- "p60": 348.441,
246
- "p70": 387.776,
247
- "p80": 427.11,
248
- "p90": 526.482,
249
- "p95": 576.168,
250
- "p99": 615.916,
251
- "avg": 397.48
252
- },
253
- "failed_requests": 1,
254
- "successful_requests": 6,
255
- "request_rate": 0.05008345257957149,
256
- "total_tokens_sent": 1200,
257
- "e2e_latency_ms": {
258
- "p50": 54130.288,
259
- "p60": 61916.924,
260
- "p70": 64878.386,
261
- "p80": 67839.847,
262
- "p90": 71403.414,
263
- "p95": 73185.198,
264
- "p99": 74610.625,
265
- "avg": 48627.299
266
- }
267
- }
268
- ],
269
- "start_time": "2025-05-27T07:44:52.169441869+00:00",
270
- "end_time": "2025-05-27T07:58:36.012073156+00:00",
271
- "system": {
272
- "cpu": [
273
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
274
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
275
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
276
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
277
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
278
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
279
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
280
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
281
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
282
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
283
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
284
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
285
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
286
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
287
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
288
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
289
- ],
290
- "memory": "83.47 GB",
291
- "os_name": "Debian GNU/Linux",
292
- "os_version": "11",
293
- "kernel": "5.15.167.4-microsoft-standard-WSL2",
294
- "hostname": "computer"
295
- }
296
  }
 
1
+ {
2
+ "config": {
3
+ "max_vus": 800,
4
+ "duration_secs": 120,
5
+ "benchmark_kind": "Rate",
6
+ "warmup_duration_secs": 30,
7
+ "rates": [
8
+ 1.0,
9
+ 10.0,
10
+ 30.0,
11
+ 100.0
12
+ ],
13
+ "num_rates": 10,
14
+ "prompt_options": {
15
+ "num_tokens": 200,
16
+ "min_tokens": 180,
17
+ "max_tokens": 220,
18
+ "variance": 10
19
+ },
20
+ "decode_options": {
21
+ "num_tokens": 200,
22
+ "min_tokens": 180,
23
+ "max_tokens": 220,
24
+ "variance": 10
25
+ },
26
+ "tokenizer": "microsoft/phi-4",
27
+ "model_name": "phi-4",
28
+ "profile": null,
29
+ "meta": null,
30
+ "run_id": "llama.cpp -np 64 -fa --cache-type-k q8_0 --cache-type-v q8_0 --no-kv-offload: unsloth/phi-4-GGUF:Q8_0 (200 tokens)"
31
+ },
32
+ "results": [
33
+ {
34
+ "id": "warmup",
35
+ "executor_type": "ConstantVUs",
36
+ "config": {
37
+ "max_vus": 1,
38
+ "duration_secs": 30,
39
+ "rate": null
40
+ },
41
+ "total_requests": 6,
42
+ "total_tokens": 1008,
43
+ "token_throughput_secs": 30.886902450373114,
44
+ "duration_ms": 32635,
45
+ "time_to_first_token_ms": {
46
+ "p50": 65.229,
47
+ "p60": 65.846,
48
+ "p70": 66.369,
49
+ "p80": 66.891,
50
+ "p90": 165.906,
51
+ "p95": 215.413,
52
+ "p99": 255.019,
53
+ "avg": 97.398
54
+ },
55
+ "inter_token_latency_ms": {
56
+ "p50": 32.408,
57
+ "p60": 32.874,
58
+ "p70": 33.215,
59
+ "p80": 33.556,
60
+ "p90": 34.032,
61
+ "p95": 34.271,
62
+ "p99": 34.461,
63
+ "avg": 32.032
64
+ },
65
+ "failed_requests": 0,
66
+ "successful_requests": 6,
67
+ "request_rate": 0.18385060982364948,
68
+ "total_tokens_sent": 1200,
69
+ "e2e_latency_ms": {
70
+ "p50": 6436.385,
71
+ "p60": 6471.967,
72
+ "p70": 6485.143,
73
+ "p80": 6498.318,
74
+ "p90": 6629.543,
75
+ "p95": 6695.155,
76
+ "p99": 6747.645,
77
+ "avg": 5439.044
78
+ }
79
+ },
80
+ {
81
+ "id": "[email protected]/s",
82
+ "executor_type": "ConstantArrivalRate",
83
+ "config": {
84
+ "max_vus": 800,
85
+ "duration_secs": 120,
86
+ "rate": 1.0
87
+ },
88
+ "total_requests": 70,
89
+ "total_tokens": 12789,
90
+ "token_throughput_secs": 107.5610800290394,
91
+ "duration_ms": 118899,
92
+ "time_to_first_token_ms": {
93
+ "p50": 223.039,
94
+ "p60": 263.086,
95
+ "p70": 301.726,
96
+ "p80": 351.592,
97
+ "p90": 446.277,
98
+ "p95": 524.554,
99
+ "p99": 687.403,
100
+ "avg": 261.592
101
+ },
102
+ "inter_token_latency_ms": {
103
+ "p50": 189.153,
104
+ "p60": 203.627,
105
+ "p70": 216.902,
106
+ "p80": 225.796,
107
+ "p90": 235.447,
108
+ "p95": 247.066,
109
+ "p99": 251.787,
110
+ "avg": 178.878
111
+ },
112
+ "failed_requests": 0,
113
+ "successful_requests": 70,
114
+ "request_rate": 0.5887305967654045,
115
+ "total_tokens_sent": 14000,
116
+ "e2e_latency_ms": {
117
+ "p50": 35454.912,
118
+ "p60": 40607.238,
119
+ "p70": 43046.937,
120
+ "p80": 44549.685,
121
+ "p90": 47388.399,
122
+ "p95": 49372.99,
123
+ "p99": 51724.399,
124
+ "avg": 32736.03
125
+ }
126
+ },
127
+ {
128
+ "id": "[email protected]/s",
129
+ "executor_type": "ConstantArrivalRate",
130
+ "config": {
131
+ "max_vus": 800,
132
+ "duration_secs": 120,
133
+ "rate": 10.0
134
+ },
135
+ "total_requests": 132,
136
+ "total_tokens": 23773,
137
+ "token_throughput_secs": 204.77591318879323,
138
+ "duration_ms": 116092,
139
+ "time_to_first_token_ms": {
140
+ "p50": 15342.264,
141
+ "p60": 39555.975,
142
+ "p70": 40113.352,
143
+ "p80": 41116.218,
144
+ "p90": 41755.572,
145
+ "p95": 42168.407,
146
+ "p99": 43570.966,
147
+ "avg": 20486.632
148
+ },
149
+ "inter_token_latency_ms": {
150
+ "p50": 248.004,
151
+ "p60": 282.314,
152
+ "p70": 294.062,
153
+ "p80": 296.481,
154
+ "p90": 299.062,
155
+ "p95": 302.627,
156
+ "p99": 652.413,
157
+ "avg": 269.253
158
+ },
159
+ "failed_requests": 3,
160
+ "successful_requests": 129,
161
+ "request_rate": 1.1111804484648269,
162
+ "total_tokens_sent": 25800,
163
+ "e2e_latency_ms": {
164
+ "p50": 54235.782,
165
+ "p60": 87726.185,
166
+ "p70": 96299.098,
167
+ "p80": 99127.207,
168
+ "p90": 101184.305,
169
+ "p95": 102359.562,
170
+ "p99": 104110.646,
171
+ "avg": 68791.408
172
+ }
173
+ },
174
+ {
175
+ "id": "[email protected]/s",
176
+ "executor_type": "ConstantArrivalRate",
177
+ "config": {
178
+ "max_vus": 800,
179
+ "duration_secs": 120,
180
+ "rate": 30.0
181
+ },
182
+ "total_requests": 131,
183
+ "total_tokens": 24522,
184
+ "token_throughput_secs": 212.57832422707094,
185
+ "duration_ms": 115355,
186
+ "time_to_first_token_ms": {
187
+ "p50": 12995.218,
188
+ "p60": 40118.385,
189
+ "p70": 41798.91,
190
+ "p80": 43406.593,
191
+ "p90": 45430.637,
192
+ "p95": 46713.711,
193
+ "p99": 47827.533,
194
+ "avg": 22508.982
195
+ },
196
+ "inter_token_latency_ms": {
197
+ "p50": 239.607,
198
+ "p60": 292.625,
199
+ "p70": 303.451,
200
+ "p80": 305.91,
201
+ "p90": 307.71,
202
+ "p95": 309.457,
203
+ "p99": 335.394,
204
+ "avg": 254.648
205
+ },
206
+ "failed_requests": 3,
207
+ "successful_requests": 128,
208
+ "request_rate": 1.109616895076465,
209
+ "total_tokens_sent": 25600,
210
+ "e2e_latency_ms": {
211
+ "p50": 57279.527,
212
+ "p60": 92501.513,
213
+ "p70": 100539.07,
214
+ "p80": 104067.985,
215
+ "p90": 106079.891,
216
+ "p95": 108412.055,
217
+ "p99": 110926.325,
218
+ "avg": 71057.313
219
+ }
220
+ },
221
+ {
222
+ "id": "[email protected]/s",
223
+ "executor_type": "ConstantArrivalRate",
224
+ "config": {
225
+ "max_vus": 800,
226
+ "duration_secs": 120,
227
+ "rate": 100.0
228
+ },
229
+ "total_requests": 7,
230
+ "total_tokens": 833,
231
+ "token_throughput_secs": 6.953252666463842,
232
+ "duration_ms": 119800,
233
+ "time_to_first_token_ms": {
234
+ "p50": 910.482,
235
+ "p60": 914.617,
236
+ "p70": 918.391,
237
+ "p80": 922.164,
238
+ "p90": 2391.437,
239
+ "p95": 3126.073,
240
+ "p99": 3713.781,
241
+ "avg": 1260.213
242
+ },
243
+ "inter_token_latency_ms": {
244
+ "p50": 341.136,
245
+ "p60": 348.441,
246
+ "p70": 387.776,
247
+ "p80": 427.11,
248
+ "p90": 526.482,
249
+ "p95": 576.168,
250
+ "p99": 615.916,
251
+ "avg": 397.48
252
+ },
253
+ "failed_requests": 1,
254
+ "successful_requests": 6,
255
+ "request_rate": 0.05008345257957149,
256
+ "total_tokens_sent": 1200,
257
+ "e2e_latency_ms": {
258
+ "p50": 54130.288,
259
+ "p60": 61916.924,
260
+ "p70": 64878.386,
261
+ "p80": 67839.847,
262
+ "p90": 71403.414,
263
+ "p95": 73185.198,
264
+ "p99": 74610.625,
265
+ "avg": 48627.299
266
+ }
267
+ }
268
+ ],
269
+ "start_time": "2025-05-27T07:44:52.169441869+00:00",
270
+ "end_time": "2025-05-27T07:58:36.012073156+00:00",
271
+ "system": {
272
+ "cpu": [
273
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
274
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
275
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
276
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
277
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
278
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
279
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
280
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
281
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
282
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
283
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
284
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
285
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
286
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
287
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
288
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
289
+ ],
290
+ "memory": "83.47 GB",
291
+ "os_name": "Debian GNU/Linux",
292
+ "os_version": "11",
293
+ "kernel": "5.15.167.4-microsoft-standard-WSL2",
294
+ "hostname": "computer"
295
+ }
296
  }