shubhrapandit committed on
Commit
cada640
·
verified ·
1 Parent(s): 653b617

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -12
README.md CHANGED
@@ -393,21 +393,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
393
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w8a8</td>
394
  <td>1.80</td>
395
  <td>1.2</td>
396
- <td>289</td>
397
  <td>4.0</td>
398
- <td>1020</td>
399
  <td>4.6</td>
400
- <td>1133</td>
401
  </tr>
402
  <tr>
403
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
404
  <td>2.75</td>
405
  <td>2.8</td>
406
- <td>341</td>
407
  <td>12.8</td>
408
- <td>1588</td>
409
  <td>16.4</td>
410
- <td>2037</td>
411
  </tr>
412
  <tr>
413
  <th rowspan="3" valign="top">H100x4</th>
@@ -424,21 +424,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
424
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-FP8-Dynamic</td>
425
  <td>1.73</td>
426
  <td>1.8</td>
427
- <td>247</td>
428
  <td>4.4</td>
429
- <td>621</td>
430
  <td>4.8</td>
431
- <td>669</td>
432
  </tr>
433
  <tr>
434
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
435
  <td>8.27</td>
436
  <td>13.2</td>
437
- <td>913</td>
438
  <td>13.2</td>
439
- <td>913</td>
440
  <td>99.2</td>
441
- <td>6777</td>
442
  </tr>
443
  </tbody>
444
  </table>
 
393
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w8a8</td>
394
  <td>1.80</td>
395
  <td>1.2</td>
396
+ <td>578</td>
397
  <td>4.0</td>
398
+ <td>2040</td>
399
  <td>4.6</td>
400
+ <td>2266</td>
401
  </tr>
402
  <tr>
403
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
404
  <td>2.75</td>
405
  <td>2.8</td>
406
+ <td>1364</td>
407
  <td>12.8</td>
408
+ <td>6352</td>
409
  <td>16.4</td>
410
+ <td>8148</td>
411
  </tr>
412
  <tr>
413
  <th rowspan="3" valign="top">H100x4</th>
 
424
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-FP8-Dynamic</td>
425
  <td>1.73</td>
426
  <td>1.8</td>
427
+ <td>479</td>
428
  <td>4.4</td>
429
+ <td>1203</td>
430
  <td>4.8</td>
431
+ <td>1296</td>
432
  </tr>
433
  <tr>
434
  <td>neuralmagic/Qwen2.5-VL-72B-Instruct-quantized.w4a16</td>
435
  <td>8.27</td>
436
  <td>13.2</td>
437
+ <td>3652</td>
438
  <td>13.2</td>
439
+ <td>3652</td>
440
  <td>99.2</td>
441
+ <td>27108</td>
442
  </tr>
443
  </tbody>
444
  </table>