Update README.md
Browse files
README.md
CHANGED
|
@@ -390,6 +390,7 @@ model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
|
| 390 |
|
| 391 |
# Enable static cache and compile the forward pass
|
| 392 |
model.generation_config.cache_implementation = "static"
|
|
|
|
| 393 |
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
|
| 394 |
|
| 395 |
processor = AutoProcessor.from_pretrained(model_id)
|
|
@@ -409,7 +410,7 @@ sample = dataset[0]["audio"]
|
|
| 409 |
# 2 warmup steps
|
| 410 |
for _ in tqdm(range(2), desc="Warm-up step"):
|
| 411 |
with sdpa_kernel(SDPBackend.MATH):
|
| 412 |
- result = pipe(sample.copy())
|
| 413 |
|
| 414 |
# fast run
|
| 415 |
with sdpa_kernel(SDPBackend.MATH):
|
|
|
|
| 390 |
|
| 391 |
# Enable static cache and compile the forward pass
|
| 392 |
model.generation_config.cache_implementation = "static"
|
| 393 |
+ model.generation_config.max_new_tokens = 256
|
| 394 |
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
|
| 395 |
|
| 396 |
processor = AutoProcessor.from_pretrained(model_id)
|
|
|
|
| 410 |
# 2 warmup steps
|
| 411 |
for _ in tqdm(range(2), desc="Warm-up step"):
|
| 412 |
with sdpa_kernel(SDPBackend.MATH):
|
| 413 |
+ result = pipe(sample.copy(), generate_kwargs={"min_new_tokens": 256, "max_new_tokens": 256})
|
| 414 |
|
| 415 |
# fast run
|
| 416 |
with sdpa_kernel(SDPBackend.MATH):
|