Update README.md
Browse files
README.md
CHANGED
@@ -22,7 +22,7 @@ Run in a Python pipeline for local inference:
|
|
22 |
```python
|
23 |
from vllm import LLM, SamplingParams
|
24 |
|
25 |
-
model = LLM("nm-testing/llama2.c-stories110M-pruned2.4", sparsity="
|
26 |
prompt = "My name is "
|
27 |
sampling_params = SamplingParams(max_tokens=100,temperature=0)
|
28 |
outputs = model.generate(prompt, sampling_params=sampling_params)
|
|
|
22 |
```python
|
23 |
from vllm import LLM, SamplingParams
|
24 |
|
25 |
+
model = LLM("nm-testing/llama2.c-stories110M-pruned2.4", sparsity="semi_structured_sparse_w16a16")
|
26 |
prompt = "My name is "
|
27 |
sampling_params = SamplingParams(max_tokens=100,temperature=0)
|
28 |
outputs = model.generate(prompt, sampling_params=sampling_params)
|