run
#2
by
rakmik
- opened
How do I run it in WSL on Windows 10?
pip install git+https://github.com/snowflake-labs/vllm.git@swiftkv
# Minimal offline-generation script using vLLM with the SwiftKV FP8 checkpoint.
from vllm import LLM, SamplingParams

# Construct the engine for the requested model identifier.
llm = LLM(model="Snowflake/Llama-3.1-SwiftKV-8B-Instruct-FP8")

# Sampling settings passed to generate().
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

prompts = ["Your prompt here"]
outputs = llm.generate(prompts, sampling_params)

# The loop body must be indented; without it Python raises IndentationError.
for output in outputs:
    # Print the text of the first completion of each result.
    print(output.outputs[0].text)
It does not run.