Doesn't work in VLLM?
#1
by
OwenArli
- opened
I get the error below when loading this model, and the server won't start. Is this model supposed to work in vLLM?
Ran with:
vllm serve justinjja/ERNIE-4.5-300B-A47B-PT-INT4-W4A16 \
--gpu-memory-utilization 0.9 --max-model-len 16384 --max-seq-len-to-capture 16384 --port 8000 \
--enable-chunked-prefill --enable-prefix-caching \
--max-num-seqs 20 -tp 8 --max-num-batched-tokens 1024 --trust-remote-code \
--served-model-name ERNIE-4.5-300B-A47B \
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] WorkerProc failed to start.
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] Traceback (most recent call last):
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] File "/home/arli/vllm/vllm/vllm/v1/executor/multiproc_executor.py", line 461, in worker_main
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] worker = WorkerProc(*args, **kwargs)
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] File "/home/arli/vllm/vllm/vllm/v1/executor/multiproc_executor.py", line 358, in __init__
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] self.worker.load_model()
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] File "/home/arli/vllm/vllm/vllm/v1/worker/gpu_worker.py", line 185, in load_model
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] self.model_runner.load_model()
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] File "/home/arli/vllm/vllm/vllm/v1/worker/gpu_model_runner.py", line 1793, in load_model
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] self.model = model_loader.load_model(
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] ^^^^^^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] File "/home/arli/vllm/vllm/vllm/model_executor/model_loader/base_loader.py", line 41, in load_model
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] self.load_weights(model, model_config)
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] File "/home/arli/vllm/vllm/vllm/model_executor/model_loader/default_loader.py", line 269, in load_weights
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] loaded_weights = model.load_weights(
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] ^^^^^^^^^^^^^^^^^^^
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] File "/home/arli/vllm/vllm/vllm/model_executor/models/ernie45_moe.py", line 578, in load_weights
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] param = params_dict[name]
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] ~~~~~~~~~~~^^^^^^
(VllmWorker rank=0 pid=13736) ERROR 07-02 21:09:39 [multiproc_executor.py:487] KeyError: 'model.layers.30.mlp.gate.weight_packed'