(APIServer pid=279037) INFO: 10.100.20.4:51028 - "GET /v1/models HTTP/1.1" 200 OK (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP2 pid=279587) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [dump_input.py:72] Dumping input data for V1 LLM engine (v0.15.0) with config: model='/data/metax-tech/Qwen3.5-397B-A17B-W8A8', speculative_config=SpeculativeConfig(method='mtp', model='/data/metax-tech/Qwen3.5-397B-A17B-W8A8', num_spec_tokens=2), tokenizer='/data/metax-tech/Qwen3.5-397B-A17B-W8A8', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=262144, download_dir=None, load_format=auto, tensor_parallel_size=8, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=True, quantization=compressed-tensors, enforce_eager=False, enable_return_routed_experts=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser='qwen3', reasoning_parser_plugin='', enable_in_reasoning=False), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, kv_cache_metrics=False, kv_cache_metrics_sample=0.01, cudagraph_metrics=False, enable_layerwise_nvtx_tracing=False, enable_mfu_metrics=False, enable_mm_processor_stats=False, enable_logging_iteration_details=False), seed=0, served_model_name=Qwen3.5-W8A8, enable_prefix_caching=False, enable_chunked_prefill=True, pooler_config=None, compilation_config={'level': None, 'mode': , 'debug_dump_path': None, 'cache_dir': '', 'compile_cache_save_format': 'binary', 'backend': 'inductor', 'custom_ops': ['none'], 'splitting_ops': ['vllm::unified_attention', 'vllm::unified_attention_with_output', 'vllm::unified_mla_attention', 'vllm::unified_mla_attention_with_output', 'vllm::mamba_mixer2', 'vllm::mamba_mixer', 'vllm::short_conv', 'vllm::linear_attention', 'vllm::plamo2_mamba_mixer', 'vllm::gdn_attention_core', 'vllm::kda_attention', 'vllm::sparse_attn_indexer', 'vllm::rocm_aiter_sparse_attn_indexer', 'vllm::mx_sparse_attn_indexer'], 'compile_mm_encoder': False, 'compile_sizes': [], 'compile_ranges_split_points': [2048], 'inductor_compile_config': {'enable_auto_functionalized_v2': False}, 'inductor_passes': {}, 'cudagraph_mode': , 'cudagraph_num_of_warmups': 1, 'cudagraph_capture_sizes': [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, 416, 432, 448, 464, 480, 496, 512], 'cudagraph_copy_inputs': False, 'cudagraph_specialize_lora': True, 'use_inductor_graph_partition': False, 'pass_config': {'fuse_norm_quant': False, 'fuse_act_quant': False, 'fuse_attn_quant': False, 'eliminate_noops': True, 'enable_sp': False, 'fuse_gemm_comms': False, 'fuse_allreduce_rms': False}, 'max_cudagraph_capture_size': 512, 'dynamic_shapes_config': {'type': , 'evaluate_guards': False, 'assume_32_bit_indexing': True}, 'local_cache_dir': None}, (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [dump_input.py:79] Dumping scheduler output for model execution: SchedulerOutput(scheduled_new_reqs=[NewRequestData(req_id=chatcmpl-acfd252e27f29942-bb0bd1ec,prompt_token_ids_len=11,prefill_token_ids_len=None,mm_features=[],sampling_params=SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.95, top_k=20, min_p=0.0, seed=None, stop=[], stop_token_ids=[248044], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=5, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, structured_outputs=None, extra_args=None),block_ids=([1, 2, 3], [4, 5, 6], [7, 8, 9], [10]),num_computed_tokens=0,lora_request=None,prompt_embeds_shape=None)], scheduled_cached_reqs=CachedRequestData(req_ids=[],resumed_req_ids=set(),new_token_ids_lens=[],all_token_ids_lens={},new_block_ids=[],num_computed_tokens=[],num_output_tokens=[]), num_scheduled_tokens={chatcmpl-acfd252e27f29942-bb0bd1ec: 11}, total_num_scheduled_tokens=11, scheduled_spec_decode_tokens={}, scheduled_encoder_inputs={}, num_common_prefix_blocks=[0, 0, 0, 0], finished_req_ids=[], free_encoder_mm_hashes=[], preempted_req_ids=[], has_structured_output_requests=false, pending_structured_output_tokens=false, num_invalid_spec_tokens=null, kv_connector_metadata=null, ec_connector_metadata=null) (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [dump_input.py:81] Dumping scheduler stats: SchedulerStats(num_running_reqs=1, num_waiting_reqs=0, step_counter=0, current_wave=0, kv_cache_usage=0.019379844961240345, prefix_cache_stats=PrefixCacheStats(reset=False, requests=0, queries=0, hits=0, preempted_requests=0, preempted_queries=0, preempted_hits=0), connector_prefix_cache_stats=None, kv_cache_eviction_events=[], spec_decoding_stats=None, kv_connector_stats=None, waiting_lora_adapters={}, running_lora_adapters={}, cudagraph_stats=None, perf_stats=None) (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] EngineCore encountered a fatal error. (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] Traceback (most recent call last): (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 939, in run_engine_core (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] engine_core.run_busy_loop() (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 966, in run_busy_loop (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] self._process_engine_step() (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 999, in _process_engine_step (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] outputs, model_executed = self.step_fn() (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] ^^^^^^^^^^^^^^ (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 486, in step_with_batch_queue (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] model_output = future.result() (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] ^^^^^^^^^^^^^^^ (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 80, in result (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] return super().result() (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] ^^^^^^^^^^^^^^^^ (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/concurrent/futures/_base.py", line 449, in result (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] return self.__get_result() (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] ^^^^^^^^^^^^^^^^^^^ (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/concurrent/futures/_base.py", line 401, in __get_result (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] raise self._exception (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 84, in wait_for_response (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] response = self.aggregate(get_response()) (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] ^^^^^^^^^^^^^^ (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 357, in get_response (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] raise RuntimeError( (EngineCore_DP0 pid=279447) ERROR 04-07 16:04:24 [core.py:948] RuntimeError: Worker failed with error ''EagleProposer' object has no attribute 'positions'', please check the stack trace above for the root cause (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP2 pid=279587) INFO 04-07 16:04:24 [multiproc_executor.py:730] Parent process exited, terminating worker (Worker_TP6 pid=279591) INFO 04-07 16:04:24 [multiproc_executor.py:730] Parent process exited, terminating worker (Worker_TP2 pid=279587) INFO 04-07 16:04:24 [multiproc_executor.py:774] WorkerProc shutting down. (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP0 pid=279585) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (APIServer pid=279037) ERROR 04-07 16:04:24 [async_llm.py:693] AsyncLLM output_handler failed. (APIServer pid=279037) ERROR 04-07 16:04:24 [async_llm.py:693] Traceback (most recent call last): (APIServer pid=279037) ERROR 04-07 16:04:24 [async_llm.py:693] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 649, in output_handler (APIServer pid=279037) ERROR 04-07 16:04:24 [async_llm.py:693] outputs = await engine_core.get_output_async() (APIServer pid=279037) ERROR 04-07 16:04:24 [async_llm.py:693] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (APIServer pid=279037) ERROR 04-07 16:04:24 [async_llm.py:693] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 894, in get_output_async (APIServer pid=279037) ERROR 04-07 16:04:24 [async_llm.py:693] raise self._format_exception(outputs) from None (APIServer pid=279037) ERROR 04-07 16:04:24 [async_llm.py:693] vllm.v1.engine.exceptions.EngineDeadError: EngineCore encountered an issue. See stack trace (above) for the root cause. (Worker_TP4 pid=279589) INFO 04-07 16:04:24 [multiproc_executor.py:730] Parent process exited, terminating worker (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP7 pid=279592) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP0 pid=279585) INFO 04-07 16:04:24 [multiproc_executor.py:730] Parent process exited, terminating worker (Worker_TP7 pid=279592) INFO 04-07 16:04:24 [multiproc_executor.py:730] Parent process exited, terminating worker (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP0 pid=279585) INFO 04-07 16:04:24 [multiproc_executor.py:774] WorkerProc shutting down. (Worker_TP3 pid=279588) INFO 04-07 16:04:24 [multiproc_executor.py:730] Parent process exited, terminating worker (Worker_TP7 pid=279592) INFO 04-07 16:04:24 [multiproc_executor.py:774] WorkerProc shutting down. (Worker_TP5 pid=279590) INFO 04-07 16:04:24 [multiproc_executor.py:730] Parent process exited, terminating worker (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 579, in sample_tokens (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.model_runner.sample_tokens(grammar_output) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3656, in sample_tokens (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] propose_draft_token_ids(sampled_token_ids) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3627, in propose_draft_token_ids (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._draft_token_ids = self.propose_draft_token_ids( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3990, in propose_draft_token_ids (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] draft_token_ids = self.drafter.propose( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/spec_decode/eagle.py", line 406, in propose (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] positions = self.positions[:, last_token_indices] (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AttributeError: 'EagleProposer' object has no attribute 'positions' (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (APIServer pid=279037) INFO: 10.100.56.14:39786 - "POST /v1/chat/completions HTTP/1.1" 200 OK (Worker_TP1 pid=279586) INFO 04-07 16:04:24 [multiproc_executor.py:730] Parent process exited, terminating worker (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP6 pid=279591) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP6 pid=279591) INFO 04-07 16:04:24 [multiproc_executor.py:774] WorkerProc shutting down. (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP4 pid=279589) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP5 pid=279590) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP4 pid=279589) INFO 04-07 16:04:24 [multiproc_executor.py:774] WorkerProc shutting down. (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP3 pid=279588) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP5 pid=279590) INFO 04-07 16:04:24 [multiproc_executor.py:774] WorkerProc shutting down. (Worker_TP3 pid=279588) INFO 04-07 16:04:24 [multiproc_executor.py:774] WorkerProc shutting down. (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] WorkerProc hit an exception. (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] Traceback (most recent call last): (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/executor/multiproc_executor.py", line 847, in worker_busy_loop (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/worker_base.py", line 365, in execute_model (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return self.worker.execute_model(scheduler_output) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 630, in execute_model (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] output = self.model_runner.execute_model( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] return func(*args, **kwargs) (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 3333, in execute_model (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] logits_indices, spec_decode_metadata = self._prepare_inputs( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1557, in _prepare_inputs (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] self._prepare_input_ids( (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] File "/opt/conda/lib/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 1283, in _prepare_input_ids (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] assert prev_req_id_to_index is not None (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] AssertionError (Worker_TP1 pid=279586) ERROR 04-07 16:04:24 [multiproc_executor.py:852] (Worker_TP1 pid=279586) INFO 04-07 16:04:24 [multiproc_executor.py:774] WorkerProc shutting down.