(EngineCore_DP0 pid=286) DEBUG 02-24 11:46:26 [v1/engine/core.py:737] EngineCore waiting for work.
(APIServer pid=1) DEBUG 02-24 11:46:30 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:49098 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:46:30 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:49112 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:46:35 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:59046 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:46:40 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:59054 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:46:40 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:59062 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:46:45 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:58468 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:46:50 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:58476 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:46:50 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:58486 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:46:55 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:36916 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:47:00 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:36922 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) DEBUG 02-24 11:47:00 [v1/engine/async_llm.py:609] Called check_health.
(APIServer pid=1) INFO: 192.168.2.3:36936 - "GET /health HTTP/1.1" 200 OK
(APIServer pid=1) INFO: 10.20.34.32:54048 - "POST /v1/chat/completions HTTP/1.1" 200 OK
(EngineCore_DP0 pid=286) DEBUG 02-24 11:47:02 [v1/engine/core.py:743] EngineCore loop active.
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/core_client.py:564] Engine core proc EngineCore_DP0 died unexpectedly, shutting down client.
(Worker_TP0_EP0 pid=424) INFO 02-24 11:47:03 [v1/executor/multiproc_executor.py:558] Parent process exited, terminating worker
(Worker_TP1_EP1 pid=425) INFO 02-24 11:47:03 [v1/executor/multiproc_executor.py:558] Parent process exited, terminating worker
(Worker_TP3_EP3 pid=427) INFO 02-24 11:47:03 [v1/executor/multiproc_executor.py:558] Parent process exited, terminating worker
(Worker_TP1_EP1 pid=425) INFO 02-24 11:47:03 [v1/executor/multiproc_executor.py:599] WorkerProc shutting down.
(Worker_TP0_EP0 pid=424) INFO 02-24 11:47:03 [v1/executor/multiproc_executor.py:599] WorkerProc shutting down.
(Worker_TP2_EP2 pid=426) INFO 02-24 11:47:03 [v1/executor/multiproc_executor.py:558] Parent process exited, terminating worker
(Worker_TP3_EP3 pid=427) INFO 02-24 11:47:03 [v1/executor/multiproc_executor.py:599] WorkerProc shutting down.
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/async_llm.py:480] AsyncLLM output_handler failed.
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/async_llm.py:480] Traceback (most recent call last):
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/async_llm.py:480]   File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 439, in output_handler
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/async_llm.py:480]     outputs = await engine_core.get_output_async()
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/async_llm.py:480]               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/async_llm.py:480]   File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 846, in get_output_async
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/async_llm.py:480]     raise self._format_exception(outputs) from None
(APIServer pid=1) ERROR 02-24 11:47:03 [v1/engine/async_llm.py:480] vllm.v1.engine.exceptions.EngineDeadError: EngineCore encountered an issue. See stack trace (above) for the root cause.
(Worker_TP2_EP2 pid=426) INFO 02-24 11:47:03 [v1/executor/multiproc_executor.py:599] WorkerProc shutting down.
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145] Error in chat completion stream generator.
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145] Traceback (most recent call last):
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]   File "/opt/conda/lib/python3.12/site-packages/vllm/entrypoints/openai/serving_chat.py", line 574, in chat_completion_stream_generator
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]     async for res in result_generator:
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]   File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 387, in generate
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]     out = q.get_nowait() or await q.get()
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]           ^^^^^^^^^^^^^
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]   File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/output_processor.py", line 59, in get
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]     raise output
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]   File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 439, in output_handler
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]     outputs = await engine_core.get_output_async()
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]   File "/opt/conda/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 846, in get_output_async
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145]     raise self._format_exception(outputs) from None
(APIServer pid=1) ERROR 02-24 11:47:03 [entrypoints/openai/serving_chat.py:1145] vllm.v1.engine.exceptions.EngineDeadError: EngineCore encountered an issue. See stack trace (above) for the root cause.
(APIServer pid=1) INFO: Shutting down
(APIServer pid=1) INFO: Waiting for application shutdown.
(APIServer pid=1) INFO: Application shutdown complete.
(APIServer pid=1) INFO: Finished server process [1]
/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py:279: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown
  warnings.warn('resource_tracker: There appear to be %d '
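Note the sequence: the POST /v1/chat/completions is logged as 200 OK (the stream opened), and EngineCore_DP0 dies about a second after "EngineCore loop active", so clients see a dropped stream rather than an HTTP error. Below is a minimal client-side sketch that reproduces this request pattern and distinguishes a mid-stream engine death from an ordinary failure; the base URL and model name are assumptions, not values taken from the log.

# Minimal sketch (assumed base URL and model name): poll /health as the probes
# above do, then open a streaming chat completion and detect a stream that
# aborts mid-flight, as happens when EngineCore dies.
import json
import requests

BASE_URL = "http://192.168.2.3:8000"  # assumption; substitute your server address
MODEL = "my-model"                    # assumption; substitute the served model name

def server_healthy() -> bool:
    # Mirrors the GET /health probes in the log; vLLM returns 200 while the engine is alive.
    try:
        return requests.get(f"{BASE_URL}/health", timeout=5).status_code == 200
    except requests.RequestException:
        return False

def stream_chat(prompt: str) -> None:
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    with requests.post(f"{BASE_URL}/v1/chat/completions",
                       json=payload, stream=True, timeout=(5, 300)) as resp:
        # The log shows 200 OK here even though the stream later dies:
        # headers are sent before the engine crashes.
        resp.raise_for_status()
        try:
            for line in resp.iter_lines():
                if not line or not line.startswith(b"data: "):
                    continue
                chunk = line[len(b"data: "):]
                if chunk == b"[DONE]":
                    return
                delta = json.loads(chunk)["choices"][0]["delta"].get("content", "")
                print(delta, end="", flush=True)
        except (requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ConnectionError):
            # Connection drops mid-stream when EngineCore dies, matching the
            # EngineDeadError traceback above.
            print("\nstream aborted: likely server-side engine failure")

if __name__ == "__main__":
    print("healthy before request:", server_healthy())
    stream_chat("Hello")
    print("healthy after request:", server_healthy())

Running this against a server in the state shown above would print "healthy before request: True", abort mid-stream, and then fail the second health check once the API server has shut down.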