-
Notifications
You must be signed in to change notification settings - Fork 70
BUG:Cannot copy out of meta tensor; no data! #856
Description
Describe the bug
A clear and concise description of what the bug is.
When I launch the model, there was an error: Cannot copy out of meta tensor; no data!
My Gpu Information:
To Reproduce
To help us to reproduce this bug, please provide information below:
- Your Python version: Python 3.10
- The version of Xorbits you use: docker image xprobe/xinference:v1.6.0
- Versions of crucial packages, such as numpy, scipy and pandas
- Full stack of the error.
`The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 1054, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 262, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 111, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 689, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 389, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 418, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 564, in on_receive
raise ex
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 527, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 532, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1199, in launch_builtin_model
await _launch_model()
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1134, in _launch_model
subpool_address = await _launch_one_model(
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1106, in _launch_one_model
await worker_ref.wait_for_load(_replica_model_uid)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 262, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 111, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 689, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 389, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 418, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 564, in on_receive
raise ex
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 527, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 532, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 93, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 1178, in wait_for_load
await model_ref.wait_for_load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 262, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 111, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 689, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 389, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 418, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 564, in on_receive
raise ex
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 527, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 532, in xoscar.core._BaseActor.on_receive
result = await result
File "/opt/inference/xinference/core/model.py", line 498, in wait_for_load
await asyncio.to_thread(self._model.wait_for_load)
File "/usr/lib/python3.10/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/opt/inference/xinference/model/llm/vllm/core.py", line 503, in wait_for_load
raise err.with_traceback(tb)
File "/opt/inference/xinference/model/llm/vllm/core.py", line 472, in _load
self._engine = XinferenceAsyncLLMEngine.from_engine_args(
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 684, in from_engine_args
return async_engine_cls.from_vllm_config(
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 657, in from_vllm_config
return cls(
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 612, in init
self.engine = self._engine_class(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 267, in init
super().init(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py", line 275, in init
self.model_executor = executor_class(vllm_config=vllm_config)
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 113, in init
super().init(vllm_config, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/executor/executor_base.py", line 286, in init
super().init(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/executor/executor_base.py", line 52, in init
self._init_executor()
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 189, in _init_executor
self._run_workers(
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 238, in _run_workers
return driver_worker_outputs + [output.result() for output in worker_outputs]
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 238, in
return driver_worker_outputs + [output.result() for output in worker_outputs]
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 262, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 111, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 689, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 389, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 418, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 564, in on_receive
raise ex
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 527, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 530, in xoscar.core._BaseActor.on_receive
result = func(*args, **kwargs)
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 66, in execute_method
return getattr(self._worker, method)(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker.py", line 203, in load_model
self.model_runner.load_model()
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 1111, in load_model
self.model = get_model(vllm_config=self.vllm_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/loader.py", line 1398, in load_model
gguf_weights_map = self._get_gguf_weights_map(model_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/loader.py", line 1375, in _get_gguf_weights_map
dummy_model = AutoModelForCausalLM.from_config(
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 439, in from_config
model_class = _get_model_class(config, cls._model_mapping)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 388, in _get_model_class
supported_models = model_mapping[type(config)]
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 770, in getitem
return self._load_attr_from_module(model_type, model_name)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 784, in _load_attr_from_module
return getattribute_from_module(self._modules[module_name], attr)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 700, in getattribute_from_module
if hasattr(module, attr):
File "/usr/local/lib/python3.10/dist-packages/transformers/utils/import_utils.py", line 1955, in getattr
module = self._get_module(self._class_to_module[name])
File "/usr/local/lib/python3.10/dist-packages/transformers/utils/import_utils.py", line 1969, in _get_module
raise RuntimeError(
RuntimeError: [address=0.0.0.0:37045, pid=3671] Failed to import transformers.models.qwen3.modeling_qwen3 because of the following error (look up to see its traceback):
Cannot copy out of meta tensor; no data!
`
Additional context
Add any other context about the problem here.

