Skip to content

BUG:Cannot copy out of meta tensor; no data! #856

@jacksunsl

Description

@jacksunsl

Describe the bug

A clear and concise description of what the bug is.

When I launch the model, there was an error: Cannot copy out of meta tensor; no data!

Image

My Gpu Information:

Image

To Reproduce

To help us to reproduce this bug, please provide information below:

  1. Your Python version
    python3.10
  2. The version of Xorbits you use
    docker image: xprobe/xinference:v1.6.0
  3. Versions of crucial packages, such as numpy, scipy and pandas
  4. Full stack of the error.
    `The above exception was the direct cause of the following exception:

Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/xinference/api/restful_api.py", line 1054, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 262, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 111, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 689, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 389, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 418, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 564, in on_receive
raise ex
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 527, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 532, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1199, in launch_builtin_model
await _launch_model()
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1134, in _launch_model
subpool_address = await _launch_one_model(
File "/usr/local/lib/python3.10/dist-packages/xinference/core/supervisor.py", line 1106, in _launch_one_model
await worker_ref.wait_for_load(_replica_model_uid)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 262, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 111, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 689, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 389, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 418, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 564, in on_receive
raise ex
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 527, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 532, in xoscar.core._BaseActor.on_receive
result = await result
File "/usr/local/lib/python3.10/dist-packages/xinference/core/utils.py", line 93, in wrapped
ret = await func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/xinference/core/worker.py", line 1178, in wait_for_load
await model_ref.wait_for_load()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 262, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 111, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 689, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 389, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 418, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 564, in on_receive
raise ex
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 527, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 532, in xoscar.core._BaseActor.on_receive
result = await result
File "/opt/inference/xinference/core/model.py", line 498, in wait_for_load
await asyncio.to_thread(self._model.wait_for_load)
File "/usr/lib/python3.10/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/opt/inference/xinference/model/llm/vllm/core.py", line 503, in wait_for_load
raise err.with_traceback(tb)
File "/opt/inference/xinference/model/llm/vllm/core.py", line 472, in _load
self._engine = XinferenceAsyncLLMEngine.from_engine_args(
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 684, in from_engine_args
return async_engine_cls.from_vllm_config(
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 657, in from_vllm_config
return cls(
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 612, in __init__
self.engine = self._engine_class(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/async_llm_engine.py", line 267, in __init__
super().__init__(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py", line 275, in __init__
self.model_executor = executor_class(vllm_config=vllm_config)
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 113, in __init__
super().__init__(vllm_config, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/executor/executor_base.py", line 286, in __init__
super().__init__(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/executor/executor_base.py", line 52, in __init__
self._init_executor()
self._init_executor()
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 189, in _init_executor
self._run_workers(
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 238, in _run_workers
return driver_worker_outputs + [output.result() for output in worker_outputs]
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 238, in <listcomp>
return driver_worker_outputs + [output.result() for output in worker_outputs]
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in result
return self.__get_result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
raise self._exception
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 262, in send
return self._process_result_message(result)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/context.py", line 111, in _process_result_message
raise message.as_instanceof_cause()
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 689, in send
result = await self._run_coro(message.message_id, coro)
File "/usr/local/lib/python3.10/dist-packages/xoscar/backends/pool.py", line 389, in _run_coro
return await coro
File "/usr/local/lib/python3.10/dist-packages/xoscar/api.py", line 418, in on_receive
return await super().on_receive(message) # type: ignore
File "xoscar/core.pyx", line 564, in on_receive
raise ex
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
async with self._lock:
File "xoscar/core.pyx", line 527, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
File "xoscar/core.pyx", line 530, in xoscar.core._BaseActor.on_receive
result = func(*args, **kwargs)
File "/opt/inference/xinference/model/llm/vllm/distributed_executor.py", line 66, in execute_method
return getattr(self._worker, method)(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/worker.py", line 203, in load_model
self.model_runner.load_model()
File "/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py", line 1111, in load_model
self.model = get_model(vllm_config=self.vllm_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/__init__.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/loader.py", line 1398, in load_model
gguf_weights_map = self._get_gguf_weights_map(model_config)
File "/usr/local/lib/python3.10/dist-packages/vllm/model_executor/model_loader/loader.py", line 1375, in _get_gguf_weights_map
dummy_model = AutoModelForCausalLM.from_config(
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 439, in from_config
model_class = _get_model_class(config, cls._model_mapping)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 388, in _get_model_class
supported_models = model_mapping[type(config)]
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 770, in __getitem__
return self._load_attr_from_module(model_type, model_name)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 784, in _load_attr_from_module
return getattribute_from_module(self._modules[module_name], attr)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 700, in getattribute_from_module
if hasattr(module, attr):
File "/usr/local/lib/python3.10/dist-packages/transformers/utils/import_utils.py", line 1955, in __getattr__
module = self._get_module(self._class_to_module[name])
File "/usr/local/lib/python3.10/dist-packages/transformers/utils/import_utils.py", line 1969, in _get_module
raise RuntimeError(
RuntimeError: [address=0.0.0.0:37045, pid=3671] Failed to import transformers.models.qwen3.modeling_qwen3 because of the following error (look up to see its traceback):
Cannot copy out of meta tensor; no data!
`

Additional context

Add any other context about the problem here.

Metadata

Metadata

Assignees

No one assigned

    Labels

bug (Something isn't working), gpu

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions