-
Notifications
You must be signed in to change notification settings - Fork 158
Description
System Info / 系統信息
不行系统问题,单张GPU可以运行
Who can help? / 谁可以帮助到您?
No response
Information / 问题信息
- The official example scripts / 官方的示例脚本
- My own modified scripts / 我自己修改的脚本和任务
Reproduction / 复现过程
MODEL_PATH = "/root/.cache/modelscope/hub/ZhipuAI/cogvlm2-llama3-chinese-chat-19B-int4"
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
TORCH_TYPE = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[
0] >= 8 else torch.float16
tokenizer = AutoTokenizer.from_pretrained(
MODEL_PATH,
trust_remote_code=True
)
with init_empty_weights():
model = AutoModelForCausalLM.from_pretrained(
MODEL_PATH,
torch_dtype=TORCH_TYPE,
trust_remote_code=True,
)
num_gpus = torch.cuda.device_count()
max_memory_per_gpu = "24GiB"
if num_gpus > 2:
max_memory_per_gpu = f"{round(42 / num_gpus)}GiB"
device_map = infer_auto_device_map(
model=model,
max_memory={i: max_memory_per_gpu for i in range(num_gpus)},
no_split_module_classes=["CogVLMDecoderLayer"]
)
model = load_checkpoint_and_dispatch(model, MODEL_PATH, device_map=device_map, dtype=TORCH_TYPE)
model = model.eval()
运行报
AttributeError Traceback (most recent call last)
Cell In[2], line 27
20 max_memory_per_gpu = f"{round(42 / num_gpus)}GiB"
22 device_map = infer_auto_device_map(
23 model=model,
24 max_memory={i: max_memory_per_gpu for i in range(num_gpus)},
25 no_split_module_classes=["CogVLMDecoderLayer"]
26 )
---> 27 model = load_checkpoint_and_dispatch(model, MODEL_PATH, device_map=device_map, dtype=TORCH_TYPE)
28 model = model.eval()
File ~/anaconda3/envs/cogvlm2/lib/python3.10/site-packages/accelerate/big_modeling.py:613, in load_checkpoint_and_dispatch(model, checkpoint, device_map, max_memory, no_split_module_classes, offload_folder, offload_buffers, dtype, offload_state_dict, skip_keys, preload_module_classes, force_hooks, strict)
611 if offload_state_dict is None and device_map is not None and "disk" in device_map.values():
612 offload_state_dict = True
--> 613 load_checkpoint_in_model(
614 model,
615 checkpoint,
616 device_map=device_map,
617 offload_folder=offload_folder,
618 dtype=dtype,
619 offload_state_dict=offload_state_dict,
620 offload_buffers=offload_buffers,
621 strict=strict,
622 )
623 if device_map is None:
624 return model
File ~/anaconda3/envs/cogvlm2/lib/python3.10/site-packages/accelerate/utils/modeling.py:1780, in load_checkpoint_in_model(model, checkpoint, device_map, offload_folder, dtype, offload_state_dict, offload_buffers, keep_in_fp32_modules, offload_8bit_bnb, strict)
1778 offload_weight(param, param_name, state_dict_folder, index=state_dict_index)
1779 else:
-> 1780 set_module_tensor_to_device(
1781 model,
1782 param_name,
1783 param_device,
1784 value=param,
1785 dtype=new_dtype,
1786 fp16_statistics=fp16_statistics,
1787 )
1789 # Force Python to clean up.
1790 del loaded_checkpoint
File ~/anaconda3/envs/cogvlm2/lib/python3.10/site-packages/accelerate/utils/modeling.py:253, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics, tied_params_map)
250 module = new_module
251 tensor_name = splits[-1]
--> 253 if tensor_name not in module._parameters and tensor_name not in module._buffers:
254 raise ValueError(f"{module} does not have a parameter or a buffer named {tensor_name}.")
255 is_buffer = tensor_name in module._buffers
AttributeError: 'Params4bit' object has no attribute '_parameters'
Expected behavior / 期待表现
期望正常运行