-
Notifications
You must be signed in to change notification settings - Fork 960
Description
D:\software\Anaconda3\envs\bg_matting\python.exe "D:\software\JetBrains\PyCharm 2022.1.3\plugins\python\helpers\pydev\pydevd.py" --multiprocess --qt-support=auto --client 127.0.0.1 --port 12569 --file D:/_zzw_work/gouged/py_project/BackgroundMattingV2/train_refine_ori.py --dataset-name videomatte240k --model-backbone resnet101 --model-name refine_resnet101 --model-last-checkpoint D:\_zzw_work\gouged\py_project\BackgroundMattingV2\weights\torchscript_resnet101_fp32.pth --epoch-end 2000
已连接到 pydev 调试器(内部版本号 221.5921.27)
Traceback (most recent call last):
File "D:\software\JetBrains\PyCharm 2022.1.3\plugins\python\helpers\pydev\pydevd.py", line 1491, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
File "D:\software\JetBrains\PyCharm 2022.1.3\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "D:/_zzw_work/gouged/py_project/BackgroundMattingV2/train_refine_ori.py", line 312, in <module>
mp.spawn(train_worker,
File "D:\software\Anaconda3\envs\bg_matting\lib\site-packages\torch\multiprocessing\spawn.py", line 246, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method="spawn")
File "D:\software\Anaconda3\envs\bg_matting\lib\site-packages\torch\multiprocessing\spawn.py", line 202, in start_processes
while not context.join():
File "D:\software\Anaconda3\envs\bg_matting\lib\site-packages\torch\multiprocessing\spawn.py", line 163, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "D:\software\Anaconda3\envs\bg_matting\lib\site-packages\torch\multiprocessing\spawn.py", line 74, in _wrap
fn(i, *args)
File "D:\_zzw_work\gouged\py_project\BackgroundMattingV2\train_refine_ori.py", line 82, in train_worker
dist.init_process_group("nccl", rank=rank, world_size=distributed_num_gpus)
File "D:\software\Anaconda3\envs\bg_matting\lib\site-packages\torch\distributed\c10d_logger.py", line 74, in wrapper
func_return = func(*args, **kwargs)
File "D:\software\Anaconda3\envs\bg_matting\lib\site-packages\torch\distributed\distributed_c10d.py", line 1148, in init_process_group
default_pg, _ = _new_process_group_helper(
File "D:\software\Anaconda3\envs\bg_matting\lib\site-packages\torch\distributed\distributed_c10d.py", line 1268, in _new_process_group_helper
raise RuntimeError("Distributed package doesn't have NCCL built in")
RuntimeError: Distributed package doesn't have NCCL built in