
Commit fdb17a5

lianqing11, Tai-Wang, lianqing01, and ZCMax authored
[Fix] Fix some loading bugs and support fov_image_based mode in Waymo dataset. (#1942)
* modify sample_id to sample_id and support fov_image_based on waymo dataset
* Update waymo_metric.py
* Minor fix
* Minor fix
* Minor fix
* Minor fix
* Minor fix
* Minor fix
* Minor fixes
* Minor fixes
* Remove optional
* fix dataset instances converting bugs
* Add a blank line to fix the doc compilation format
* Fix the bin file name in waymo_fov config
* Resolve conflicts
* fix ci and other things

Co-authored-by: Tai-Wang <tab_wang@outlook.com>
Co-authored-by: lianqing11 <lianqing11@foxmail.com>
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>
1 parent 5982f9e commit fdb17a5

11 files changed: +463 −45 lines
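For orientation before the per-file diffs, the two dataset loading modes touched by this commit can be summarized as below. This is a sketch based only on the configs in this commit, not an exhaustive list of supported modes.

```python
# Sketch: the two load_type values used in the configs changed by this commit.
LOAD_TYPE_NOTES = {
    # only instances inside the camera's field of view are loaded; used by the
    # new Waymo FoV configs and by kitti-mono3d below
    'fov_image_based': 'FoV-restricted image-based loading',
    # one sample per camera image across all views; replaces the removed
    # task='mono_det' key in the nuScenes and Waymo multi-view configs
    'mv_image_based': 'multi-view image-based loading',
}
print(LOAD_TYPE_NOTES)
```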

configs/_base_/datasets/kitti-mono3d.py

Lines changed: 2 additions & 0 deletions
@@ -52,6 +52,7 @@
         data_prefix=dict(img='training/image_2'),
         pipeline=train_pipeline,
         modality=input_modality,
+        load_type='fov_image_based',
         test_mode=False,
         metainfo=metainfo,
         # we use box_type_3d='Camera' in monocular 3d
@@ -70,6 +71,7 @@
         ann_file='kitti_infos_val.pkl',
         pipeline=test_pipeline,
         modality=input_modality,
+        load_type='fov_image_based',
         metainfo=metainfo,
         test_mode=True,
         box_type_3d='Camera'))
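A minimal sketch of how the new flag could be checked once the config is parsed, assuming an mmdetection3d checkout at this commit with mmengine installed. The unwrapping loop is only a precaution in case the train dataset is wrapped (e.g. by RepeatDataset); it is not implied by the diff.

```python
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/kitti-mono3d.py')
ds = cfg.train_dataloader.dataset
while 'dataset' in ds:  # unwrap a possible dataset wrapper
    ds = ds.dataset
print(ds.load_type)  # expected: 'fov_image_based'
```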

configs/_base_/datasets/nus-mono3d.py

Lines changed: 2 additions & 2 deletions
@@ -65,7 +65,7 @@
             CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
             CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
         ann_file='nuscenes_infos_train.pkl',
-        task='mono_det',
+        load_type='mv_image_based',
         pipeline=train_pipeline,
         metainfo=metainfo,
         modality=input_modality,
@@ -92,7 +92,7 @@
             CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
             CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
         ann_file='nuscenes_infos_val.pkl',
-        task='mono_det',
+        load_type='mv_image_based',
         pipeline=test_pipeline,
         modality=input_modality,
         metainfo=metainfo,
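The nuScenes change is a pure key rename: task='mono_det' becomes load_type='mv_image_based'. For downstream configs still using the old key, a hypothetical helper like the following (not part of the commit) captures the mapping; note that kitti-mono3d above switches to 'fov_image_based' instead.

```python
def migrate_dataset_cfg(ds_cfg: dict) -> dict:
    """Rename the removed task='mono_det' key to the new load_type key."""
    ds_cfg = dict(ds_cfg)  # shallow copy; leave the caller's dict untouched
    if ds_cfg.pop('task', None) == 'mono_det':
        ds_cfg['load_type'] = 'mv_image_based'
    return ds_cfg


old = dict(type='NuScenesDataset', ann_file='nuscenes_infos_val.pkl',
           task='mono_det')
print(migrate_dataset_cfg(old)['load_type'])  # mv_image_based
```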
configs/_base_/datasets/waymoD5-fov-mono3d-3class.py

Lines changed: 150 additions & 0 deletions (new file)
@@ -0,0 +1,150 @@
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
input_modality = dict(use_lidar=False, use_camera=True)
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]

metainfo = dict(CLASSES=class_names)

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # load one frame every five frames
        load_interval=5))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
    ))

test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
    ))

val_evaluator = dict(
    type='WaymoMetric',
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP',
    load_type='fov_image_based',
)
test_evaluator = val_evaluator
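A short sketch (assuming a checkout that contains this new base config) tying together the two sides of the file above: the FoV loading mode on the dataloaders and the matching fov_gt.bin plus LET_mAP on the evaluator.

```python
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/waymoD5-fov-mono3d-3class.py')
assert cfg.train_dataloader.dataset.load_type == 'fov_image_based'
assert cfg.val_evaluator.waymo_bin_file.endswith('fov_gt.bin')
print(cfg.val_evaluator.metric)  # LET_mAP
```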

configs/_base_/datasets/waymoD5-mono3d-3class.py renamed to configs/_base_/datasets/waymoD5-mv-mono3d-3class.py

Lines changed: 5 additions & 4 deletions
@@ -81,7 +81,7 @@
         # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
         # and box_type_3d='Depth' in sunrgbd and scannet dataset.
         box_type_3d='Camera',
-        task='mono_det',
+        load_type='mv_image_based',
         # load one frame every five frames
         load_interval=5))

@@ -109,7 +109,7 @@
         # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
         # and box_type_3d='Depth' in sunrgbd and scannet dataset.
         box_type_3d='Camera',
-        task='mono_det',
+        load_type='mv_image_based',
     ))

 test_dataloader = dict(
@@ -136,7 +136,7 @@
         # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
         # and box_type_3d='Depth' in sunrgbd and scannet dataset.
         box_type_3d='Camera',
-        task='mono_det',
+        load_type='mv_image_based',
     ))

 val_evaluator = dict(
@@ -145,5 +145,6 @@
     waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
     data_root='./data/waymo/waymo_format',
     metric='LET_mAP',
-    task='mono_det')
+    load_type='mv_image_based',
+)
 test_evaluator = val_evaluator
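The renamed multi-view base config and the new FoV base config differ mainly in the loading mode and in which ground-truth bin file WaymoMetric evaluates against; a quick comparison sketch (file paths as in the repository layout above):

```python
from mmengine.config import Config

for name in ('waymoD5-mv-mono3d-3class', 'waymoD5-fov-mono3d-3class'):
    cfg = Config.fromfile(f'configs/_base_/datasets/{name}.py')
    print(name,
          cfg.val_dataloader.dataset.load_type,
          cfg.val_evaluator.waymo_bin_file)
# waymoD5-mv-mono3d-3class  mv_image_based  ./data/waymo/waymo_format/cam_gt.bin
# waymoD5-fov-mono3d-3class fov_image_based ./data/waymo/waymo_format/fov_gt.bin
```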
Lines changed: 112 additions & 0 deletions (new file)
@@ -0,0 +1,112 @@
_base_ = [
    '../_base_/datasets/waymoD5-fov-mono3d-3class.py',
    '../_base_/models/pgd.py', '../_base_/schedules/mmdet-schedule-1x.py',
    '../_base_/default_runtime.py'
]
# model settings
model = dict(
    backbone=dict(
        type='mmdet.ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        stage_with_dcn=(False, False, True, True)),
    neck=dict(num_outs=3),
    bbox_head=dict(
        num_classes=3,
        bbox_code_size=7,
        pred_attrs=False,
        pred_velo=False,
        pred_bbox2d=True,
        use_onlyreg_proj=True,
        strides=(8, 16, 32),
        regress_ranges=((-1, 128), (128, 256), (256, 1e8)),
        group_reg_dims=(2, 1, 3, 1, 16,
                        4),  # offset, depth, size, rot, kpts, bbox2d
        reg_branch=(
            (256, ),  # offset
            (256, ),  # depth
            (256, ),  # size
            (256, ),  # rot
            (256, ),  # kpts
            (256, )  # bbox2d
        ),
        centerness_branch=(256, ),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_centerness=dict(
            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        use_depth_classifier=True,
        depth_branch=(256, ),
        depth_range=(0, 50),
        depth_unit=10,
        division='uniform',
        depth_bins=6,
        pred_keypoints=True,
        weight_dim=1,
        loss_depth=dict(
            type='UncertainSmoothL1Loss', alpha=1.0, beta=3.0,
            loss_weight=1.0),
        loss_bbox2d=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.0),
        loss_consistency=dict(type='mmdet.GIoULoss', loss_weight=0.0),
        bbox_coder=dict(
            type='PGDBBoxCoder',
            base_depths=((41.01, 18.44), ),
            base_dims=(
                (4.73, 1.77, 2.08),
                (0.91, 1.74, 0.84),
                (1.81, 1.77, 0.84),
            ),
            code_size=7)),
    # set weight 1.0 for base 7 dims (offset, depth, size, rot)
    # 0.2 for 16-dim keypoint offsets and 1.0 for 4-dim 2D distance targets
    train_cfg=dict(code_weight=[
        1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
        0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0
    ]),
    test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))

# optimizer
optim_wrapper = dict(
    optimizer=dict(
        type='SGD',
        lr=0.008,
    ),
    paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
    clip_grad=dict(max_norm=35, norm_type=2))

param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0 / 3,
        by_epoch=False,
        begin=0,
        end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=24,
        by_epoch=True,
        milestones=[16, 22],
        gamma=0.1)
]
total_epochs = 24
runner = dict(max_epochs=total_epochs)
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=24)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
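Finally, a hedged sketch of how this new PGD config could be launched for training through mmengine's Runner. The config path is a placeholder (the file name is not visible in this view), and the Waymo kitti_format data must already be prepared as described in the dataset config.

```python
from mmengine.config import Config
from mmengine.runner import Runner

# placeholder path -- substitute the actual name of the new PGD Waymo-FoV config
cfg = Config.fromfile('configs/pgd/<new_pgd_waymoD5_fov_config>.py')
cfg.work_dir = './work_dirs/pgd_waymoD5_fov'

runner = Runner.from_cfg(cfg)
runner.train()
```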
