Skip to content

Commit 6d3518d

Browse files
lianqing01lianqing11Tai-Wang
authored
[Features] Support waymo challenge solution (#1716)
* update evaluation metric to support waymo cam only evaluation * add transformation for bev detection * add multiview dfm * support multiview detection in datasets with transformation, dfm model and metric * remove deprecated config and update doc string * remove file_client_args=file_client_args and update docstr * add doc string and remove pdb * fix the doc string of voxel fusion * add doc string * remove lidar2img * add doc string * update doc string * support waymo dataset for replace_ceph and modify path of pkl in config * update evaluation metrics; and the config for waymo solution * fix the index error in waymo metric and add lidar2img utils function * replace __call_ to transform * fix doc string * rename configs * update the config name * update the lidar2cam calib in waymo data creater Co-authored-by: lianqing <lianqing1997@gmail.com> Co-authored-by: Tai-Wang <tab_wang@outlook.com>
1 parent 25e3801 commit 6d3518d

22 files changed

+2372
-145
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,3 +133,4 @@ data/sunrgbd/OFFICIAL_SUNRGBD/
133133

134134
# Waymo evaluation
mmdet3d/core/evaluation/waymo_utils/compute_detection_metrics_main
mmdet3d/core/evaluation/waymo_utils/compute_detection_let_metrics_main
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
input_modality = dict(use_lidar=False, use_camera=True)
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.

train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    # NOTE(review): fixed scale from (1284, 832) to (1248, 832) — the
    # original value contradicted the base shape stated above and the
    # (1248, 832) used by test_pipeline/eval_pipeline below; 1284 looks
    # like a digit transposition of 1248.
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]

metainfo = dict(CLASSES=class_names)

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        task='mono3d',
        # load one frame every five frames
        # (the original comment said "three", contradicting load_interval=5)
        load_interval=5))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        task='mono3d',
    ))

test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        task='mono3d',
    ))

val_evaluator = dict(
    type='WaymoMetric',
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP',
    task='mono3d')
test_evaluator = val_evaluator
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.

class_names = ['Car', 'Pedestrian', 'Cyclist']
input_modality = dict(use_lidar=False, use_camera=True)
point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]

# per-view transforms applied to every camera image via MultiViewWrapper
train_transforms = [
    dict(type='PhotoMetricDistortion3D'),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True),
    dict(type='RandomCrop3D', crop_size=(720, 1080)),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5, flip_box3d=False),
]

train_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='MultiViewWrapper', transforms=train_transforms),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(
        type='Pack3DDetInputs', keys=[
            'img',
            'gt_bboxes_3d',
            'gt_labels_3d',
        ]),
]
test_transforms = [
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True)
]
test_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(type='MultiViewWrapper', transforms=test_transforms),
    dict(type='Pack3DDetInputs', keys=['img'])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(type='MultiViewWrapper', transforms=test_transforms),
    dict(type='Pack3DDetInputs', keys=['img'])
]
metainfo = dict(CLASSES=class_names)

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4',
        ),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        # NOTE(review): normalized from 'Lidar' to 'LiDAR' for consistency
        # with the sibling configs; the lookup is case-insensitive.
        box_type_3d='LiDAR',
        # load one frame every five frames
        load_interval=5,
    ))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_val.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4',
        ),
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR',
    ))

test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_val.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4',
        ),
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR',
    ))
val_evaluator = dict(
    type='WaymoMetric',
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP')

test_evaluator = val_evaluator

0 commit comments

Comments
 (0)