Skip to content

Commit ed073a0

Browse files
[Fix] Fix some bugs in waymo conversion and dataset parsing (#1892)
* add the code of generating cam_sync_labels in waymo dataset
* fix key error in waymo conversion
* fix waymo convert bug and refactor parse_ann_info in WaymoDataset
* add image_path key in waymo sweeps
* polish code
* add sweeps in data_prefix in waymo config
* fix some bugs in waymo
* resolve some comments and fix configs of pointpillars
* add 2 TODOs
* fix lint

Co-authored-by: lianqing <lianqing1997@gmail.com>
1 parent 937e7f8 commit ed073a0

15 files changed, with 89 additions and 46 deletions.

configs/_base_/datasets/waymoD5-3d-3class.py

Lines changed: 21 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,18 @@
22
# D5 in the config name means the whole dataset is divided into 5 folds
33
# We only use one fold for efficient experiments
44
dataset_type = 'WaymoDataset'
5+
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
56
data_root = 'data/waymo/kitti_format/'
67
file_client_args = dict(backend='disk')
78
# Uncomment the following if use ceph or other file clients.
89
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
910
# for more details.
1011
# file_client_args = dict(
11-
# backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
12+
# backend='petrel',
13+
# path_mapping={
14+
# './data/waymo': 's3://openmmlab/datasets/detection3d/waymo',
15+
# 'data/waymo': 's3://openmmlab/datasets/detection3d/waymo'
16+
# })
1217

1318
class_names = ['Car', 'Pedestrian', 'Cyclist']
1419
metainfo = dict(CLASSES=class_names)
@@ -33,7 +38,7 @@
3338
train_pipeline = [
3439
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
3540
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
36-
dict(type='ObjectSample', db_sampler=db_sampler),
41+
# dict(type='ObjectSample', db_sampler=db_sampler),
3742
dict(
3843
type='RandomFlip3D',
3944
sync_2d=False,
@@ -51,7 +56,12 @@
5156
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
5257
]
5358
test_pipeline = [
54-
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=6, use_dim=5),
59+
dict(
60+
type='LoadPointsFromFile',
61+
coord_type='LIDAR',
62+
load_dim=6,
63+
use_dim=5,
64+
file_client_args=file_client_args),
5565
dict(
5666
type='MultiScaleFlipAug3D',
5767
img_scale=(1333, 800),
@@ -98,7 +108,8 @@
98108
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
99109
box_type_3d='LiDAR',
100110
# load one frame every five frames
101-
load_interval=5)))
111+
load_interval=5,
112+
file_client_args=file_client_args)))
102113
val_dataloader = dict(
103114
batch_size=1,
104115
num_workers=1,
@@ -114,7 +125,8 @@
114125
modality=input_modality,
115126
test_mode=True,
116127
metainfo=metainfo,
117-
box_type_3d='LiDAR'))
128+
box_type_3d='LiDAR',
129+
file_client_args=file_client_args))
118130

119131
test_dataloader = dict(
120132
batch_size=1,
@@ -131,13 +143,15 @@
131143
modality=input_modality,
132144
test_mode=True,
133145
metainfo=metainfo,
134-
box_type_3d='LiDAR'))
146+
box_type_3d='LiDAR',
147+
file_client_args=file_client_args))
135148

136149
val_evaluator = dict(
137150
type='WaymoMetric',
138151
ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
139152
waymo_bin_file='./data/waymo/waymo_format/gt.bin',
140-
data_root='./data/waymo/waymo_format')
153+
data_root='./data/waymo/waymo_format',
154+
file_client_args=file_client_args)
141155
test_evaluator = val_evaluator
142156

143157
vis_backends = [dict(type='LocalVisBackend')]

configs/_base_/datasets/waymoD5-3d-car.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,8 @@
8686
type=dataset_type,
8787
data_root=data_root,
8888
ann_file='waymo_infos_train.pkl',
89-
data_prefix=dict(pts='training/velodyne'),
89+
data_prefix=dict(
90+
pts='training/velodyne', sweeps='training/velodyne'),
9091
pipeline=train_pipeline,
9192
modality=input_modality,
9293
test_mode=False,
@@ -105,7 +106,7 @@
105106
dataset=dict(
106107
type=dataset_type,
107108
data_root=data_root,
108-
data_prefix=dict(pts='training/velodyne'),
109+
data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
109110
ann_file='waymo_infos_val.pkl',
110111
pipeline=eval_pipeline,
111112
modality=input_modality,
@@ -122,7 +123,7 @@
122123
dataset=dict(
123124
type=dataset_type,
124125
data_root=data_root,
125-
data_prefix=dict(pts='training/velodyne'),
126+
data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
126127
ann_file='waymo_infos_val.pkl',
127128
pipeline=eval_pipeline,
128129
modality=input_modality,

configs/_base_/default_runtime.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
timer=dict(type='IterTimerHook'),
55
logger=dict(type='LoggerHook', interval=50),
66
param_scheduler=dict(type='ParamSchedulerHook'),
7-
checkpoint=dict(type='CheckpointHook', interval=1),
7+
checkpoint=dict(type='CheckpointHook', interval=-1),
88
sampler_seed=dict(type='DistSamplerSeedHook'),
99
visualization=dict(type='Det3DVisualizationHook'))
1010

configs/pointpillars/pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymo-3d-3class.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
]
77

88
# data settings
9-
data = dict(train=dict(dataset=dict(load_interval=1)))
9+
train_dataloader = dict(dataset=dict(dataset=dict(load_interval=1)))
1010
# Default setting for scaling LR automatically
1111
# - `enable` means enable scaling LR automatically
1212
# or not by default.

configs/pointpillars/pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymo-3d-car.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
]
77

88
# data settings
9-
data = dict(train=dict(dataset=dict(load_interval=1)))
9+
train_dataloader = dict(dataset=dict(dataset=dict(load_interval=1)))
1010

1111
# model settings
1212
model = dict(

mmdet3d/datasets/waymo_dataset.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -130,10 +130,9 @@ def parse_ann_info(self, info: dict) -> dict:
130130
ann_info = Det3DDataset.parse_ann_info(self, info)
131131
if ann_info is None:
132132
# empty instance
133-
anns_results = {}
134-
anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
135-
anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
136-
return anns_results
133+
ann_info = {}
134+
ann_info['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
135+
ann_info['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
137136

138137
ann_info = self._remove_dontcare(ann_info)
139138
# in kitti, lidar2cam = R0_rect @ Tr_velo_to_cam
@@ -158,12 +157,14 @@ def parse_ann_info(self, info: dict) -> dict:
158157
origin=(0.5, 0.5, 0.5))
159158

160159
else:
160+
# in waymo, lidar2cam = R0_rect @ Tr_velo_to_cam
161+
# convert gt_bboxes_3d to velodyne coordinates with `lidar2cam`
161162
lidar2cam = np.array(
162163
info['images'][self.default_cam_key]['lidar2cam'])
163-
164164
gt_bboxes_3d = CameraInstance3DBoxes(
165165
ann_info['gt_bboxes_3d']).convert_to(self.box_mode_3d,
166166
np.linalg.inv(lidar2cam))
167+
ann_info['gt_bboxes_3d'] = gt_bboxes_3d
167168

168169
anns_results = dict(
169170
gt_bboxes_3d=gt_bboxes_3d,
@@ -220,7 +221,7 @@ def parse_data_info(self, info: dict) -> dict:
220221

221222
# TODO check if need to modify the sample id
222223
# TODO check when will use it except for evaluation.
223-
camera_info['sample_id'] = info['sample_id']
224+
camera_info['sample_idx'] = info['sample_idx']
224225

225226
if not self.test_mode:
226227
# used in training

mmdet3d/evaluation/metrics/waymo_metric.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def compute_metrics(self, results: list) -> Dict[str, float]:
9393
"""Compute the metrics from processed results.
9494
9595
Args:
96-
results (list): The processed results of each batch.
96+
results (list): The processed results of the whole dataset.
9797
9898
Returns:
9999
Dict[str, float]: The computed metrics. The keys are the names of
@@ -360,7 +360,7 @@ def merge_multi_view_boxes(self, box_dict_per_frame: List[dict],
360360
for cam_idx in range(self.num_cams):
361361
box_dict[key].append(box_dict_per_frame[cam_idx][key])
362362
# merge each elements
363-
box_dict['sample_id'] = cam0_info['image_id']
363+
box_dict['sample_idx'] = cam0_info['image_id']
364364
for key in ['bbox', 'box3d_lidar', 'scores', 'label_preds']:
365365
box_dict[key] = np.concatenate(box_dict[key])
366366

mmdet3d/models/detectors/imvoxelnet.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,6 @@
99
from mmdet3d.registry import MODELS, TASK_UTILS
1010
from mmdet3d.structures.det3d_data_sample import SampleList
1111
from mmdet3d.utils import ConfigType, OptConfigType, OptInstanceList
12-
from mmdet.models.detectors import BaseDetector
13-
1412

1513

1614
@MODELS.register_module()

mmdet3d/structures/bbox_3d/cam_box3d.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -291,13 +291,15 @@ def convert_to(self, dst, rt_mat=None, correct_yaw=False):
291291
The conversion from ``src`` coordinates to ``dst`` coordinates
292292
usually comes along the change of sensors, e.g., from camera
293293
to LiDAR. This requires a transformation matrix.
294-
correct_yaw (bool): If convert the yaw angle to the target
294+
correct_yaw (bool): Whether to convert the yaw angle to the target
295295
coordinate. Defaults to False.
296296
Returns:
297297
:obj:`BaseInstance3DBoxes`:
298298
The converted box of the same type in the ``dst`` mode.
299299
"""
300300
from .box_3d_mode import Box3DMode
301+
302+
# TODO: always set correct_yaw=True
301303
return Box3DMode.convert(
302304
box=self,
303305
src=Box3DMode.CAM,

tools/create_data.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -215,14 +215,14 @@ def waymo_data_prep(root_path,
215215
info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl')
216216
info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl')
217217
info_trainval_path = osp.join(out_dir, f'{info_prefix}_infos_trainval.pkl')
218-
update_pkl_infos('kitti', out_dir=out_dir, pkl_path=info_train_path)
219-
update_pkl_infos('kitti', out_dir=out_dir, pkl_path=info_val_path)
220-
update_pkl_infos('kitti', out_dir=out_dir, pkl_path=info_trainval_path)
218+
update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_train_path)
219+
update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_val_path)
220+
update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_trainval_path)
221221
GTDatabaseCreater(
222222
'WaymoDataset',
223223
out_dir,
224224
info_prefix,
225-
f'{out_dir}/{info_prefix}_infos_train.pkl',
225+
f'{info_prefix}_infos_train.pkl',
226226
relative_path=False,
227227
with_mask=False,
228228
num_worker=workers).create()

tools/dataset_converters/create_gt_database.py

Lines changed: 21 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,8 @@ def create_groundtruth_database(dataset_class_name,
196196
file_client_args = dict(backend='disk')
197197
dataset_cfg.update(
198198
test_mode=False,
199-
split='training',
199+
data_prefix=dict(
200+
pts='training/velodyne', img='', sweeps='training/velodyne'),
200201
modality=dict(
201202
use_lidar=True,
202203
use_depth=False,
@@ -407,7 +408,9 @@ def create_single(self, input_dict):
407408
image_idx = example['sample_idx']
408409
points = example['points'].tensor.numpy()
409410
gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
410-
names = annos['gt_names']
411+
names = [
412+
self.dataset.metainfo['CLASSES'][i] for i in annos['gt_labels_3d']
413+
]
411414
group_dict = dict()
412415
if 'group_ids' in annos:
413416
group_ids = annos['group_ids']
@@ -510,7 +513,8 @@ def create(self):
510513
file_client_args = dict(backend='disk')
511514
dataset_cfg.update(
512515
test_mode=False,
513-
split='training',
516+
data_prefix=dict(
517+
pts='training/velodyne_reduced', img='training/image_2'),
514518
modality=dict(
515519
use_lidar=True,
516520
use_depth=False,
@@ -534,6 +538,9 @@ def create(self):
534538
elif self.dataset_class_name == 'NuScenesDataset':
535539
dataset_cfg.update(
536540
use_valid_flag=True,
541+
data_prefix=dict(
542+
pts='samples/LIDAR_TOP', img='',
543+
sweeps='sweeps/LIDAR_TOP'),
537544
pipeline=[
538545
dict(
539546
type='LoadPointsFromFile',
@@ -556,7 +563,10 @@ def create(self):
556563
file_client_args = dict(backend='disk')
557564
dataset_cfg.update(
558565
test_mode=False,
559-
split='training',
566+
data_prefix=dict(
567+
pts='training/velodyne',
568+
img='',
569+
sweeps='training/velodyne'),
560570
modality=dict(
561571
use_lidar=True,
562572
use_depth=False,
@@ -577,8 +587,8 @@ def create(self):
577587
file_client_args=file_client_args)
578588
])
579589

580-
dataset = build_dataset(dataset_cfg)
581-
self.pipeline = dataset.pipeline
590+
self.dataset = build_dataset(dataset_cfg)
591+
self.pipeline = self.dataset.pipeline
582592
if self.database_save_path is None:
583593
self.database_save_path = osp.join(
584594
self.data_path, f'{self.info_prefix}_gt_database')
@@ -595,13 +605,15 @@ def create(self):
595605
self.file2id.update({info['file_name']: i})
596606

597607
def loop_dataset(i):
598-
input_dict = dataset.get_data_info(i)
599-
dataset.pre_pipeline(input_dict)
608+
input_dict = self.dataset.get_data_info(i)
609+
input_dict['box_type_3d'] = self.dataset.box_type_3d
610+
input_dict['box_mode_3d'] = self.dataset.box_mode_3d
600611
return input_dict
601612

602613
multi_db_infos = mmengine.track_parallel_progress(
603614
self.create_single,
604-
((loop_dataset(i) for i in range(len(dataset))), len(dataset)),
615+
((loop_dataset(i)
616+
for i in range(len(self.dataset))), len(self.dataset)),
605617
self.num_worker)
606618
print('Make global unique group id')
607619
group_counter_offset = 0

tools/dataset_converters/kitti_data_utils.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,8 +46,9 @@ def get_image_path(idx,
4646
relative_path=True,
4747
exist_check=True,
4848
info_type='image_2',
49+
file_tail='.png',
4950
use_prefix_id=False):
50-
return get_kitti_info_path(idx, prefix, info_type, '.png', training,
51+
return get_kitti_info_path(idx, prefix, info_type, file_tail, training,
5152
relative_path, exist_check, use_prefix_id)
5253

5354

@@ -378,6 +379,7 @@ def gather_single(self, idx):
378379
self.training,
379380
self.relative_path,
380381
info_type='image_0',
382+
file_tail='.jpg',
381383
use_prefix_id=True)
382384
if self.with_imageshape:
383385
img_path = image_info['image_path']
@@ -443,6 +445,7 @@ def gather_single(self, idx):
443445
else:
444446
rect_4x4 = R0_rect
445447

448+
# TODO: naming Tr_velo_to_cam or Tr_velo_to_cam0
446449
Tr_velo_to_cam = np.array([
447450
float(info) for info in lines[6].split(' ')[1:13]
448451
]).reshape([3, 4])
@@ -521,6 +524,14 @@ def gather_single(self, idx):
521524
relative_path=False,
522525
use_prefix_id=True)) as f:
523526
prev_info['timestamp'] = np.int64(f.read())
527+
prev_info['image_path'] = get_image_path(
528+
prev_idx,
529+
self.path,
530+
self.training,
531+
self.relative_path,
532+
info_type='image_0',
533+
file_tail='.jpg',
534+
use_prefix_id=True)
524535
prev_pose_path = get_pose_path(
525536
prev_idx,
526537
self.path,

tools/dataset_converters/nuscenes_converter.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,7 @@ def _fill_trainval_infos(nusc,
177177

178178
info = {
179179
'lidar_path': lidar_path,
180+
'num_features': 5,
180181
'token': sample['token'],
181182
'sweeps': [],
182183
'cams': dict(),

tools/dataset_converters/update_infos_to_v2.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -285,7 +285,8 @@ def update_nuscenes_infos(pkl_path, out_dir):
285285
temp_data_info['ego2global'] = convert_quaternion_to_matrix(
286286
ori_info_dict['ego2global_rotation'],
287287
ori_info_dict['ego2global_translation'])
288-
temp_data_info['lidar_points']['num_pts_feats'] = 5
288+
temp_data_info['lidar_points']['num_pts_feats'] = ori_info_dict.get(
289+
'num_features', 5)
289290
temp_data_info['lidar_points']['lidar_path'] = ori_info_dict[
290291
'lidar_path'].split('/')[-1]
291292
temp_data_info['lidar_points'][
@@ -515,7 +516,7 @@ def update_s3dis_infos(pkl_path, out_dir):
515516
converted_list = []
516517
for i, ori_info_dict in enumerate(mmengine.track_iter_progress(data_list)):
517518
temp_data_info = get_empty_standard_data_info()
518-
temp_data_info['sample_id'] = i
519+
temp_data_info['sample_idx'] = i
519520
temp_data_info['lidar_points']['num_pts_feats'] = ori_info_dict[
520521
'point_cloud']['num_features']
521522
temp_data_info['lidar_points']['lidar_path'] = ori_info_dict[
@@ -830,7 +831,7 @@ def update_waymo_infos(pkl_path, out_dir):
830831

831832
if 'plane' in ori_info_dict:
832833
temp_data_info['plane'] = ori_info_dict['plane']
833-
temp_data_info['sample_id'] = ori_info_dict['image']['image_idx']
834+
temp_data_info['sample_idx'] = ori_info_dict['image']['image_idx']
834835

835836
# calib matrix
836837
for cam_idx, cam_key in enumerate(camera_types):

0 commit comments

Comments
 (0)