
Commit d80dc07

[Feature] Support FCAF3D on S3DIS dataset in dev-1.x branch (#1984)
* support fcaf3d for s3dis dataset
* Update convert_utils.py
* Update seg3d_dataset.py
* Delete compose.py
* fix import error
* use `mmengine.Compose`
* Update s3dis-3d.py
* Update fcaf3d_2xb8_s3dis-3d-5class.py
* Update s3dis_dataset.py
* update unittest for s3dis
* update docs
* use `mmcv.Compose` instead of `mmengine.Compose`
* update docstring
* fix s3dis preprocessing bug
* Add typehint
* Update config and fix s3dis dataset
* update typehit
* Update convert_utils.py
* Update README and metafile

Co-authored-by: Tai-Wang <tab_wang@outlook.com>
1 parent 8981169 commit d80dc07

23 files changed (+508, -278 lines)

configs/_base_/datasets/s3dis-3d.py

Lines changed: 114 additions & 0 deletions
@@ -0,0 +1,114 @@
# dataset settings
dataset_type = 'S3DISDataset'
data_root = 'data/s3dis/'

metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
train_area = [1, 2, 3, 4, 6]
test_area = 5

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='PointSample', num_points=100000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.087266, 0.087266],
        scale_ratio_range=[0.9, 1.1],
        translation_std=[.1, .1, .1],
        shift_height=False),
    dict(type='NormalizePointsColor', color_mean=None),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=100000),
            dict(type='NormalizePointsColor', color_mean=None),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=13,
        dataset=dict(
            type='ConcatDataset',
            datasets=[
                dict(
                    type=dataset_type,
                    data_root=data_root,
                    ann_file=f's3dis_infos_Area_{i}.pkl',
                    pipeline=train_pipeline,
                    filter_empty_gt=True,
                    metainfo=metainfo,
                    box_type_3d='Depth') for i in train_area
            ])))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
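
As a quick sanity check of the new dataset config, it can be loaded with mmengine's config loader. The snippet below is an illustrative sketch, not part of this commit; it assumes an mmdetection3d dev-1.x checkout with the S3DIS info files already generated under data/s3dis/.

# Minimal sketch, assuming mmengine is installed and the working directory
# is the mmdetection3d repo root; the printed values follow from the config above.
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/s3dis-3d.py')
# Training concatenates Areas 1, 2, 3, 4 and 6 and repeats them 13 times;
# Area 5 is held out for validation and testing.
print(cfg.train_dataloader.dataset.times)      # 13
print(cfg.val_dataloader.dataset.ann_file)     # 's3dis_infos_Area_5.pkl'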

configs/fcaf3d/README.md

Lines changed: 6 additions & 0 deletions
@@ -30,6 +30,12 @@ We implement FCAF3D and provide the result and checkpoints on the ScanNet and SU
 | :---: | :---: | :---: | :---: | :---: | :---: |
 | [MinkResNet34](./fcaf3d_8x2_sunrgbd-3d-10class.py) | 6.3 | 15.6 | 63.8(63.8\*) | 47.3(48.2\*) | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.log.json) |
 
+### S3DIS
+
+| Backbone | Mem (GB) | Inf time (fps) | AP@0.25 | AP@0.5 | Download |
+| :---: | :---: | :---: | :---: | :---: | :---: |
+| [MinkResNet34](./fcaf3d_2xb8_s3dis-3d-5class.py) | 23.5 | 4.2 | 67.4(64.9\*) | 45.7(43.8\*) | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.log.json) |
+
 **Note**
 
 - We report the results across 5 train runs followed by 5 test runs. * means the results reported in the paper.

configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
_base_ = [
    '../_base_/models/fcaf3d.py', '../_base_/default_runtime.py',
    '../_base_/datasets/s3dis-3d.py'
]

model = dict(bbox_head=dict(num_classes=5))

optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=0.001, weight_decay=0.0001),
    clip_grad=dict(max_norm=10, norm_type=2))

# learning rate
param_scheduler = dict(
    type='MultiStepLR',
    begin=0,
    end=12,
    by_epoch=True,
    milestones=[8, 11],
    gamma=0.1)

custom_hooks = [dict(type='EmptyCacheHook', after_iter=True)]

# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=12)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
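
Following the dev-1.x config naming convention, `2xb8` indicates 2 GPUs with 8 samples per GPU (matching batch_size=8 in the dataset config above). The usual entry point is tools/train.py; as a rough, assumption-laden sketch of consuming this config programmatically with mmengine's Runner (not part of this commit):

# Illustrative sketch only: assumes mmdet3d dev-1.x and mmengine are installed
# and the S3DIS data is prepared; distributed 2-GPU training would normally go
# through tools/train.py with a launcher instead of this single-process call.
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile('configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py')
cfg.work_dir = './work_dirs/fcaf3d_2xb8_s3dis-3d-5class'  # Runner requires a work_dir
runner = Runner.from_cfg(cfg)
runner.train()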

configs/fcaf3d/metafile.yml

Lines changed: 14 additions & 0 deletions
@@ -42,3 +42,17 @@ Models:
           AP@0.25: 63.76
           AP@0.5: 47.31
     Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_sunrgbd-3d-10class/fcaf3d_8x2_sunrgbd-3d-10class_20220805_165017.pth
+
+  - Name: fcaf3d_2xb8_s3dis-3d-5class
+    In Collection: FCAF3D
+    Config: configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
+    Metadata:
+      Training Data: S3DIS
+      Training Memory (GB): 23.5
+    Results:
+      - Task: 3D Object Detection
+        Dataset: S3DIS
+        Metrics:
+          AP@0.25: 67.36
+          AP@0.5: 45.74
+    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/fcaf3d/fcaf3d_8x2_s3dis-3d-5class/fcaf3d_8x2_s3dis-3d-5class_20220805_121957.pth

docs/en/model_zoo.md

Lines changed: 4 additions & 0 deletions
@@ -104,6 +104,10 @@ Please refer to [MonoFlex](https://github.com/open-mmlab/mmdetection3d/tree/v1.0
 
 Please refer to [SA-SSD](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/sassd) for details. We provide SA-SSD baselines on the KITTI dataset.
 
+### FCAF3D
+
+Please refer to [FCAF3D](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/fcaf3d) for details. We provide FCAF3D baselines on the ScanNet, S3DIS, and SUN RGB-D datasets.
+
 ### Mixed Precision (FP16) Training
 
 Please refer to [Mixed Precision (FP16) Training on PointPillars](https://github.com/open-mmlab/mmdetection3d/tree/v1.0.0.dev0/configs/pointpillars/hv_pointpillars_fpn_sbn-all_fp16_2x8_2x_nus-3d.py) for details.

mmdet3d/datasets/convert_utils.py

Lines changed: 22 additions & 21 deletions
@@ -1,9 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
-from collections import OrderedDict
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
+from nuscenes import NuScenes
 from nuscenes.utils.geometry_utils import view_points
 from pyquaternion import Quaternion
 from shapely.geometry import MultiPoint, box
@@ -53,19 +53,20 @@
 }
 
 
-def get_nuscenes_2d_boxes(nusc, sample_data_token: str,
-                          visibilities: List[str]):
-    """Get the 2d / mono3d annotation records for a given `sample_data_token of
-    nuscenes dataset.
+def get_nuscenes_2d_boxes(nusc: NuScenes, sample_data_token: str,
+                          visibilities: List[str]) -> List[dict]:
+    """Get the 2d / mono3d annotation records for a given `sample_data_token`
+    of nuscenes dataset.
 
     Args:
+        nusc (:obj:`NuScenes`): NuScenes class.
         sample_data_token (str): Sample data token belonging to a camera
            keyframe.
-        visibilities (list[str]): Visibility filter.
+        visibilities (List[str]): Visibility filter.
 
     Return:
-        list[dict]: List of 2d annotation record that belongs to the input
-            `sample_data_token`.
+        List[dict]: List of 2d annotation record that belongs to the input
+        `sample_data_token`.
     """
 
     # Get the sample data and the sample corresponding to that sample data.
@@ -190,7 +191,7 @@ def get_kitti_style_2d_boxes(info: dict,
                              occluded: Tuple[int] = (0, 1, 2, 3),
                              annos: Optional[dict] = None,
                              mono3d: bool = True,
-                             dataset: str = 'kitti'):
+                             dataset: str = 'kitti') -> List[dict]:
     """Get the 2d / mono3d annotation records for a given info.
 
     This function is used to get 2D/Mono3D annotations when loading annotations
@@ -202,7 +203,7 @@
             belong to. In KITTI, typically only CAM 2 will be used,
             and in Waymo, multi cameras could be used.
             Defaults to 2.
-        occluded (tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
+        occluded (Tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
             0 = fully visible, 1 = partly occluded, 2 = largely occluded,
            3 = unknown, -1 = DontCare.
            Defaults to (0, 1, 2, 3).
@@ -213,8 +214,8 @@
            Defaults to 'kitti'.
 
     Return:
-        list[dict]: List of 2d / mono3d annotation record that
-            belongs to the input camera id.
+        List[dict]: List of 2d / mono3d annotation record that
+        belongs to the input camera id.
     """
     # Get calibration information
     camera_intrinsic = info['calib'][f'P{cam_idx}']
@@ -336,20 +337,20 @@ def convert_annos(info: dict, cam_idx: int) -> dict:
 
 
 def post_process_coords(
-    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
-) -> Union[Tuple[float, float, float, float], None]:
+    corner_coords: List[int], imsize: Tuple[int] = (1600, 900)
+) -> Union[Tuple[float], None]:
     """Get the intersection of the convex hull of the reprojected bbox corners
     and the image canvas, return None if no intersection.
 
     Args:
-        corner_coords (list[int]): Corner coordinates of reprojected
+        corner_coords (List[int]): Corner coordinates of reprojected
            bounding box.
-        imsize (tuple[int]): Size of the image canvas.
+        imsize (Tuple[int]): Size of the image canvas.
            Defaults to (1600, 900).
 
     Return:
-        tuple[float]: Intersection of the convex hull of the 2D box
-            corners and the image canvas.
+        Tuple[float] or None: Intersection of the convex hull of the 2D box
+        corners and the image canvas.
     """
     polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
     img_canvas = box(0, 0, imsize[0], imsize[1])
@@ -370,7 +371,7 @@ def post_process_coords(
 
 
 def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
-                    dataset: str) -> OrderedDict:
+                    dataset: str) -> Union[dict, None]:
     """Generate one 2D annotation record given various information on top of
     the 2D bounding box coordinates.
 
@@ -383,11 +384,11 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
        dataset (str): Name of dataset.
 
     Returns:
-        dict: A sample 2d annotation record.
+        dict or None: A sample 2d annotation record.
 
        - bbox_label (int): 2d box label id
       - bbox_label_3d (int): 3d box label id
-        - bbox (list[float]): left x, top y, right x, bottom y of 2d box
+        - bbox (List[float]): left x, top y, right x, bottom y of 2d box
       - bbox_3d_isvalid (bool): whether the box is valid
    """
 
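
The post_process_coords change above only touches type hints and docstrings. For orientation, the idea it documents (clipping the convex hull of reprojected box corners to the image canvas) can be sketched as below. This is a hedged illustration written from the signature and the two body lines visible in the diff, not the file's verbatim implementation, and the helper name is invented here.

# Illustrative only: mirrors the convex-hull-intersection idea described in the
# docstring of post_process_coords, using the same shapely primitives.
from typing import List, Tuple, Union

import numpy as np
from shapely.geometry import MultiPoint, box


def clip_corners_to_canvas(
    corner_coords: List,
    imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
    """Return (min_x, min_y, max_x, max_y) of the projected corners clipped to
    the image canvas, or None if the box falls entirely outside the canvas."""
    hull = MultiPoint(corner_coords).convex_hull
    canvas = box(0, 0, imsize[0], imsize[1])
    if not hull.intersects(canvas):
        return None
    intersection = hull.intersection(canvas)
    coords = np.array(intersection.exterior.coords)
    min_x, min_y = coords.min(axis=0)
    max_x, max_y = coords.max(axis=0)
    return float(min_x), float(min_y), float(max_x), float(max_y)


# Example: corners partly left of a 1600x900 canvas are clipped at x = 0.
print(clip_corners_to_canvas([(-50, 100), (200, 80), (210, 400), (-40, 420)]))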

mmdet3d/datasets/det3d_dataset.py

Lines changed: 11 additions & 10 deletions
@@ -30,14 +30,14 @@ class Det3DDataset(BaseDataset):
             information. Defaults to None.
         data_prefix (dict): Prefix for training data. Defaults to
             dict(pts='velodyne', img='').
-        pipeline (list[dict]): Pipeline used for data processing.
+        pipeline (List[dict]): Pipeline used for data processing.
             Defaults to [].
         modality (dict): Modality to specify the sensor data used as input,
             it usually has following keys:
 
             - use_camera: bool
             - use_lidar: bool
-            Defaults to `dict(use_lidar=True, use_camera=False)`
+            Defaults to dict(use_lidar=True, use_camera=False).
         default_cam_key (str, optional): The default camera name adopted.
             Defaults to None.
         box_type_3d (str): Type of 3D box of this dataset.
@@ -78,7 +78,7 @@ def __init__(self,
                  box_type_3d: dict = 'LiDAR',
                  filter_empty_gt: bool = True,
                  test_mode: bool = False,
-                 load_eval_anns=True,
+                 load_eval_anns: bool = True,
                  file_client_args: dict = dict(backend='disk'),
                  show_ins_var: bool = False,
                  **kwargs) -> None:
@@ -158,7 +158,7 @@ def __init__(self,
     def _remove_dontcare(self, ann_info: dict) -> dict:
         """Remove annotations that do not need to be cared.
 
-        -1 indicate dontcare in MMDet3d.
+        -1 indicates dontcare in MMDet3d.
 
         Args:
             ann_info (dict): Dict of annotation infos. The
@@ -186,7 +186,7 @@ def get_ann_info(self, index: int) -> dict:
             index (int): Index of the annotation data to get.
 
         Returns:
-            dict: annotation information.
+            dict: Annotation information.
         """
         data_info = self.get_data_info(index)
         # test model
@@ -197,7 +197,7 @@ def get_ann_info(self, index: int) -> dict:
 
         return ann_info
 
-    def parse_ann_info(self, info: dict) -> Optional[dict]:
+    def parse_ann_info(self, info: dict) -> Union[dict, None]:
         """Process the `instances` in data info to `ann_info`.
 
         In `Custom3DDataset`, we simply concatenate all the field
@@ -209,7 +209,7 @@ def parse_ann_info(self, info: dict) -> Optional[dict]:
             info (dict): Info dict.
 
         Returns:
-            dict | None: Processed `ann_info`
+            dict or None: Processed `ann_info`.
         """
         # add s or gt prefix for most keys after concat
         # we only process 3d annotations here, the corresponding
@@ -327,7 +327,8 @@ def parse_data_info(self, info: dict) -> dict:
 
         return info
 
-    def _show_ins_var(self, old_labels: np.ndarray, new_labels: torch.Tensor):
+    def _show_ins_var(self, old_labels: np.ndarray,
+                      new_labels: torch.Tensor) -> None:
         """Show variation of the number of instances before and after through
         the pipeline.
 
@@ -356,7 +357,7 @@ def _show_ins_var(self, old_labels: np.ndarray, new_labels: torch.Tensor):
             'The number of instances per category after and before '
             f'through pipeline:\n{table.table}', 'current')
 
-    def prepare_data(self, index: int) -> Optional[dict]:
+    def prepare_data(self, index: int) -> Union[dict, None]:
         """Data preparation for both training and testing stage.
 
         Called by `__getitem__` of dataset.
@@ -365,7 +366,7 @@ def prepare_data(self, index: int) -> Optional[dict]:
             index (int): Index for accessing the target data.
 
         Returns:
-            dict | None: Data dict of the corresponding index.
+            dict or None: Data dict of the corresponding index.
         """
         ori_input_dict = self.get_data_info(index)
 
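The small docstring fix in _remove_dontcare ("-1 indicates dontcare in MMDet3d") reflects a labeling convention: instances labeled -1 are dropped before training. A self-contained, purely illustrative sketch of that filtering idea follows; the dict keys and the whole snippet are hypothetical, not the dataset's actual method.

# Illustrative only: mimics the dontcare convention described in the docstring,
# where a label value of -1 marks an instance to be discarded.
import numpy as np

ann_info = {
    'gt_labels_3d': np.array([0, -1, 2, -1, 4]),
    'gt_bboxes_3d': np.arange(5 * 7).reshape(5, 7),  # dummy (x, y, z, dx, dy, dz, yaw)
}
keep = ann_info['gt_labels_3d'] > -1
filtered = {key: value[keep] for key, value in ann_info.items()}
print(filtered['gt_labels_3d'])        # [0 2 4]
print(filtered['gt_bboxes_3d'].shape)  # (3, 7)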