
Commit fdb17a5

lianqing11, Tai-Wang, lianqing01, and ZCMax authored
[Fix] Fix some loading bugs and support fov_image_based mode in Waymo dataset. (#1942)
* modify sample_id to sample_id and support fov_image_based on waymo dataset
* Update waymo_metric.py
* Minor fix
* Minor fix
* Minor fix
* Minor fix
* Minor fix
* Minor fix
* Minor fixes
* Minor fixes
* Remove optional
* fix dataset instances converting bugs
* Add a blank line to fix the doc compilation format
* Fix the bin file name in waymo_fov config
* Resolve conflicts
* fix ci and other things

Co-authored-by: Tai-Wang <tab_wang@outlook.com>
Co-authored-by: lianqing11 <lianqing11@foxmail.com>
Co-authored-by: ChaimZhu <zhuchenming@pjlab.org.cn>
1 parent 5982f9e commit fdb17a5

11 files changed: +463 −45 lines
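For orientation before the per-file diffs, the two dataset loading modes touched by this commit can be summarized as below. This is a sketch based only on the configs in this commit, not an exhaustive list of supported modes.

```python
# Sketch: the two load_type values used in the configs changed by this commit.
LOAD_TYPE_NOTES = {
    # only instances inside the camera's field of view are loaded; used by the
    # new Waymo FoV configs and by kitti-mono3d below
    'fov_image_based': 'FoV-restricted image-based loading',
    # one sample per camera image across all views; replaces the removed
    # task='mono_det' key in the nuScenes and Waymo multi-view configs
    'mv_image_based': 'multi-view image-based loading',
}
print(LOAD_TYPE_NOTES)
```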

configs/_base_/datasets/kitti-mono3d.py

Lines changed: 2 additions & 0 deletions
@@ -52,6 +52,7 @@
         data_prefix=dict(img='training/image_2'),
         pipeline=train_pipeline,
         modality=input_modality,
+        load_type='fov_image_based',
         test_mode=False,
         metainfo=metainfo,
         # we use box_type_3d='Camera' in monocular 3d
@@ -70,6 +71,7 @@
         ann_file='kitti_infos_val.pkl',
         pipeline=test_pipeline,
         modality=input_modality,
+        load_type='fov_image_based',
         metainfo=metainfo,
         test_mode=True,
         box_type_3d='Camera'))
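A minimal sketch of how the new flag could be checked once the config is parsed, assuming an mmdetection3d checkout at this commit with mmengine installed. The unwrapping loop is only a precaution in case the train dataset is wrapped (e.g. by RepeatDataset); it is not implied by the diff.

```python
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/kitti-mono3d.py')
ds = cfg.train_dataloader.dataset
while 'dataset' in ds:  # unwrap a possible dataset wrapper
    ds = ds.dataset
print(ds.load_type)  # expected: 'fov_image_based'
```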

configs/_base_/datasets/nus-mono3d.py

Lines changed: 2 additions & 2 deletions
@@ -65,7 +65,7 @@
             CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
             CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
         ann_file='nuscenes_infos_train.pkl',
-        task='mono_det',
+        load_type='mv_image_based',
         pipeline=train_pipeline,
         metainfo=metainfo,
         modality=input_modality,
@@ -92,7 +92,7 @@
             CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
             CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
         ann_file='nuscenes_infos_val.pkl',
-        task='mono_det',
+        load_type='mv_image_based',
         pipeline=test_pipeline,
         modality=input_modality,
         metainfo=metainfo,
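The nuScenes change is a pure key rename: task='mono_det' becomes load_type='mv_image_based'. For downstream configs still using the old key, a hypothetical helper like the following (not part of the commit) captures the mapping; note that kitti-mono3d above switches to 'fov_image_based' instead.

```python
def migrate_dataset_cfg(ds_cfg: dict) -> dict:
    """Rename the removed task='mono_det' key to the new load_type key."""
    ds_cfg = dict(ds_cfg)  # shallow copy; leave the caller's dict untouched
    if ds_cfg.pop('task', None) == 'mono_det':
        ds_cfg['load_type'] = 'mv_image_based'
    return ds_cfg


old = dict(type='NuScenesDataset', ann_file='nuscenes_infos_val.pkl',
           task='mono_det')
print(migrate_dataset_cfg(old)['load_type'])  # mv_image_based
```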
configs/_base_/datasets/waymoD5-fov-mono3d-3class.py

Lines changed: 150 additions & 0 deletions (new file)
@@ -0,0 +1,150 @@
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
input_modality = dict(use_lidar=False, use_camera=True)
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]

metainfo = dict(CLASSES=class_names)

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # load one frame every five frames
        load_interval=5))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
    ))

test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_RIGHT='training/image_1',
            CAM_FRONT_LEFT='training/image_2',
            CAM_SIDE_RIGHT='training/image_3',
            CAM_SIDE_LEFT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
    ))

val_evaluator = dict(
    type='WaymoMetric',
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP',
    load_type='fov_image_based',
)
test_evaluator = val_evaluator
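A short sketch (assuming a checkout that contains this new base config) tying together the two sides of the file above: the FoV loading mode on the dataloaders and the matching fov_gt.bin plus LET_mAP on the evaluator.

```python
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/waymoD5-fov-mono3d-3class.py')
assert cfg.train_dataloader.dataset.load_type == 'fov_image_based'
assert cfg.val_evaluator.waymo_bin_file.endswith('fov_gt.bin')
print(cfg.val_evaluator.metric)  # LET_mAP
```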

configs/_base_/datasets/waymoD5-mono3d-3class.py renamed to configs/_base_/datasets/waymoD5-mv-mono3d-3class.py

Lines changed: 5 additions & 4 deletions
@@ -81,7 +81,7 @@
         # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
         # and box_type_3d='Depth' in sunrgbd and scannet dataset.
         box_type_3d='Camera',
-        task='mono_det',
+        load_type='mv_image_based',
         # load one frame every five frames
         load_interval=5))

@@ -109,7 +109,7 @@
         # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
         # and box_type_3d='Depth' in sunrgbd and scannet dataset.
         box_type_3d='Camera',
-        task='mono_det',
+        load_type='mv_image_based',
     ))

 test_dataloader = dict(
@@ -136,7 +136,7 @@
         # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
         # and box_type_3d='Depth' in sunrgbd and scannet dataset.
         box_type_3d='Camera',
-        task='mono_det',
+        load_type='mv_image_based',
     ))

 val_evaluator = dict(
@@ -145,5 +145,6 @@
     waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
     data_root='./data/waymo/waymo_format',
     metric='LET_mAP',
-    task='mono_det')
+    load_type='mv_image_based',
+)
 test_evaluator = val_evaluator
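The renamed multi-view base config and the new FoV base config differ mainly in the loading mode and in which ground-truth bin file WaymoMetric evaluates against; a quick comparison sketch (file paths as in the repository layout above):

```python
from mmengine.config import Config

for name in ('waymoD5-mv-mono3d-3class', 'waymoD5-fov-mono3d-3class'):
    cfg = Config.fromfile(f'configs/_base_/datasets/{name}.py')
    print(name,
          cfg.val_dataloader.dataset.load_type,
          cfg.val_evaluator.waymo_bin_file)
# waymoD5-mv-mono3d-3class  mv_image_based  ./data/waymo/waymo_format/cam_gt.bin
# waymoD5-fov-mono3d-3class fov_image_based ./data/waymo/waymo_format/fov_gt.bin
```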
Lines changed: 112 additions & 0 deletions (new file)
@@ -0,0 +1,112 @@
_base_ = [
    '../_base_/datasets/waymoD5-fov-mono3d-3class.py',
    '../_base_/models/pgd.py', '../_base_/schedules/mmdet-schedule-1x.py',
    '../_base_/default_runtime.py'
]
# model settings
model = dict(
    backbone=dict(
        type='mmdet.ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        stage_with_dcn=(False, False, True, True)),
    neck=dict(num_outs=3),
    bbox_head=dict(
        num_classes=3,
        bbox_code_size=7,
        pred_attrs=False,
        pred_velo=False,
        pred_bbox2d=True,
        use_onlyreg_proj=True,
        strides=(8, 16, 32),
        regress_ranges=((-1, 128), (128, 256), (256, 1e8)),
        group_reg_dims=(2, 1, 3, 1, 16,
                        4),  # offset, depth, size, rot, kpts, bbox2d
        reg_branch=(
            (256, ),  # offset
            (256, ),  # depth
            (256, ),  # size
            (256, ),  # rot
            (256, ),  # kpts
            (256, )  # bbox2d
        ),
        centerness_branch=(256, ),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_centerness=dict(
            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        use_depth_classifier=True,
        depth_branch=(256, ),
        depth_range=(0, 50),
        depth_unit=10,
        division='uniform',
        depth_bins=6,
        pred_keypoints=True,
        weight_dim=1,
        loss_depth=dict(
            type='UncertainSmoothL1Loss', alpha=1.0, beta=3.0,
            loss_weight=1.0),
        loss_bbox2d=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.0),
        loss_consistency=dict(type='mmdet.GIoULoss', loss_weight=0.0),
        bbox_coder=dict(
            type='PGDBBoxCoder',
            base_depths=((41.01, 18.44), ),
            base_dims=(
                (4.73, 1.77, 2.08),
                (0.91, 1.74, 0.84),
                (1.81, 1.77, 0.84),
            ),
            code_size=7)),
    # set weight 1.0 for base 7 dims (offset, depth, size, rot)
    # 0.2 for 16-dim keypoint offsets and 1.0 for 4-dim 2D distance targets
    train_cfg=dict(code_weight=[
        1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
        0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0
    ]),
    test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))

# optimizer
optim_wrapper = dict(
    optimizer=dict(
        type='SGD',
        lr=0.008,
    ),
    paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
    clip_grad=dict(max_norm=35, norm_type=2))

param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0 / 3,
        by_epoch=False,
        begin=0,
        end=500),
    dict(
        type='MultiStepLR',
        begin=0,
        end=24,
        by_epoch=True,
        milestones=[16, 22],
        gamma=0.1)
]
total_epochs = 24
runner = dict(max_epochs=total_epochs)
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=24)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
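Finally, a hedged sketch of how this new PGD config could be launched for training through mmengine's Runner. The config path is a placeholder (the file name is not visible in this view), and the Waymo kitti_format data must already be prepared as described in the dataset config.

```python
from mmengine.config import Config
from mmengine.runner import Runner

# placeholder path -- substitute the actual name of the new PGD Waymo-FoV config
cfg = Config.fromfile('configs/pgd/<new_pgd_waymoD5_fov_config>.py')
cfg.work_dir = './work_dirs/pgd_waymoD5_fov'

runner = Runner.from_cfg(cfg)
runner.train()
```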
