From 5d2e5612ea345bf03b40bbef32cc2e9b4333a403 Mon Sep 17 00:00:00 2001 From: qiyulei-mt Date: Thu, 20 Feb 2025 18:10:51 +0800 Subject: [PATCH] support musa backend in mmdetection3d --- mmdet3d/apis/inference.py | 6 +- mmdet3d/evaluation/metrics/nuscenes_metric.py | 19 ++- .../data_preprocessors/data_preprocessor.py | 11 +- mmdet3d/structures/bbox_3d/base_box3d.py | 12 ++ mmdet3d/structures/point_data.py | 19 ++- mmdet3d/structures/points/base_points.py | 12 ++ mmdet3d/testing/model_utils.py | 20 ++- .../test_lidar_det3d_inferencer.py | 7 +- .../test_lidar_seg3d_inferencer.py | 11 +- .../test_multi_modality_det3d_inferencer.py | 7 +- .../test_functional/test_kitti_eval.py | 9 +- .../test_functional/test_panoptic_seg_eval.py | 3 +- .../test_functional/test_seg_eval.py | 4 +- .../test_metrics/test_kitti_metric.py | 5 +- .../test_cylinder3d_backbone.py | 28 +++- .../test_models/test_backbones/test_dgcnn.py | 26 +++- .../test_backbones/test_mink_resnet.py | 30 +++- .../test_backbones/test_minkunet_backbone.py | 28 +++- .../test_backbones/test_multi_backbone.py | 28 +++- .../test_backbones/test_pointnet2_sa_msg.py | 28 +++- .../test_backbones/test_pointnet2_sa_ssg.py | 28 +++- .../test_backbones/test_spvcnn_backbone.py | 28 +++- .../test_data_preprocessor.py | 16 +- .../test_decode_heads/test_cylinder3d_head.py | 30 ++-- .../test_decode_heads/test_minkunet_head.py | 22 ++- .../test_decode_heads/test_paconv_head.py | 38 +++-- .../test_decode_heads/test_pointnet2_head.py | 37 +++-- .../test_dense_heads/test_fcaf3d_head.py | 30 ++-- .../test_dense_heads/test_freeanchors.py | 20 ++- .../test_dense_heads/test_imvoxel_head.py | 22 ++- .../test_models/test_dense_heads/test_ssn.py | 15 +- .../test_models/test_detectors/test_3dssd.py | 20 ++- .../test_detectors/test_center_point.py | 20 ++- .../test_models/test_detectors/test_fcaf3d.py | 20 ++- .../test_detectors/test_groupfree3d.py | 20 ++- .../test_models/test_detectors/test_h3dnet.py | 15 +- .../test_detectors/test_imvotenet.py | 27 +++- .../test_detectors/test_imvoxelnet.py | 32 +++- .../test_models/test_detectors/test_mvxnet.py | 21 ++- .../test_models/test_detectors/test_parta2.py | 20 ++- .../test_detectors/test_pointrcnn.py | 20 ++- .../test_models/test_detectors/test_pvrcnn.py | 20 ++- .../test_models/test_detectors/test_sassd.py | 20 ++- .../test_detectors/test_votenet.py | 15 +- .../test_detectors/test_voxelnet.py | 25 ++- .../test_models/test_layers/test_box3d_nms.py | 47 +++++- .../test_dgcnn_fa_module.py | 27 +++- .../test_dgcnn_fp_module.py | 34 +++- .../test_dgcnn_gf_module.py | 24 ++- .../test_minkowski_engine_module.py | 57 +++++-- .../test_paconv/test_paconv_modules.py | 145 +++++++++++++----- .../test_paconv/test_paconv_ops.py | 49 +++++- .../test_point_fp_module.py | 33 +++- .../test_point_sa_module.py | 82 +++++++--- .../test_spconv/test_spconv_module.py | 55 +++++-- .../test_torchsparse_module.py | 57 +++++-- .../test_losses/test_rotated_iou_loss.py | 30 +++- .../test_sparse_encoders.py | 56 +++++-- .../test_middle_encoders/test_sparse_unet.py | 28 +++- tests/test_models/test_necks/test_dla_neck.py | 27 +++- .../test_necks/test_imvoxel_neck.py | 26 +++- .../test_necks/test_pointnet2_fp_neck.py | 30 +++- .../test_segmentors/test_cylinder3d.py | 21 ++- .../test_segmentors/test_minkunet.py | 20 ++- .../test_segmentors/test_seg3d_tta_model.py | 16 +- .../test_anchor/test_anchor_3d_generator.py | 13 +- .../test_iou_piecewise_sampler.py | 28 +++- .../test_pillar_encoder.py | 3 +- .../test_voxel_encoders.py | 49 +++++- 
tests/test_structures/test_bbox/test_box3d.py | 107 +++++++++---- 70 files changed, 1532 insertions(+), 426 deletions(-) diff --git a/mmdet3d/apis/inference.py b/mmdet3d/apis/inference.py index e355b958ee..2845d2c788 100644 --- a/mmdet3d/apis/inference.py +++ b/mmdet3d/apis/inference.py @@ -13,6 +13,7 @@ from mmengine.dataset import Compose, pseudo_collate from mmengine.registry import init_default_scope from mmengine.runner import load_checkpoint +from mmengine.device import is_musa_available from mmdet3d.registry import DATASETS, MODELS from mmdet3d.structures import Box3DMode, Det3DDataSample, get_box_type @@ -104,7 +105,10 @@ def init_model(config: Union[str, Path, Config], model.cfg = config # save the config in the model for convenience if device != 'cpu': - torch.cuda.set_device(device) + if is_musa_available(): + torch.musa.set_device(device) + else: + torch.cuda.set_device(device) else: warnings.warn('Don\'t suggest using CPU device. ' 'Some functions are not supported for now.') diff --git a/mmdet3d/evaluation/metrics/nuscenes_metric.py b/mmdet3d/evaluation/metrics/nuscenes_metric.py index e30c0bcd96..7b17cb111f 100644 --- a/mmdet3d/evaluation/metrics/nuscenes_metric.py +++ b/mmdet3d/evaluation/metrics/nuscenes_metric.py @@ -10,6 +10,7 @@ from mmengine import Config, load from mmengine.evaluator import BaseMetric from mmengine.logging import MMLogger +from mmengine.device import is_musa_available from nuscenes.eval.detection.config import config_factory from nuscenes.eval.detection.data_classes import DetectionConfig from nuscenes.utils.data_classes import Box as NuScenesBox @@ -777,11 +778,19 @@ def nusc_box_to_cam_box3d( dims[:, [0, 1, 2]] = dims[:, [1, 2, 0]] rots = -rots - boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).cuda() - cam_boxes3d = CameraInstance3DBoxes( - boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5)) - scores = torch.Tensor([b.score for b in boxes]).cuda() - labels = torch.LongTensor([b.label for b in boxes]).cuda() + if is_musa_available(): + boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).musa() + cam_boxes3d = CameraInstance3DBoxes( + boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5)) + scores = torch.Tensor([b.score for b in boxes]).musa() + labels = torch.LongTensor([b.label for b in boxes]).musa() + else: + boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).cuda() + cam_boxes3d = CameraInstance3DBoxes( + boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5)) + scores = torch.Tensor([b.score for b in boxes]).cuda() + labels = torch.LongTensor([b.label for b in boxes]).cuda() + nms_scores = scores.new_zeros(scores.shape[0], 10 + 1) indices = labels.new_tensor(list(range(scores.shape[0]))) nms_scores[indices, labels] = scores diff --git a/mmdet3d/models/data_preprocessors/data_preprocessor.py b/mmdet3d/models/data_preprocessors/data_preprocessor.py index 05c8452a0c..54b4b42fb0 100644 --- a/mmdet3d/models/data_preprocessors/data_preprocessor.py +++ b/mmdet3d/models/data_preprocessors/data_preprocessor.py @@ -9,6 +9,7 @@ from mmdet.models.utils.misc import samplelist_boxtype2tensor from mmengine.model import stack_batch from mmengine.utils import is_seq_of +from mmengine.device import is_musa_available from torch import Tensor from torch.nn import functional as F @@ -434,12 +435,18 @@ def voxelize(self, points: List[Tensor], res_coors_numpy = res_coors.cpu().numpy() inds, point2voxel_map = self.sparse_quantize( res_coors_numpy, return_index=True, return_inverse=True) - point2voxel_map = torch.from_numpy(point2voxel_map).cuda() + if is_musa_available(): 
+ point2voxel_map = torch.from_numpy(point2voxel_map).musa() + else: + point2voxel_map = torch.from_numpy(point2voxel_map).cuda() if self.training and self.max_voxels is not None: if len(inds) > self.max_voxels: inds = np.random.choice( inds, self.max_voxels, replace=False) - inds = torch.from_numpy(inds).cuda() + if is_musa_available(): + inds = torch.from_numpy(inds).musa() + else: + inds = torch.from_numpy(inds).cuda() if hasattr(data_sample.gt_pts_seg, 'pts_semantic_mask'): data_sample.gt_pts_seg.voxel_semantic_mask \ = data_sample.gt_pts_seg.pts_semantic_mask[inds] diff --git a/mmdet3d/structures/bbox_3d/base_box3d.py b/mmdet3d/structures/bbox_3d/base_box3d.py index 7fb703c731..5e88b69bb0 100644 --- a/mmdet3d/structures/bbox_3d/base_box3d.py +++ b/mmdet3d/structures/bbox_3d/base_box3d.py @@ -457,6 +457,18 @@ def cuda(self, *args, **kwargs) -> 'BaseInstance3DBoxes': box_dim=self.box_dim, with_yaw=self.with_yaw) + def musa(self, *args, **kwargs) -> 'BaseInstance3DBoxes': + """Convert current boxes to musa device. + + Returns: + :obj:`BaseInstance3DBoxes`: A new boxes object on the musa device. + """ + original_type = type(self) + return original_type( + self.tensor.musa(*args, **kwargs), + box_dim=self.box_dim, + with_yaw=self.with_yaw) + def clone(self) -> 'BaseInstance3DBoxes': """Clone the boxes. diff --git a/mmdet3d/structures/point_data.py b/mmdet3d/structures/point_data.py index f12d4c8692..b232910679 100644 --- a/mmdet3d/structures/point_data.py +++ b/mmdet3d/structures/point_data.py @@ -5,11 +5,21 @@ import numpy as np import torch from mmengine.structures import BaseDataElement +from mmengine.device import is_musa_available IndexType = Union[str, slice, int, list, torch.LongTensor, torch.cuda.LongTensor, torch.BoolTensor, torch.cuda.BoolTensor, np.ndarray] +TensorType = (torch.BoolTensor, torch.cuda.BoolTensor) + +if is_musa_available(): + IndexType = Union[str, slice, int, list, torch.LongTensor, + torch.cuda.LongTensor, torch.BoolTensor, + torch.cuda.BoolTensor, np.ndarray, torch.musa.LongTensor, torch.musa.BoolTensor] + + TensorType = (torch.BoolTensor, torch.cuda.BoolTensor, torch.musa.BoolTensor) + class PointData(BaseDataElement): """Data structure for point-level annotations or predictions. @@ -85,9 +95,7 @@ def __getitem__(self, item: IndexType) -> 'PointData': # Mode details in https://github.com/numpy/numpy/issues/9464 item = item.astype(np.int64) if item.dtype == np.int32 else item item = torch.from_numpy(item) - assert isinstance( - item, (str, slice, int, torch.LongTensor, torch.cuda.LongTensor, - torch.BoolTensor, torch.cuda.BoolTensor)) + assert isinstance(item, IndexType) if isinstance(item, str): return getattr(self, item) @@ -103,7 +111,7 @@ def __getitem__(self, item: IndexType) -> 'PointData': if isinstance(item, torch.Tensor): assert item.dim() == 1, 'Only support to get the' \ ' values along the first dimension.'
- if isinstance(item, (torch.BoolTensor, torch.cuda.BoolTensor)): + if isinstance(item, TensorType): assert len(item) == len(self), 'The shape of the ' \ 'input(BoolTensor) ' \ f'{len(item)} ' \ @@ -122,8 +130,7 @@ def __getitem__(self, item: IndexType) -> 'PointData': v, (str, list, tuple)) or (hasattr(v, '__getitem__') and hasattr(v, 'cat')): # convert to indexes from BoolTensor - if isinstance(item, - (torch.BoolTensor, torch.cuda.BoolTensor)): + if isinstance(item, TensorType): indexes = torch.nonzero(item).view( -1).cpu().numpy().tolist() else: diff --git a/mmdet3d/structures/points/base_points.py b/mmdet3d/structures/points/base_points.py index 4cb54ce895..fdde9613b6 100644 --- a/mmdet3d/structures/points/base_points.py +++ b/mmdet3d/structures/points/base_points.py @@ -461,6 +461,18 @@ def cuda(self, *args, **kwargs) -> 'BasePoints': points_dim=self.points_dim, attribute_dims=self.attribute_dims) + def musa(self, *args, **kwargs) -> 'BasePoints': + """Convert current points to musa device. + + Returns: + :obj:`BasePoints`: A new points object on the musa device. + """ + original_type = type(self) + return original_type( + self.tensor.musa(*args, **kwargs), + points_dim=self.points_dim, + attribute_dims=self.attribute_dims) + def clone(self) -> 'BasePoints': """Clone the points. diff --git a/mmdet3d/testing/model_utils.py b/mmdet3d/testing/model_utils.py index da449398d6..5e8d900a31 100644 --- a/mmdet3d/testing/model_utils.py +++ b/mmdet3d/testing/model_utils.py @@ -6,18 +6,34 @@ import numpy as np import torch from mmengine.structures import InstanceData +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.structures import (CameraInstance3DBoxes, DepthInstance3DBoxes, Det3DDataSample, LiDARInstance3DBoxes, PointData) +AVAILABLE_DEVICES = [] + +if is_cuda_available(): + AVAILABLE_DEVICES.append(("cuda", "cuda")) + +if is_musa_available(): + AVAILABLE_DEVICES.append(("musa", "musa")) + + def setup_seed(seed): torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) + if is_musa_available(): + torch.musa.manual_seed_all(seed) + else: + torch.cuda.manual_seed_all(seed) np.random.seed(seed) random.seed(seed) - torch.backends.cudnn.deterministic = True + if is_musa_available(): + torch.backends.mudnn.deterministic = True + else: + torch.backends.cudnn.deterministic = True def _get_config_directory(): diff --git a/tests/test_apis/test_inferencers/test_lidar_det3d_inferencer.py b/tests/test_apis/test_inferencers/test_lidar_det3d_inferencer.py index f4fc771606..305b5400e4 100644 --- a/tests/test_apis/test_inferencers/test_lidar_det3d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_lidar_det3d_inferencer.py @@ -7,6 +7,7 @@ import numpy as np import torch from mmengine.utils import is_list_of +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.apis import LidarDet3DInferencer from mmdet3d.structures import Det3DDataSample @@ -41,7 +42,7 @@ def assert_predictions_equal(self, preds1, preds2): np.allclose(pred1['labels_3d'], pred2['labels_3d'])) def test_call(self): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): return # single point cloud inputs = dict(points='tests/data/kitti/training/velodyne/000000.bin') @@ -84,7 +85,7 @@ def test_call(self): self.assertIn('predictions', res_bs2) def test_visualize(self): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): return inputs = dict(points='tests/data/kitti/training/velodyne/000000.bin'), #
img_out_dir @@ -95,7 +96,7 @@ # self.assertTrue(osp.exists(osp.join(tmp_dir, '000000.png'))) def test_postprocess(self): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): return # return_datasample inputs = dict(points='tests/data/kitti/training/velodyne/000000.bin') diff --git a/tests/test_apis/test_inferencers/test_lidar_seg3d_inferencer.py b/tests/test_apis/test_inferencers/test_lidar_seg3d_inferencer.py index 9bbdb037a1..89c5b5ce72 100644 --- a/tests/test_apis/test_inferencers/test_lidar_seg3d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_lidar_seg3d_inferencer.py @@ -9,6 +9,7 @@ import pytest import torch from mmengine.utils import is_list_of +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.apis import LidarSeg3DInferencer from mmdet3d.structures import Det3DDataSample @@ -36,7 +37,9 @@ def assert_predictions_equal(self, preds1, preds2): pred2['pts_semantic_mask'])) @pytest.mark.skipif( - not torch.cuda.is_available(), reason='requires CUDA support') + not is_cuda_available() and not is_musa_available(), + reason='requires CUDA or MUSA support', + ) @pytest.mark.skipif( 'DISPLAY' not in os.environ, reason='requires DISPLAY device') def test_call(self): @@ -84,7 +87,9 @@ def test_call(self): self.assertIn('predictions', res_bs2) @pytest.mark.skipif( - not torch.cuda.is_available(), reason='requires CUDA support') + not is_cuda_available() and not is_musa_available(), + reason='requires CUDA or MUSA support', + ) @pytest.mark.skipif( 'DISPLAY' not in os.environ, reason='requires DISPLAY device') def test_visualizer(self): @@ -94,7 +99,7 @@ def test_visualizer(self): self.inferencer(inputs, out_dir=tmp_dir) def test_post_processor(self): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): return # return_datasample inputs = dict(points='tests/data/s3dis/points/Area_1_office_2.bin') diff --git a/tests/test_apis/test_inferencers/test_multi_modality_det3d_inferencer.py b/tests/test_apis/test_inferencers/test_multi_modality_det3d_inferencer.py index c51f7c764b..e73e78b521 100644 --- a/tests/test_apis/test_inferencers/test_multi_modality_det3d_inferencer.py +++ b/tests/test_apis/test_inferencers/test_multi_modality_det3d_inferencer.py @@ -8,6 +8,7 @@ import numpy as np import torch from mmengine.utils import is_list_of +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.apis import MultiModalityDet3DInferencer from mmdet3d.structures import Det3DDataSample @@ -42,7 +43,7 @@ def assert_predictions_equal(self, preds1, preds2): np.allclose(pred1['labels_3d'], pred2['labels_3d'])) def test_call(self): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): return infos_path = 'demo/data/kitti/000008.pkl' points_path = 'demo/data/kitti/000008.bin' @@ -86,7 +87,7 @@ def test_call(self): self.assertIn('visualization', res_ndarray) def test_visualize(self): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): return inputs = dict( points='demo/data/kitti/000008.bin', @@ -100,7 +101,7 @@ def test_visualize(self): # self.assertTrue(osp.exists(osp.join(tmp_dir, '000000.png'))) def test_postprocess(self): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): return # return_datasample infos_path = 'demo/data/kitti/000008.pkl' diff
--git a/tests/test_evaluation/test_functional/test_kitti_eval.py b/tests/test_evaluation/test_functional/test_kitti_eval.py index f8608af794..888c9cac41 100644 --- a/tests/test_evaluation/test_functional/test_kitti_eval.py +++ b/tests/test_evaluation/test_functional/test_kitti_eval.py @@ -2,13 +2,14 @@ import numpy as np import pytest import torch +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.evaluation import do_eval, eval_class, kitti_eval def test_do_eval(): - if not torch.cuda.is_available(): - pytest.skip('test requires GPU and CUDA') + if not is_cuda_available() and not is_musa_available(): + pytest.skip('test requires GPU and CUDA or MUSA') gt_name = np.array( ['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car', 'DontCare', 'DontCare']) gt_truncated = np.array([0., 0., 0., -1., -1., -1., -1.]) @@ -128,8 +129,8 @@ def test_do_eval(): def test_kitti_eval(): - if not torch.cuda.is_available(): - pytest.skip('test requires GPU and CUDA') + if not is_cuda_available() and not is_musa_available(): + pytest.skip('test requires GPU and CUDA or MUSA') gt_name = np.array( ['Pedestrian', 'Cyclist', 'Car', 'Car', 'Car', 'DontCare', 'DontCare']) gt_truncated = np.array([0., 0., 0., -1., -1., -1., -1.]) diff --git a/tests/test_evaluation/test_functional/test_panoptic_seg_eval.py b/tests/test_evaluation/test_functional/test_panoptic_seg_eval.py index d67abe6be7..78a8700604 100644 --- a/tests/test_evaluation/test_functional/test_panoptic_seg_eval.py +++ b/tests/test_evaluation/test_functional/test_panoptic_seg_eval.py @@ -2,12 +2,13 @@ import numpy as np import pytest import torch +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.evaluation.functional.panoptic_seg_eval import panoptic_seg_eval def test_panoptic_seg_eval(): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip() classes = ['unlabeled', 'person', 'dog', 'grass', 'sky'] diff --git a/tests/test_evaluation/test_functional/test_seg_eval.py b/tests/test_evaluation/test_functional/test_seg_eval.py index d2b44854aa..7f3bddb24d 100644 --- a/tests/test_evaluation/test_functional/test_seg_eval.py +++ b/tests/test_evaluation/test_functional/test_seg_eval.py @@ -2,12 +2,12 @@ import numpy as np import pytest import torch - +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.evaluation.functional.seg_eval import seg_eval def test_indoor_eval(): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip() seg_preds = [ np.array([ diff --git a/tests/test_evaluation/test_metrics/test_kitti_metric.py b/tests/test_evaluation/test_metrics/test_kitti_metric.py index 070314150b..7f5a885b86 100644 --- a/tests/test_evaluation/test_metrics/test_kitti_metric.py +++ b/tests/test_evaluation/test_metrics/test_kitti_metric.py @@ -2,6 +2,7 @@ import pytest import torch from mmengine.structures import InstanceData +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.evaluation.metrics import KittiMetric from mmdet3d.structures import Det3DDataSample, LiDARInstance3DBoxes @@ -49,7 +50,7 @@ def _init_multi_modal_evaluate_input(): def test_multi_modal_kitti_metric(): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') kittimetric = KittiMetric( data_root + '/kitti_infos_train.pkl', metric=['mAP']) @@ -72,7 +73,7 @@ def test_multi_modal_kitti_metric(): def 
test_kitti_metric_mAP(): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') kittimetric = KittiMetric( data_root + '/kitti_infos_train.pkl', metric=['mAP']) diff --git a/tests/test_models/test_backbones/test_cylinder3d_backbone.py b/tests/test_models/test_backbones/test_cylinder3d_backbone.py index ea6b3e7ba2..9c7992cc9f 100644 --- a/tests/test_models/test_backbones/test_cylinder3d_backbone.py +++ b/tests/test_models/test_backbones/test_cylinder3d_backbone.py @@ -1,12 +1,30 @@ # Copyright (c) OpenMMLab. All rights reserved. import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_cylinder3d(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_cylinder3d(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() cfg = dict( type='Asymm3DSpconv', @@ -15,15 +33,15 @@ def test_cylinder3d(): base_channels=32, norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.1)) self = MODELS.build(cfg) - self.cuda() + self.to(device) batch_size = 1 coorx = torch.randint(0, 48, (50, 1)) coory = torch.randint(0, 36, (50, 1)) coorz = torch.randint(0, 4, (50, 1)) coorbatch = torch.zeros(50, 1) - coors = torch.cat([coorbatch, coorx, coory, coorz], dim=1).cuda() - voxel_features = torch.rand(50, 16).cuda() + coors = torch.cat([coorbatch, coorx, coory, coorz], dim=1).to(device) + voxel_features = torch.rand(50, 16).to(device) # test forward feature = self(voxel_features, coors, batch_size) diff --git a/tests/test_models/test_backbones/test_dgcnn.py b/tests/test_models/test_backbones/test_dgcnn.py index 27d7cffb52..5cbe60ff04 100644 --- a/tests/test_models/test_backbones/test_dgcnn.py +++ b/tests/test_models/test_backbones/test_dgcnn.py @@ -2,12 +2,30 @@ import numpy as np import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_dgcnn_gf(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_dgcnn_gf(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() # DGCNNGF used in segmentation @@ -22,10 +40,10 @@ def test_dgcnn_gf(): act_cfg=dict(type='ReLU')) self = MODELS.build(cfg) - self.cuda() + self.to(device) xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', dtype=np.float32) - xyz = torch.from_numpy(xyz).view(1, -1, 6).cuda() # (B, N, 6) + xyz = torch.from_numpy(xyz).view(1, -1, 6).to(device) # (B, N, 6) # test forward ret_dict = self(xyz) gf_points = ret_dict['gf_points'] diff --git a/tests/test_models/test_backbones/test_mink_resnet.py b/tests/test_models/test_backbones/test_mink_resnet.py index 04552b7272..be7f7890a7 100644 --- a/tests/test_models/test_backbones/test_mink_resnet.py +++ b/tests/test_models/test_backbones/test_mink_resnet.py @@ -2,12 +2,30 @@ import numpy as np import pytest import torch +from mmengine.device import 
is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_mink_resnet(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_mink_resnet(device): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') try: @@ -20,9 +38,9 @@ def test_mink_resnet(): # batch of 2 point clouds for i in range(2): c = torch.from_numpy(np.random.rand(500, 3) * 100) - coordinates.append(c.float().cuda()) + coordinates.append(c.float().to(device)) f = torch.from_numpy(np.random.rand(500, 3)) - features.append(f.float().cuda()) + features.append(f.float().to(device)) tensor_coordinates, tensor_features = ME.utils.sparse_collate( coordinates, features) x = ME.SparseTensor( @@ -30,7 +48,7 @@ def test_mink_resnet(): # MinkResNet34 with 4 outputs cfg = dict(type='MinkResNet', depth=34, in_channels=3) - self = MODELS.build(cfg).cuda() + self = MODELS.build(cfg).to(device) self.init_weights() y = self(x) @@ -47,7 +65,7 @@ def test_mink_resnet(): # MinkResNet50 with 2 outputs cfg = dict( type='MinkResNet', depth=34, in_channels=3, num_stages=2, pool=False) - self = MODELS.build(cfg).cuda() + self = MODELS.build(cfg).to(device) self.init_weights() y = self(x) diff --git a/tests/test_models/test_backbones/test_minkunet_backbone.py b/tests/test_models/test_backbones/test_minkunet_backbone.py index 180bcbd23b..c359c5d906 100644 --- a/tests/test_models/test_backbones/test_minkunet_backbone.py +++ b/tests/test_models/test_backbones/test_minkunet_backbone.py @@ -2,12 +2,30 @@ import pytest import torch import torch.nn.functional as F +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_minkunet_backbone(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_minkunet_backbone(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') try: @@ -22,11 +40,11 @@ def test_minkunet_backbone(): coordinates.append(c) f = torch.rand(100, 4) features.append(f) - features = torch.cat(features, dim=0).cuda() - coordinates = torch.cat(coordinates, dim=0).cuda() + features = torch.cat(features, dim=0).to(device) + coordinates = torch.cat(coordinates, dim=0).to(device) cfg = dict(type='MinkUNetBackbone') - self = MODELS.build(cfg).cuda() + self = MODELS.build(cfg).to(device) self.init_weights() y = self(features, coordinates) diff --git a/tests/test_models/test_backbones/test_multi_backbone.py b/tests/test_models/test_backbones/test_multi_backbone.py index 02186fa243..72004b1033 100644 --- a/tests/test_models/test_backbones/test_multi_backbone.py +++ b/tests/test_models/test_backbones/test_multi_backbone.py @@ -2,12 +2,30 @@ import numpy as np import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_multi_backbone(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + 
pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_multi_backbone(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() # test list config @@ -59,12 +77,12 @@ def test_multi_backbone(): ]) self = MODELS.build(cfg_list) - self.cuda() + self.to(device) assert len(self.backbone_list) == 4 xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', dtype=np.float32) - xyz = torch.from_numpy(xyz).view(1, -1, 6).cuda() # (B, N, 6) + xyz = torch.from_numpy(xyz).view(1, -1, 6).to(device) # (B, N, 6) # test forward ret_dict = self(xyz[:, :, :4]) @@ -90,7 +108,7 @@ def test_multi_backbone(): norm_cfg=dict(type='BN2d'))) self = MODELS.build(cfg_dict) - self.cuda() + self.to(device) assert len(self.backbone_list) == 2 diff --git a/tests/test_models/test_backbones/test_pointnet2_sa_msg.py b/tests/test_models/test_backbones/test_pointnet2_sa_msg.py index c8e87506e2..5036839a15 100644 --- a/tests/test_models/test_backbones/test_pointnet2_sa_msg.py +++ b/tests/test_models/test_backbones/test_pointnet2_sa_msg.py @@ -2,12 +2,30 @@ import numpy as np import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_pointnet2_sa_msg(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_pointnet2_sa_msg(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() # PN2MSG used in 3DSSD @@ -31,14 +49,14 @@ def test_pointnet2_sa_msg(): normalize_xyz=False)) self = MODELS.build(cfg) - self.cuda() + self.to(device) assert self.SA_modules[0].mlps[0].layer0.conv.in_channels == 4 assert self.SA_modules[0].mlps[0].layer0.conv.out_channels == 8 assert self.SA_modules[0].mlps[1].layer1.conv.out_channels == 8 assert self.SA_modules[2].mlps[2].layer2.conv.out_channels == 64 xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', dtype=np.float32) - xyz = torch.from_numpy(xyz).view(1, -1, 6).cuda() # (B, N, 6) + xyz = torch.from_numpy(xyz).view(1, -1, 6).to(device) # (B, N, 6) # test forward ret_dict = self(xyz[:, :, :4]) sa_xyz = ret_dict['sa_xyz'][-1] @@ -96,7 +114,7 @@ def test_pointnet2_sa_msg(): normalize_xyz=False)) self = MODELS.build(cfg) - self.cuda() + self.to(device) ret_dict = self(xyz) sa_xyz = ret_dict['sa_xyz'] sa_features = ret_dict['sa_features'] diff --git a/tests/test_models/test_backbones/test_pointnet2_sa_ssg.py b/tests/test_models/test_backbones/test_pointnet2_sa_ssg.py index cd4d993566..c9569cd2fa 100644 --- a/tests/test_models/test_backbones/test_pointnet2_sa_ssg.py +++ b/tests/test_models/test_backbones/test_pointnet2_sa_ssg.py @@ -2,12 +2,30 @@ import numpy as np import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_pointnet2_sa_ssg(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not 
is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_pointnet2_sa_ssg(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() cfg = dict( @@ -19,7 +37,7 @@ def test_pointnet2_sa_ssg(): sa_channels=((8, 16), (16, 16)), fp_channels=((16, 16), (16, 16))) self = MODELS.build(cfg) - self.cuda() + self.to(device) assert self.SA_modules[0].mlps[0].layer0.conv.in_channels == 6 assert self.SA_modules[0].mlps[0].layer0.conv.out_channels == 8 assert self.SA_modules[0].mlps[0].layer1.conv.out_channels == 16 @@ -29,7 +47,7 @@ def test_pointnet2_sa_ssg(): assert self.FP_modules[1].mlps.layer0.conv.in_channels == 19 xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', dtype=np.float32) - xyz = torch.from_numpy(xyz).view(1, -1, 6).cuda() # (B, N, 6) + xyz = torch.from_numpy(xyz).view(1, -1, 6).to(device) # (B, N, 6) # test forward ret_dict = self(xyz) fp_xyz = ret_dict['fp_xyz'] @@ -62,7 +80,7 @@ def test_pointnet2_sa_ssg(): # test only xyz input without features cfg['in_channels'] = 3 self = MODELS.build(cfg) - self.cuda() + self.to(device) ret_dict = self(xyz[..., :3]) assert len(fp_xyz) == len(fp_features) == len(fp_indices) == 3 assert len(sa_xyz) == len(sa_features) == len(sa_indices) == 3 diff --git a/tests/test_models/test_backbones/test_spvcnn_backbone.py b/tests/test_models/test_backbones/test_spvcnn_backbone.py index 504f2cc9fe..7d1fe36b70 100644 --- a/tests/test_models/test_backbones/test_spvcnn_backbone.py +++ b/tests/test_models/test_backbones/test_spvcnn_backbone.py @@ -2,12 +2,30 @@ import pytest import torch import torch.nn.functional as F +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_spvcnn_backbone(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_spvcnn_backbone(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') try: @@ -22,11 +40,11 @@ def test_spvcnn_backbone(): coordinates.append(c) f = torch.rand(100, 4) features.append(f) - features = torch.cat(features, dim=0).cuda() - coordinates = torch.cat(coordinates, dim=0).cuda() + features = torch.cat(features, dim=0).to(device) + coordinates = torch.cat(coordinates, dim=0).to(device) cfg = dict(type='SPVCNNBackbone') - self = MODELS.build(cfg).cuda() + self = MODELS.build(cfg).to(device) self.init_weights() y = self(features, coordinates) diff --git a/tests/test_models/test_data_preprocessors/test_data_preprocessor.py b/tests/test_models/test_data_preprocessors/test_data_preprocessor.py index 3db374ca00..df0869d2e1 100644 --- a/tests/test_models/test_data_preprocessors/test_data_preprocessor.py +++ b/tests/test_models/test_data_preprocessors/test_data_preprocessor.py @@ -1,11 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from unittest import TestCase +import unittest import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.models.data_preprocessors import Det3DDataPreprocessor from mmdet3d.structures import Det3DDataSample, PointData +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestDet3DDataPreprocessor(TestCase): @@ -30,7 +34,13 @@ def test_init(self): with self.assertRaises(AssertionError): Det3DDataPreprocessor(bgr_to_rgb=True, rgb_to_bgr=True) - def test_forward(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_forward(self, _, device: str): processor = Det3DDataPreprocessor(mean=[0, 0, 0], std=[1, 1, 1]) points = torch.randn((5000, 3)) @@ -98,7 +108,7 @@ def test_forward(self): self.assertEqual(data_sample.pad_shape, expected_shape) # test cylindrical voxelization - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and CUDA') point_cloud_range = [0, -180, -4, 50, 180, 2] grid_shape = [480, 360, 32] @@ -109,7 +119,7 @@ def test_forward(self): max_voxels=-1) processor = Det3DDataPreprocessor( voxel=True, voxel_type='cylindrical', - voxel_layer=voxel_layer).cuda() + voxel_layer=voxel_layer).to(device) num_points = 5000 xy = torch.rand(num_points, 2) * 140 - 70 z = torch.rand(num_points, 1) * 9 - 6 diff --git a/tests/test_models/test_decode_heads/test_cylinder3d_head.py b/tests/test_models/test_decode_heads/test_cylinder3d_head.py index c8fae827e8..f0cb407c0a 100644 --- a/tests/test_models/test_decode_heads/test_cylinder3d_head.py +++ b/tests/test_models/test_decode_heads/test_cylinder3d_head.py @@ -1,19 +1,29 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from unittest import TestCase +import unittest import pytest import torch from mmcv.ops import SparseConvTensor +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.models.decode_heads import Cylinder3DHead from mmdet3d.structures import Det3DDataSample, PointData +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestCylinder3DHead(TestCase): - def test_cylinder3d_head_loss(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_cylinder3d_head_loss(self, _, device: str): """Tests Cylinder3D head loss.""" - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') cylinder3d_head = Cylinder3DHead( channels=128, @@ -25,13 +35,13 @@ def test_cylinder3d_head_loss(self): loss_weight=1.0), loss_lovasz=dict( type='LovaszLoss', loss_weight=1.0, reduction='none'), - ).cuda() + ).to(device) - voxel_feats = torch.rand(50, 128).cuda() - coorx = torch.randint(0, 480, (50, 1)).int().cuda() - coory = torch.randint(0, 360, (50, 1)).int().cuda() - coorz = torch.randint(0, 32, (50, 1)).int().cuda() - coorbatch0 = torch.zeros(50, 1).int().cuda() + voxel_feats = torch.rand(50, 128).to(device) + coorx = torch.randint(0, 480, (50, 1)).int().to(device) + coory = torch.randint(0, 360, (50, 1)).int().to(device) + coorz = torch.randint(0, 32, (50, 1)).int().to(device) + coorbatch0 = torch.zeros(50, 1).int().to(device) coors = torch.cat([coorbatch0, coorx, coory, coorz], dim=1) grid_size = [480, 360, 32] batch_size = 1 @@ -45,7 +55,7 @@ def test_cylinder3d_head_loss(self): # When truth is non-empty then losses # should be nonzero for random inputs - voxel_semantic_mask = torch.randint(0, 20, (50, )).long().cuda() + voxel_semantic_mask = torch.randint(0, 20, (50, )).long().to(device) gt_pts_seg = PointData(voxel_semantic_mask=voxel_semantic_mask) datasample = Det3DDataSample() @@ -60,7 +70,7 @@ def test_cylinder3d_head_loss(self): self.assertGreater(loss_lovasz, 0, 'lovasz loss should be positive') batch_inputs_dict = dict(voxels=dict(voxel_coors=coors)) - datasample.point2voxel_map = torch.randint(0, 50, (100, )).int().cuda() + datasample.point2voxel_map = torch.randint(0, 50, (100, )).int().to(device) point_logits = cylinder3d_head.predict(sparse_voxels, batch_inputs_dict, [datasample]) assert point_logits[0].shape == torch.Size([100, 20]) diff --git a/tests/test_models/test_decode_heads/test_minkunet_head.py b/tests/test_models/test_decode_heads/test_minkunet_head.py index c684565ded..1ca726fca4 100644 --- a/tests/test_models/test_decode_heads/test_minkunet_head.py +++ b/tests/test_models/test_decode_heads/test_minkunet_head.py @@ -1,27 +1,37 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from unittest import TestCase +import unittest import pytest import torch import torch.nn.functional as F +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.models.decode_heads import MinkUNetHead from mmdet3d.structures import Det3DDataSample, PointData +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestMinkUNetHead(TestCase): - def test_minkunet_head_loss(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_minkunet_head_loss(self, _, device: str): """Tests PAConv head loss.""" try: import torchsparse except ImportError: pytest.skip('test requires Torchsparse installation') - if torch.cuda.is_available(): + if is_cuda_available() or is_musa_available(): minkunet_head = MinkUNetHead(channels=4, num_classes=19) - minkunet_head.cuda() + minkunet_head.to(device) coordinates, features = [], [] for i in range(2): c = torch.randint(0, 10, (100, 3)).int() @@ -29,8 +39,8 @@ def test_minkunet_head_loss(self): coordinates.append(c) f = torch.rand(100, 4) features.append(f) - features = torch.cat(features, dim=0).cuda() - coordinates = torch.cat(coordinates, dim=0).cuda() + features = torch.cat(features, dim=0).to(device) + coordinates = torch.cat(coordinates, dim=0).to(device) x = torchsparse.SparseTensor(feats=features, coords=coordinates) # Test forward @@ -40,7 +50,7 @@ def test_minkunet_head_loss(self): # When truth is non-empty then losses # should be nonzero for random inputs - voxel_semantic_mask = torch.randint(0, 19, (100, )).long().cuda() + voxel_semantic_mask = torch.randint(0, 19, (100, )).long().to(device) gt_pts_seg = PointData(voxel_semantic_mask=voxel_semantic_mask) datasample = Det3DDataSample() diff --git a/tests/test_models/test_decode_heads/test_paconv_head.py b/tests/test_models/test_decode_heads/test_paconv_head.py index 92286b6717..c57a4effaa 100644 --- a/tests/test_models/test_decode_heads/test_paconv_head.py +++ b/tests/test_models/test_decode_heads/test_paconv_head.py @@ -1,18 +1,28 @@ # Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase +import unittest import torch +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.models.decode_heads import PAConvHead from mmdet3d.structures import Det3DDataSample, PointData +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestPAConvHead(TestCase): - def test_paconv_head_loss(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_paconv_head_loss(self, _, device: str): """Tests PAConv head loss.""" - if torch.cuda.is_available(): + if is_cuda_available() or is_musa_available(): paconv_head = PAConvHead( fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), (128 + 6, 128, 128, 128)), @@ -29,21 +39,21 @@ def test_paconv_head_loss(self): loss_weight=1.0), ignore_index=20) - paconv_head.cuda() + paconv_head.to(device) # PAConv head expects dict format features sa_xyz = [ - torch.rand(1, 4096, 3).float().cuda(), - torch.rand(1, 1024, 3).float().cuda(), - torch.rand(1, 256, 3).float().cuda(), - torch.rand(1, 64, 3).float().cuda(), - torch.rand(1, 16, 3).float().cuda(), + torch.rand(1, 4096, 3).float().to(device), + torch.rand(1, 1024, 3).float().to(device), + torch.rand(1, 256, 3).float().to(device), + torch.rand(1, 64, 3).float().to(device), + torch.rand(1, 16, 3).float().to(device), ] sa_features = [ - torch.rand(1, 6, 4096).float().cuda(), - torch.rand(1, 64, 1024).float().cuda(), - torch.rand(1, 128, 256).float().cuda(), - torch.rand(1, 256, 64).float().cuda(), - torch.rand(1, 512, 16).float().cuda(), + torch.rand(1, 6, 4096).float().to(device), + torch.rand(1, 64, 1024).float().to(device), + torch.rand(1, 128, 256).float().to(device), + torch.rand(1, 256, 64).float().to(device), + torch.rand(1, 512, 16).float().to(device), ] feat_dict = dict(sa_xyz=sa_xyz, sa_features=sa_features) @@ -54,7 +64,7 @@ def test_paconv_head_loss(self): # When truth is non-empty then losses # should be nonzero for random inputs - pts_semantic_mask = torch.randint(0, 20, (4096, )).long().cuda() + pts_semantic_mask = torch.randint(0, 20, (4096, )).long().to(device) gt_pts_seg = PointData(pts_semantic_mask=pts_semantic_mask) datasample = Det3DDataSample() diff --git a/tests/test_models/test_decode_heads/test_pointnet2_head.py b/tests/test_models/test_decode_heads/test_pointnet2_head.py index c10ae1f97d..2300f9131e 100644 --- a/tests/test_models/test_decode_heads/test_pointnet2_head.py +++ b/tests/test_models/test_decode_heads/test_pointnet2_head.py @@ -1,18 +1,27 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from unittest import TestCase +import unittest import torch +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.models.decode_heads import PointNet2Head from mmdet3d.structures import Det3DDataSample, PointData +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestPointNet2Head(TestCase): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") - def test_paconv_head_loss(self): + @parameterized.expand(AVAILABLE_DEVICES) + def test_paconv_head_loss(self, _, device: str): """Tests PAConv head loss.""" - if torch.cuda.is_available(): + if is_cuda_available() or is_musa_available(): pointnet2_head = PointNet2Head( fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), (128, 128, 128, 128)), @@ -29,22 +38,22 @@ def test_paconv_head_loss(self): loss_weight=1.0), ignore_index=20) - pointnet2_head.cuda() + pointnet2_head.to(device) # DGCNN head expects dict format features sa_xyz = [ - torch.rand(1, 4096, 3).float().cuda(), - torch.rand(1, 1024, 3).float().cuda(), - torch.rand(1, 256, 3).float().cuda(), - torch.rand(1, 64, 3).float().cuda(), - torch.rand(1, 16, 3).float().cuda(), + torch.rand(1, 4096, 3).float().to(device), + torch.rand(1, 1024, 3).float().to(device), + torch.rand(1, 256, 3).float().to(device), + torch.rand(1, 64, 3).float().to(device), + torch.rand(1, 16, 3).float().to(device), ] sa_features = [ - torch.rand(1, 6, 4096).float().cuda(), - torch.rand(1, 64, 1024).float().cuda(), - torch.rand(1, 128, 256).float().cuda(), - torch.rand(1, 256, 64).float().cuda(), - torch.rand(1, 512, 16).float().cuda(), + torch.rand(1, 6, 4096).float().to(device), + torch.rand(1, 64, 1024).float().to(device), + torch.rand(1, 128, 256).float().to(device), + torch.rand(1, 256, 64).float().to(device), + torch.rand(1, 512, 16).float().to(device), ] feat_dict = dict(sa_xyz=sa_xyz, sa_features=sa_features) @@ -55,7 +64,7 @@ def test_paconv_head_loss(self): # When truth is non-empty then losses # should be nonzero for random inputs - pts_semantic_mask = torch.randint(0, 20, (4096, )).long().cuda() + pts_semantic_mask = torch.randint(0, 20, (4096, )).long().to(device) gt_pts_seg = PointData(pts_semantic_mask=pts_semantic_mask) datasample = Det3DDataSample() diff --git a/tests/test_models/test_dense_heads/test_fcaf3d_head.py b/tests/test_models/test_dense_heads/test_fcaf3d_head.py index eec8317598..8e1f68ca45 100755 --- a/tests/test_models/test_dense_heads/test_fcaf3d_head.py +++ b/tests/test_models/test_dense_heads/test_fcaf3d_head.py @@ -1,19 +1,29 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from unittest import TestCase +import unittest import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d import * # noqa from mmdet3d.models.dense_heads import FCAF3DHead from mmdet3d.testing import create_detector_inputs +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestFCAF3DHead(TestCase): - def test_fcaf3d_head_loss(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_fcaf3d_head_loss(self, _, device: str): """Test fcaf3d head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') try: @@ -36,24 +46,24 @@ def test_fcaf3d_head_loss(self): bbox_loss=dict(type='AxisAlignedIoULoss'), cls_loss=dict(type='mmdet.FocalLoss'), ) - fcaf3d_head = fcaf3d_head.cuda() + fcaf3d_head = fcaf3d_head.to(device) # fake input of head - coordinates, features = [torch.randn(500, 3).cuda() * 100 - ], [torch.randn(500, 3).cuda()] + coordinates, features = [torch.randn(500, 3).to(device) * 100 + ], [torch.randn(500, 3).to(device)] tensor_coordinates, tensor_features = ME.utils.sparse_collate( coordinates, features) x = ME.SparseTensor( features=tensor_features, coordinates=tensor_coordinates) # backbone conv1 = ME.MinkowskiConvolution( - 3, 64, kernel_size=3, stride=2, dimension=3).cuda() + 3, 64, kernel_size=3, stride=2, dimension=3).to(device) conv2 = ME.MinkowskiConvolution( - 64, 128, kernel_size=3, stride=2, dimension=3).cuda() + 64, 128, kernel_size=3, stride=2, dimension=3).to(device) conv3 = ME.MinkowskiConvolution( - 128, 256, kernel_size=3, stride=2, dimension=3).cuda() + 128, 256, kernel_size=3, stride=2, dimension=3).to(device) conv4 = ME.MinkowskiConvolution( - 256, 512, kernel_size=3, stride=2, dimension=3).cuda() + 256, 512, kernel_size=3, stride=2, dimension=3).to(device) # backbone outputs of 4 levels x1 = conv1(x) @@ -71,7 +81,7 @@ def test_fcaf3d_head_loss(self): points_feat_dim=6, gt_bboxes_dim=6) data_samples = [ - sample.cuda() for sample in packed_inputs['data_samples'] + sample.to(device) for sample in packed_inputs['data_samples'] ] gt_losses = fcaf3d_head.loss(x, data_samples) diff --git a/tests/test_models/test_dense_heads/test_freeanchors.py b/tests/test_models/test_dense_heads/test_freeanchors.py index 3d9543481c..fbb0457a16 100644 --- a/tests/test_models/test_dense_heads/test_freeanchors.py +++ b/tests/test_models/test_dense_heads/test_freeanchors.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestFreeAnchor(unittest.TestCase): - def test_freeanchor(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_freeanchor(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models.dense_heads, 'FreeAnchor3DHead') @@ -49,12 +58,15 @@ def test_freeanchor(self): # for item in aug_data: # 
item['data_sample'].set_metainfo(metainfo) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) diff --git a/tests/test_models/test_dense_heads/test_imvoxel_head.py b/tests/test_models/test_dense_heads/test_imvoxel_head.py index 0d55a78bf6..a782213304 100755 --- a/tests/test_models/test_dense_heads/test_imvoxel_head.py +++ b/tests/test_models/test_dense_heads/test_imvoxel_head.py @@ -1,19 +1,29 @@ # Copyright (c) OpenMMLab. All rights reserved. from unittest import TestCase +import unittest import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d import * # noqa from mmdet3d.models.dense_heads import ImVoxelHead from mmdet3d.testing import create_detector_inputs +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestImVoxelHead(TestCase): - def test_imvoxel_head_loss(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_imvoxel_head_loss(self, _, device: str): """Test imvoxel head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') # build head @@ -33,13 +43,13 @@ def test_imvoxel_head_loss(self): bbox_loss=dict(type='RotatedIoU3DLoss'), cls_loss=dict(type='mmdet.FocalLoss'), ) - imvoxel_head = imvoxel_head.cuda() + imvoxel_head = imvoxel_head.to(device) # fake input of head # (x, valid_preds) x = [ - torch.randn(1, 32, 10, 10, 4).cuda(), - torch.ones(1, 1, 10, 10, 4).cuda() + torch.randn(1, 32, 10, 10, 4).to(device), + torch.ones(1, 1, 10, 10, 4).to(device) ] # fake annotation @@ -52,7 +62,7 @@ def test_imvoxel_head_loss(self): with_pts_semantic_mask=False, with_pts_instance_mask=False) data_samples = [ - sample.cuda() for sample in packed_inputs['data_samples'] + sample.to(device) for sample in packed_inputs['data_samples'] ] losses = imvoxel_head.loss(x, data_samples) diff --git a/tests/test_models/test_dense_heads/test_ssn.py b/tests/test_models/test_dense_heads/test_ssn.py index 80a440d231..31d38126db 100644 --- a/tests/test_models/test_dense_heads/test_ssn.py +++ b/tests/test_models/test_dense_heads/test_ssn.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestSSN(unittest.TestCase): - def test_ssn(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_ssn(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models.dense_heads, 'ShapeAwareHead') @@ -48,8 +57,8 @@ def test_ssn(self): # for item in 
aug_data: # item['data_sample'].set_metainfo(metainfo) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) diff --git a/tests/test_models/test_detectors/test_3dssd.py b/tests/test_models/test_detectors/test_3dssd.py index 627994d91a..f21bd9bca4 100644 --- a/tests/test_models/test_detectors/test_3dssd.py +++ b/tests/test_models/test_detectors/test_3dssd.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class Test3DSSD(unittest.TestCase): - def test_3dssd(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_3dssd(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'SSD3DNet') @@ -22,12 +31,15 @@ def test_3dssd(self): packed_inputs = create_detector_inputs( num_gt_instance=num_gt_instance, num_classes=1) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) diff --git a/tests/test_models/test_detectors/test_center_point.py b/tests/test_models/test_detectors/test_center_point.py index cab2b799d7..699d20a895 100644 --- a/tests/test_models/test_detectors/test_center_point.py +++ b/tests/test_models/test_detectors/test_center_point.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestCenterPoint(unittest.TestCase): - def test_center_point(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_center_point(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'CenterPoint') @@ -32,14 +41,17 @@ def test_center_point(self): det_sample.gt_instances_3d.bboxes_3d = bbox_3d_class( torch.rand(num_instances, 9), box_dim=9) - if torch.cuda.is_available(): + if is_cuda_available() or is_musa_available(): - model = model.cuda() + model = model.to(device) # test simple_test data = model.data_preprocessor(packed_inputs, True) with torch.no_grad(): - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() losses = model.forward(**data, mode='loss') assert losses['task0.loss_heatmap'] >= 0 assert losses['task0.loss_bbox'] >= 0 diff --git a/tests/test_models/test_detectors/test_fcaf3d.py 
b/tests/test_models/test_detectors/test_fcaf3d.py index ce98515150..69873ac7ea 100755 --- a/tests/test_models/test_detectors/test_fcaf3d.py +++ b/tests/test_models/test_detectors/test_fcaf3d.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestFCAF3d(unittest.TestCase): - def test_fcaf3d(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_fcaf3d(self, _, device: str): try: import MinkowskiEngine # noqa: F401 except ImportError: @@ -30,11 +39,14 @@ def test_fcaf3d(self): points_feat_dim=6, gt_bboxes_dim=6) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) with torch.no_grad(): data = model.data_preprocessor(packed_inputs, False) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) diff --git a/tests/test_models/test_detectors/test_groupfree3d.py b/tests/test_models/test_detectors/test_groupfree3d.py index 784de7a408..837d24b775 100644 --- a/tests/test_models/test_detectors/test_groupfree3d.py +++ b/tests/test_models/test_detectors/test_groupfree3d.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestGroupfree3d(unittest.TestCase): - def test_groupfree3d(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_groupfree3d(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'GroupFree3DNet') @@ -26,12 +35,15 @@ def test_groupfree3d(self): with_pts_semantic_mask=True, with_pts_instance_mask=True) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) diff --git a/tests/test_models/test_detectors/test_h3dnet.py b/tests/test_models/test_detectors/test_h3dnet.py index 09507c3df7..67155852a4 100644 --- a/tests/test_models/test_detectors/test_h3dnet.py +++ b/tests/test_models/test_detectors/test_h3dnet.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, 
get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestH3D(unittest.TestCase): - def test_h3dnet(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_h3dnet(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'H3DNet') @@ -26,8 +35,8 @@ def test_h3dnet(self): with_pts_semantic_mask=True, with_pts_instance_mask=True) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) diff --git a/tests/test_models/test_detectors/test_imvotenet.py b/tests/test_models/test_detectors/test_imvotenet.py index ac0fb9a5f3..6f758ba1ef 100644 --- a/tests/test_models/test_detectors/test_imvotenet.py +++ b/tests/test_models/test_detectors/test_imvotenet.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestImvoteNet(unittest.TestCase): - def test_imvotenet_only_img(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_imvotenet_only_img(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'ImVoteNet') @@ -23,8 +32,8 @@ def test_imvotenet_only_img(self): packed_inputs = create_detector_inputs( with_points=False, with_img=True, img_size=128) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) @@ -36,7 +45,10 @@ def test_imvotenet_only_img(self): # save the memory with torch.no_grad(): - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() losses = model.forward(**data, mode='loss') self.assertGreater(sum(losses['loss_rpn_cls']), 0) @@ -44,7 +56,8 @@ def test_imvotenet_only_img(self): self.assertGreater(losses['loss_cls'], 0) self.assertGreater(losses['loss_bbox'], 0) - def test_imvotenet(self): + @parameterized.expand(AVAILABLE_DEVICES) + def test_imvotenet(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'ImVoteNet') @@ -60,8 +73,8 @@ def test_imvotenet(self): img_size=128, bboxes_3d_type='depth') - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) diff --git a/tests/test_models/test_detectors/test_imvoxelnet.py b/tests/test_models/test_detectors/test_imvoxelnet.py index 41584060ee..0e240f92f9 100644 --- a/tests/test_models/test_detectors/test_imvoxelnet.py +++ b/tests/test_models/test_detectors/test_imvoxelnet.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS 
from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestImVoxelNet(unittest.TestCase): - def test_imvoxelnet_kitti(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_imvoxelnet_kitti(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'ImVoxelNet') @@ -29,12 +38,15 @@ def test_imvoxelnet_kitti(self): with_pts_semantic_mask=False, with_pts_instance_mask=False) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) @@ -49,7 +61,8 @@ def test_imvoxelnet_kitti(self): self.assertGreaterEqual(losses['loss_bbox'][0], 0) self.assertGreaterEqual(losses['loss_dir'][0], 0) - def test_imvoxelnet_sunrgbd(self): + @parameterized.expand(AVAILABLE_DEVICES) + def test_imvoxelnet_sunrgbd(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'ImVoxelNet') @@ -68,12 +81,15 @@ def test_imvoxelnet_sunrgbd(self): with_pts_semantic_mask=False, with_pts_instance_mask=False) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) diff --git a/tests/test_models/test_detectors/test_mvxnet.py b/tests/test_models/test_detectors/test_mvxnet.py index b0463d3248..233dd21f78 100644 --- a/tests/test_models/test_detectors/test_mvxnet.py +++ b/tests/test_models/test_detectors/test_mvxnet.py @@ -2,15 +2,25 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized + from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestMVXNet(unittest.TestCase): - def test_mvxnet(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_mvxnet(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'DynamicMVXFasterRCNN') @@ -25,14 +35,17 @@ def test_mvxnet(self): packed_inputs = create_detector_inputs( with_img=False, num_gt_instance=num_gt_instance, points_feat_dim=4) - if torch.cuda.is_available(): + if is_cuda_available() or is_musa_available(): - model = model.cuda() + model = model.to(device) # test simple_test data = model.data_preprocessor(packed_inputs, True) # save the memory when do the unitest with torch.no_grad(): - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: 
+ torch.cuda.empty_cache() losses = model.forward(**data, mode='loss') assert losses['loss_cls'][0] >= 0 assert losses['loss_bbox'][0] >= 0 diff --git a/tests/test_models/test_detectors/test_parta2.py b/tests/test_models/test_detectors/test_parta2.py index 0409e973bc..e685032e0e 100644 --- a/tests/test_models/test_detectors/test_parta2.py +++ b/tests/test_models/test_detectors/test_parta2.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestPartA2(unittest.TestCase): - def test_parta2(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_parta2(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'PartA2') @@ -38,12 +47,15 @@ def test_parta2(self): # for batch_id in len(item['data_samples']): # item['data_samples'][batch_id].set_metainfo(metainfo) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) diff --git a/tests/test_models/test_detectors/test_pointrcnn.py b/tests/test_models/test_detectors/test_pointrcnn.py index cb03e2871e..b91feb6b52 100644 --- a/tests/test_models/test_detectors/test_pointrcnn.py +++ b/tests/test_models/test_detectors/test_pointrcnn.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestPointRCNN(unittest.TestCase): - def test_pointrcnn(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_pointrcnn(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'PointRCNN') @@ -23,12 +32,15 @@ def test_pointrcnn(self): packed_inputs = create_detector_inputs( num_points=10101, num_gt_instance=num_gt_instance) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) diff --git a/tests/test_models/test_detectors/test_pvrcnn.py b/tests/test_models/test_detectors/test_pvrcnn.py index 05d1801f4b..49a45f781b 100644 --- a/tests/test_models/test_detectors/test_pvrcnn.py +++
b/tests/test_models/test_detectors/test_pvrcnn.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestPVRCNN(unittest.TestCase): - def test_pvrcnn(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_pvrcnn(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'PointVoxelRCNN') @@ -38,8 +47,8 @@ def test_pvrcnn(self): # for batch_id in len(item['data_samples']): # item['data_samples'][batch_id].set_metainfo(metainfo) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) @@ -53,7 +62,10 @@ def test_pvrcnn(self): # save the memory with torch.no_grad(): losses = model.forward(**data, mode='loss') - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() self.assertGreater(losses['loss_rpn_cls'][0], 0) self.assertGreaterEqual(losses['loss_rpn_bbox'][0], 0) self.assertGreaterEqual(losses['loss_rpn_dir'][0], 0) diff --git a/tests/test_models/test_detectors/test_sassd.py b/tests/test_models/test_detectors/test_sassd.py index c9cc12f71d..675e8a86de 100644 --- a/tests/test_models/test_detectors/test_sassd.py +++ b/tests/test_models/test_detectors/test_sassd.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestSDSSD(unittest.TestCase): - def test_3dssd(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_3dssd(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'SASSD') @@ -23,12 +32,15 @@ def test_3dssd(self): packed_inputs = create_detector_inputs( num_gt_instance=num_gt_instance, num_classes=1) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) diff --git a/tests/test_models/test_detectors/test_votenet.py b/tests/test_models/test_detectors/test_votenet.py index 456db1b80b..3c12434364 100644 --- a/tests/test_models/test_detectors/test_votenet.py +++ b/tests/test_models/test_detectors/test_votenet.py @@ -2,15 +2,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from
mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestVotenet(unittest.TestCase): - def test_voxel_net(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_voxel_net(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'VoteNet') @@ -36,8 +45,8 @@ def test_voxel_net(self): # for item in aug_data: # item['data_sample'].set_metainfo(metainfo) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) diff --git a/tests/test_models/test_detectors/test_voxelnet.py b/tests/test_models/test_detectors/test_voxelnet.py index eaccb225d4..d5d26c86bb 100644 --- a/tests/test_models/test_detectors/test_voxelnet.py +++ b/tests/test_models/test_detectors/test_voxelnet.py @@ -3,15 +3,24 @@ import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestVoxelNet(unittest.TestCase): - def test_voxelnet(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_voxelnet(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'VoxelNet') @@ -38,12 +47,15 @@ def test_voxelnet(self): # for item in aug_data: # item['data_sample'].set_metainfo(metainfo) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('bboxes_3d', results[0].pred_instances_3d) @@ -67,7 +79,10 @@ def test_voxelnet(self): with torch.no_grad(): losses = model.forward(**data, mode='loss') - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() self.assertGreaterEqual(losses['loss_dir'][0], 0) self.assertGreaterEqual(losses['loss_bbox'][0], 0) self.assertGreaterEqual(losses['loss_cls'][0], 0) diff --git a/tests/test_models/test_layers/test_box3d_nms.py b/tests/test_models/test_layers/test_box3d_nms.py index d0b8752ce0..2cd5c7c7b0 100644 --- a/tests/test_models/test_layers/test_box3d_nms.py +++ b/tests/test_models/test_layers/test_box3d_nms.py @@ -2,6 +2,7 @@ import numpy as np import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available def test_aligned_3d_nms(): @@ -77,9 +78,24 @@ def test_circle_nms(): # copied from tests/test_ops/test_iou3d.py from mmcv<=1.5 -@pytest.mark.skipif( - not torch.cuda.is_available(), reason='requires CUDA support') -def test_nms_bev(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not
is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_nms_bev(device: str): from mmdet3d.models.layers import nms_bev np_boxes = np.array( @@ -90,15 +106,30 @@ def test_nms_bev(): np_inds = np.array([1, 0, 3]) boxes = torch.from_numpy(np_boxes) scores = torch.from_numpy(np_scores) - inds = nms_bev(boxes.cuda(), scores.cuda(), thresh=0.3) + inds = nms_bev(boxes.to(device), scores.to(device), thresh=0.3) assert np.allclose(inds.cpu().numpy(), np_inds) # copied from tests/test_ops/test_iou3d.py from mmcv<=1.5 -@pytest.mark.skipif( - not torch.cuda.is_available(), reason='requires CUDA support') -def test_nms_normal_bev(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_nms_normal_bev(device: str): from mmdet3d.models.layers import nms_normal_bev np_boxes = np.array( @@ -109,6 +140,6 @@ def test_nms_normal_bev(): np_inds = np.array([1, 0, 3]) boxes = torch.from_numpy(np_boxes) scores = torch.from_numpy(np_scores) - inds = nms_normal_bev(boxes.cuda(), scores.cuda(), thresh=0.3) + inds = nms_normal_bev(boxes.to(device), scores.to(device), thresh=0.3) assert np.allclose(inds.cpu().numpy(), np_inds) diff --git a/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_fa_module.py b/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_fa_module.py index cb1f86c922..e697bd2628 100644 --- a/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_fa_module.py +++ b/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_fa_module.py @@ -2,17 +2,36 @@ import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available -def test_dgcnn_fa_module(): - if not torch.cuda.is_available(): + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_dgcnn_fa_module(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import DGCNNFAModule - self = DGCNNFAModule(mlp_channels=[24, 16]).cuda() + self = DGCNNFAModule(mlp_channels=[24, 16]).to(device) assert self.mlps.layer0.conv.in_channels == 24 assert self.mlps.layer0.conv.out_channels == 16 - points = [torch.rand(1, 200, 12).float().cuda() for _ in range(3)] + points = [torch.rand(1, 200, 12).float().to(device) for _ in range(3)] fa_points = self(points) assert fa_points.shape == torch.Size([1, 200, 40]) diff --git a/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_fp_module.py b/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_fp_module.py index ec57db6c6d..fb2d1dcb32 100644 --- a/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_fp_module.py +++ b/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_fp_module.py @@ -3,22 +3,42 @@ import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available -def test_dgcnn_fp_module(): - if not torch.cuda.is_available(): + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( 
+ not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_dgcnn_fp_module(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import DGCNNFPModule - self = DGCNNFPModule(mlp_channels=[24, 16]).cuda() + self = DGCNNFPModule(mlp_channels=[24, 16]).to(device) assert self.mlps.layer0.conv.in_channels == 24 assert self.mlps.layer0.conv.out_channels == 16 - xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', - np.float32).reshape((-1, 6)) + xyz = np.fromfile("tests/data/sunrgbd/points/000001.bin", np.float32).reshape( + (-1, 6) + ) # (B, N, 3) - xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda() - points = xyz.repeat([1, 1, 8]).cuda() + xyz = torch.from_numpy(xyz).view(1, -1, 3).to(device) + points = xyz.repeat([1, 1, 8]).to(device) fp_points = self(points) assert fp_points.shape == torch.Size([1, 200, 16]) diff --git a/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_gf_module.py b/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_gf_module.py index ddc14a4ee0..328c02e61d 100644 --- a/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_gf_module.py +++ b/tests/test_models/test_layers/test_dgcnn_modules/test_dgcnn_gf_module.py @@ -3,9 +3,21 @@ import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available -def test_dgcnn_gf_module(): - if not torch.cuda.is_available(): + +@pytest.mark.parametrize('device', [ + pytest.param( + 'cuda', + marks=pytest.mark.skipif( + not is_cuda_available(), reason='requires CUDA support')), + pytest.param( + 'musa', + marks=pytest.mark.skipif( + not is_musa_available(), reason='requires MUSA support')) +]) +def test_dgcnn_gf_module(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import DGCNNGFModule @@ -16,7 +28,7 @@ def test_dgcnn_gf_module(): radius=None, norm_cfg=dict(type='BN2d'), act_cfg=dict(type='ReLU'), - pool_mode='max').cuda() + pool_mode='max').to(device) assert self.mlps[0].layer0.conv.in_channels == 18 assert self.mlps[0].layer0.conv.out_channels == 64 @@ -24,7 +36,7 @@ def test_dgcnn_gf_module(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, C) - xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda() + xyz = torch.from_numpy(xyz).view(1, -1, 3).to(device) points = xyz.repeat([1, 1, 3]) # test forward @@ -40,7 +52,7 @@ def test_dgcnn_gf_module(): radius=None, norm_cfg=dict(type='BN2d'), act_cfg=dict(type='ReLU'), - pool_mode='max').cuda() + pool_mode='max').to(device) # test forward new_points = self(xyz) @@ -54,4 +66,4 @@ def test_dgcnn_gf_module(): radius=0.2, norm_cfg=dict(type='BN2d'), act_cfg=dict(type='ReLU'), - pool_mode='max').cuda() + pool_mode='max').to(device) diff --git a/tests/test_models/test_layers/test_minkowski_engine/test_minkowski_engine_module.py b/tests/test_models/test_layers/test_minkowski_engine/test_minkowski_engine_module.py index 5996fe206f..d621e6bf7d 100644 --- a/tests/test_models/test_layers/test_minkowski_engine/test_minkowski_engine_module.py +++ b/tests/test_models/test_layers/test_minkowski_engine/test_minkowski_engine_module.py @@ -2,6 +2,7 @@ import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.models.layers.minkowski_engine_block import \ IS_MINKOWSKI_ENGINE_AVAILABLE @@ -14,52 +15,86 @@ 
pytest.skip('test requires Minkowski Engine.', allow_module_level=True) -def test_MinkowskiConvModule(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_MinkowskiConvModule(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') voxel_features = torch.tensor( [[6.56126, 0.9648336, -1.7339306, 0.315], [6.8162713, -2.480431, -1.3616394, 0.36], [11.643568, -4.744306, -1.3580885, 0.16], [23.482342, 6.5036807, 0.5806964, 0.35]], - dtype=torch.float32).cuda() # n, point_features + dtype=torch.float32).to(device) # n, point_features coordinates = torch.tensor( [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232], [1, 35, 930, 469]], - dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z) + dtype=torch.int32).to(device) # n, 4(batch, ind_x, ind_y, ind_z) # test input_sp_tensor = SparseTensor(voxel_features, coordinates) - self = MinkowskiConvModule(4, 4, kernel_size=2, stride=2).cuda() + self = MinkowskiConvModule(4, 4, kernel_size=2, stride=2).to(device) out_features = self(input_sp_tensor) assert out_features.F.shape == torch.Size([4, 4]) -def test_MinkowskiResidualBlock(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_MinkowskiResidualBlock(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') voxel_features = torch.tensor( [[6.56126, 0.9648336, -1.7339306, 0.315], [6.8162713, -2.480431, -1.3616394, 0.36], [11.643568, -4.744306, -1.3580885, 0.16], [23.482342, 6.5036807, 0.5806964, 0.35]], - dtype=torch.float32).cuda() # n, point_features + dtype=torch.float32).to(device) # n, point_features coordinates = torch.tensor( [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232], [1, 35, 930, 469]], - dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z) + dtype=torch.int32).to(device) # n, 4(batch, ind_x, ind_y, ind_z) # test input_sp_tensor = SparseTensor(voxel_features, coordinates) - sparse_block0 = MinkowskiBasicBlock(4, 4, kernel_size=3).cuda() + sparse_block0 = MinkowskiBasicBlock(4, 4, kernel_size=3).to(device) sparse_block1 = MinkowskiBottleneck( 4, 4, downsample=MinkowskiConvModule(4, 16, kernel_size=1, act_cfg=None), - kernel_size=3).cuda() + kernel_size=3).to(device) # test forward out_features0 = sparse_block0(input_sp_tensor) diff --git a/tests/test_models/test_layers/test_paconv/test_paconv_modules.py b/tests/test_models/test_layers/test_paconv/test_paconv_modules.py index 9c278d4793..f27ea4348b 100644 --- a/tests/test_models/test_layers/test_paconv/test_paconv_modules.py +++ b/tests/test_models/test_layers/test_paconv/test_paconv_modules.py @@ -2,10 +2,28 @@ import numpy as np import pytest import torch - - -def test_paconv_sa_module_msg(): - if not torch.cuda.is_available(): +from mmengine.device import is_musa_available, is_cuda_available + + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not 
is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_paconv_sa_module_msg(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import PAConvSAModuleMSG @@ -16,7 +34,7 @@ def test_paconv_sa_module_msg(): radii=[0.2, 0.4], sample_nums=[4, 8], mlp_channels=[[12, 16], [12, 32]], - paconv_num_kernels=[[4]]).cuda() + paconv_num_kernels=[[4]]).to(device) # paconv_num_kernels inner num should match as mlp_channels with pytest.raises(AssertionError): @@ -25,7 +43,7 @@ def test_paconv_sa_module_msg(): radii=[0.2, 0.4], sample_nums=[4, 8], mlp_channels=[[12, 16], [12, 32]], - paconv_num_kernels=[[4, 4], [8, 8]]).cuda() + paconv_num_kernels=[[4, 4], [8, 8]]).to(device) self = PAConvSAModuleMSG( num_point=16, @@ -36,7 +54,7 @@ def test_paconv_sa_module_msg(): norm_cfg=dict(type='BN2d'), use_xyz=False, pool_mod='max', - paconv_kernel_input='w_neighbor').cuda() + paconv_kernel_input='w_neighbor').to(device) assert self.mlps[0].layer0.in_channels == 12 * 2 assert self.mlps[0].layer0.out_channels == 16 @@ -59,9 +77,9 @@ def test_paconv_sa_module_msg(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, 3) - xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda() + xyz = torch.from_numpy(xyz).view(1, -1, 3).to(device) # (B, C, N) - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -79,7 +97,7 @@ def test_paconv_sa_module_msg(): norm_cfg=dict(type='BN2d'), use_xyz=False, pool_mod='max', - paconv_kernel_input='identity').cuda() + paconv_kernel_input='identity').to(device) assert self.mlps[0].layer0.in_channels == 12 * 1 assert self.mlps[0].layer0.out_channels == 16 @@ -91,9 +109,9 @@ def test_paconv_sa_module_msg(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, 3) - xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda() + xyz = torch.from_numpy(xyz).view(1, -1, 3).to(device) # (B, C, N) - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -102,8 +120,25 @@ def test_paconv_sa_module_msg(): assert inds.shape == torch.Size([1, 16]) -def test_paconv_sa_module(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_paconv_sa_module(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import build_sa_module sa_cfg = dict( @@ -117,7 +152,7 @@ def test_paconv_sa_module(): use_xyz=True, pool_mod='max', paconv_kernel_input='w_neighbor') - self = build_sa_module(sa_cfg).cuda() + self = build_sa_module(sa_cfg).to(device) assert self.mlps[0].layer0.in_channels == 15 * 2 assert self.mlps[0].layer0.out_channels == 32 @@ -126,9 +161,9 @@ def test_paconv_sa_module(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, 3) - xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda() + xyz 
= torch.from_numpy(xyz[..., :3]).view(1, -1, 3).to(device) # (B, C, N) - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -148,21 +183,38 @@ def test_paconv_sa_module(): use_xyz=True, pool_mod='max', paconv_kernel_input='identity') - self = build_sa_module(sa_cfg).cuda() + self = build_sa_module(sa_cfg).to(device) assert self.mlps[0].layer0.in_channels == 15 * 1 xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) - xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda() - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).to(device) + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) new_xyz, new_features, inds = self(xyz, features) assert new_xyz.shape == torch.Size([1, 16, 3]) assert new_features.shape == torch.Size([1, 32, 16]) assert inds.shape == torch.Size([1, 16]) -def test_paconv_cuda_sa_module_msg(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_paconv_cuda_sa_module_msg(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import PAConvCUDASAModuleMSG @@ -173,7 +225,7 @@ def test_paconv_cuda_sa_module_msg(): radii=[0.2, 0.4], sample_nums=[4, 8], mlp_channels=[[12, 16], [12, 32]], - paconv_num_kernels=[[4]]).cuda() + paconv_num_kernels=[[4]]).to(device) # paconv_num_kernels inner num should match as mlp_channels with pytest.raises(AssertionError): @@ -182,7 +234,7 @@ def test_paconv_cuda_sa_module_msg(): radii=[0.2, 0.4], sample_nums=[4, 8], mlp_channels=[[12, 16], [12, 32]], - paconv_num_kernels=[[4, 4], [8, 8]]).cuda() + paconv_num_kernels=[[4, 4], [8, 8]]).to(device) self = PAConvCUDASAModuleMSG( num_point=16, @@ -193,7 +245,7 @@ def test_paconv_cuda_sa_module_msg(): norm_cfg=dict(type='BN2d'), use_xyz=False, pool_mod='max', - paconv_kernel_input='w_neighbor').cuda() + paconv_kernel_input='w_neighbor').to(device) assert self.mlps[0][0].in_channels == 12 * 2 assert self.mlps[0][0].out_channels == 16 @@ -218,9 +270,9 @@ def test_paconv_cuda_sa_module_msg(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, 3) - xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda() + xyz = torch.from_numpy(xyz).view(1, -1, 3).to(device) # (B, C, N) - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -239,11 +291,28 @@ def test_paconv_cuda_sa_module_msg(): norm_cfg=dict(type='BN2d'), use_xyz=False, pool_mod='max', - paconv_kernel_input='identity').cuda() - - -def test_paconv_cuda_sa_module(): - if not torch.cuda.is_available(): + paconv_kernel_input='identity').to(device) + + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def 
test_paconv_cuda_sa_module(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import build_sa_module sa_cfg = dict( @@ -257,7 +326,7 @@ def test_paconv_cuda_sa_module(): use_xyz=True, pool_mod='max', paconv_kernel_input='w_neighbor') - self = build_sa_module(sa_cfg).cuda() + self = build_sa_module(sa_cfg).to(device) assert self.mlps[0][0].in_channels == 15 * 2 assert self.mlps[0][0].out_channels == 32 @@ -266,9 +335,9 @@ def test_paconv_cuda_sa_module(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, 3) - xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda() + xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).to(device) # (B, C, N) - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -288,12 +357,12 @@ def test_paconv_cuda_sa_module(): use_xyz=True, pool_mod='max', paconv_kernel_input='w_neighbor') - self = build_sa_module(sa_cfg).cuda() + self = build_sa_module(sa_cfg).to(device) xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) - xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda() - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).to(device) + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) new_xyz, new_features, inds = self(xyz, features) assert new_xyz.shape == torch.Size([1, 16, 3]) assert new_features.shape == torch.Size([1, 32, 16]) diff --git a/tests/test_models/test_layers/test_paconv/test_paconv_ops.py b/tests/test_models/test_layers/test_paconv/test_paconv_ops.py index 9f3f9cdb30..d5d0b44c16 100644 --- a/tests/test_models/test_layers/test_paconv/test_paconv_ops.py +++ b/tests/test_models/test_layers/test_paconv/test_paconv_ops.py @@ -1,11 +1,29 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.models.layers import PAConv, PAConvCUDA -def test_paconv(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_paconv(device: str): B = 2 in_channels = 6 out_channels = 12 @@ -25,8 +43,25 @@ def test_paconv(): assert new_features.shape == torch.Size([B, out_channels, npoint, K]) -def test_paconv_cuda(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_paconv_cuda(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() B = 2 in_channels = 6 @@ -35,11 +70,11 @@ def test_paconv_cuda(): npoint = 4 K = 3 num_kernels = 4 - points_xyz = torch.randn(B, 3, npoint, K).float().cuda() - features = torch.randn(B, in_channels, N).float().cuda() - points_idx = torch.randint(0, N, (B, npoint, K)).long().cuda() + points_xyz = torch.randn(B, 3, npoint, K).float().to(device) + features = torch.randn(B, in_channels, N).float().to(device) + points_idx = torch.randint(0, N, (B, npoint, K)).long().to(device) - paconv = PAConvCUDA(in_channels, out_channels, num_kernels).cuda() + paconv = PAConvCUDA(in_channels, out_channels, num_kernels).to(device) assert paconv.weight_bank.shape == torch.Size( [in_channels * 2, out_channels * num_kernels]) diff --git a/tests/test_models/test_layers/test_pointnet_modules/test_point_fp_module.py b/tests/test_models/test_layers/test_pointnet_modules/test_point_fp_module.py index c413f275fb..a43a9cc95d 100644 --- a/tests/test_models/test_layers/test_pointnet_modules/test_point_fp_module.py +++ b/tests/test_models/test_layers/test_pointnet_modules/test_point_fp_module.py @@ -3,13 +3,32 @@ import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available -def test_pointnet_fp_module(): - if not torch.cuda.is_available(): + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_pointnet_fp_module(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import PointFPModule - self = PointFPModule(mlp_channels=[24, 16]).cuda() + self = PointFPModule(mlp_channels=[24, 16]).to(device) assert self.mlps.layer0.conv.in_channels == 24 assert self.mlps.layer0.conv.out_channels == 16 @@ -17,14 +36,14 @@ def test_pointnet_fp_module(): np.float32).reshape((-1, 6)) # (B, N, 3) - xyz1 = torch.from_numpy(xyz[0::2, :3]).view(1, -1, 3).cuda() + xyz1 = torch.from_numpy(xyz[0::2, :3]).view(1, -1, 3).to(device) # (B, C1, N) - features1 = xyz1.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features1 = xyz1.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) # (B, M, 3) - xyz2 = torch.from_numpy(xyz[1::3, :3]).view(1, -1, 3).cuda() + xyz2 = torch.from_numpy(xyz[1::3, :3]).view(1, -1, 3).to(device) 
# (B, C2, N) - features2 = xyz2.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features2 = xyz2.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) fp_features = self(xyz1, xyz2, features1, features2) assert fp_features.shape == torch.Size([1, 16, 50]) diff --git a/tests/test_models/test_layers/test_pointnet_modules/test_point_sa_module.py b/tests/test_models/test_layers/test_pointnet_modules/test_point_sa_module.py index 2b3e678345..b6e105bc3d 100644 --- a/tests/test_models/test_layers/test_pointnet_modules/test_point_sa_module.py +++ b/tests/test_models/test_layers/test_pointnet_modules/test_point_sa_module.py @@ -3,9 +3,28 @@ import pytest import torch - -def test_pointnet_sa_module_msg(): - if not torch.cuda.is_available(): +from mmengine.device import is_musa_available, is_cuda_available + + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_pointnet_sa_module_msg(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import PointSAModuleMSG @@ -16,7 +35,7 @@ def test_pointnet_sa_module_msg(): mlp_channels=[[12, 16], [12, 32]], norm_cfg=dict(type='BN2d'), use_xyz=False, - pool_mod='max').cuda() + pool_mod='max').to(device) assert self.mlps[0].layer0.conv.in_channels == 12 assert self.mlps[0].layer0.conv.out_channels == 16 @@ -26,9 +45,9 @@ def test_pointnet_sa_module_msg(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, 3) - xyz = torch.from_numpy(xyz).view(1, -1, 3).cuda() + xyz = torch.from_numpy(xyz).view(1, -1, 3).to(device) # (B, C, N) - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -46,7 +65,7 @@ def test_pointnet_sa_module_msg(): use_xyz=False, pool_mod='max', fps_mod=['D-FPS'], - fps_sample_range_list=[-1]).cuda() + fps_sample_range_list=[-1]).to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -64,7 +83,7 @@ def test_pointnet_sa_module_msg(): use_xyz=False, pool_mod='max', fps_mod=['F-FPS'], - fps_sample_range_list=[-1]).cuda() + fps_sample_range_list=[-1]).to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -82,7 +101,7 @@ def test_pointnet_sa_module_msg(): use_xyz=False, pool_mod='max', fps_mod=['FS'], - fps_sample_range_list=[-1]).cuda() + fps_sample_range_list=[-1]).to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -100,7 +119,7 @@ def test_pointnet_sa_module_msg(): use_xyz=False, pool_mod='max', fps_mod=['F-FPS', 'D-FPS'], - fps_sample_range_list=[64, -1]).cuda() + fps_sample_range_list=[64, -1]).to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -116,7 +135,7 @@ def test_pointnet_sa_module_msg(): mlp_channels=[[12, 16], [12, 32]], norm_cfg=dict(type='BN2d'), use_xyz=False, - pool_mod='max').cuda() + pool_mod='max').to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -133,7 +152,7 @@ def test_pointnet_sa_module_msg(): use_xyz=False, pool_mod='max', fps_mod=['F-FPS', 'D-FPS'], - fps_sample_range_list=[-1]).cuda() + fps_sample_range_list=[-1]).to(device) # length of 'num_point' should be same as 'fps_sample_range_list' 
with pytest.raises(AssertionError): @@ -146,11 +165,28 @@ def test_pointnet_sa_module_msg(): use_xyz=False, pool_mod='max', fps_mod=['F-FPS'], - fps_sample_range_list=[-1]).cuda() - - -def test_pointnet_sa_module(): - if not torch.cuda.is_available(): + fps_sample_range_list=[-1]).to(device) + + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_pointnet_sa_module(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() from mmdet3d.models.layers import build_sa_module sa_cfg = dict( @@ -162,7 +198,7 @@ def test_pointnet_sa_module(): norm_cfg=dict(type='BN2d'), use_xyz=True, pool_mod='max') - self = build_sa_module(sa_cfg).cuda() + self = build_sa_module(sa_cfg).to(device) assert self.mlps[0].layer0.conv.in_channels == 15 assert self.mlps[0].layer0.conv.out_channels == 32 @@ -170,9 +206,9 @@ def test_pointnet_sa_module(): xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) # (B, N, 3) - xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda() + xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).to(device) # (B, C, N) - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) # test forward new_xyz, new_features, inds = self(xyz, features) @@ -196,12 +232,12 @@ def test_pointnet_sa_module(): # test kNN sampling when radius is None sa_cfg['normalize_xyz'] = False - self = build_sa_module(sa_cfg).cuda() + self = build_sa_module(sa_cfg).to(device) xyz = np.fromfile('tests/data/sunrgbd/points/000001.bin', np.float32) - xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).cuda() - features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().cuda() + xyz = torch.from_numpy(xyz[..., :3]).view(1, -1, 3).to(device) + features = xyz.repeat([1, 1, 4]).transpose(1, 2).contiguous().to(device) new_xyz, new_features, inds = self(xyz, features) assert new_xyz.shape == torch.Size([1, 16, 3]) assert new_features.shape == torch.Size([1, 32, 16]) diff --git a/tests/test_models/test_layers/test_spconv/test_spconv_module.py b/tests/test_models/test_layers/test_spconv/test_spconv_module.py index 1ae7691e85..8062261425 100644 --- a/tests/test_models/test_layers/test_spconv/test_spconv_module.py +++ b/tests/test_models/test_layers/test_spconv/test_spconv_module.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.models.layers import SparseBasicBlock from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE @@ -12,19 +13,36 @@ from mmcv.ops import SparseConvTensor, SparseInverseConv3d, SubMConv3d -def test_SparseBasicBlock(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_SparseBasicBlock(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') voxel_features = torch.tensor( [[6.56126, 0.9648336, -1.7339306, 0.315], [6.8162713, -2.480431, -1.3616394, 0.36], [11.643568, -4.744306, -1.3580885, 0.16], [23.482342, 6.5036807, 0.5806964, 0.35]], - dtype=torch.float32).cuda() # n, point_features + dtype=torch.float32).to(device) # n, point_features coordinates = torch.tensor( [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232], [1, 35, 930, 469]], - dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z) + dtype=torch.int32).to(device) # n, 4(batch, ind_x, ind_y, ind_z) # test input_sp_tensor = SparseConvTensor(voxel_features, coordinates, @@ -33,7 +51,7 @@ def test_SparseBasicBlock(): 4, 4, conv_cfg=dict(type='SubMConv3d', indice_key='subm1'), - norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)).cuda() + norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01)).to(device) # test conv and bn layer assert isinstance(self.conv1, SubMConv3d) assert self.conv1.in_channels == 4 @@ -48,8 +66,25 @@ def test_SparseBasicBlock(): assert out_features.features.shape == torch.Size([4, 4]) -def test_make_sparse_convmodule(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_make_sparse_convmodule(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') from mmdet3d.models.layers import make_sparse_convmodule @@ -58,11 +93,11 @@ def test_make_sparse_convmodule(): [6.8162713, -2.480431, -1.3616394, 0.36], [11.643568, -4.744306, -1.3580885, 0.16], [23.482342, 6.5036807, 0.5806964, 0.35]], - dtype=torch.float32).cuda() # n, point_features + dtype=torch.float32).to(device) # n, point_features coordinates = torch.tensor( [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232], [1, 35, 930, 469]], - dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z) + dtype=torch.int32).to(device) # n, 4(batch, ind_x, ind_y, ind_z) # test input_sp_tensor = SparseConvTensor(voxel_features, coordinates, @@ -77,7 +112,7 @@ def test_make_sparse_convmodule(): padding=0, conv_type='SubMConv3d', norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), - order=('conv', 'norm', 'act')).cuda() + order=('conv', 'norm', 'act')).to(device) assert isinstance(sparse_block0[0], SubMConv3d) assert sparse_block0[0].in_channels == 4 assert sparse_block0[0].out_channels == 16 diff --git a/tests/test_models/test_layers/test_torchsparse/test_torchsparse_module.py b/tests/test_models/test_layers/test_torchsparse/test_torchsparse_module.py index 
10aa503a4d..778601a5e0 100644 --- a/tests/test_models/test_layers/test_torchsparse/test_torchsparse_module.py +++ b/tests/test_models/test_layers/test_torchsparse/test_torchsparse_module.py @@ -2,6 +2,7 @@ import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.models.layers.torchsparse import IS_TORCHSPARSE_AVAILABLE if IS_TORCHSPARSE_AVAILABLE: @@ -14,48 +15,82 @@ pytest.skip('test requires Torchsparse', allow_module_level=True) -def test_TorchsparseConvModule(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_TorchsparseConvModule(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') voxel_features = torch.tensor( [[6.56126, 0.9648336, -1.7339306, 0.315], [6.8162713, -2.480431, -1.3616394, 0.36], [11.643568, -4.744306, -1.3580885, 0.16], [23.482342, 6.5036807, 0.5806964, 0.35]], - dtype=torch.float32).cuda() # n, point_features + dtype=torch.float32).to(device) # n, point_features coordinates = torch.tensor( [[12, 819, 131, 0], [16, 750, 136, 0], [16, 705, 232, 1], [35, 930, 469, 1]], - dtype=torch.int32).cuda() # n, 4(ind_x, ind_y, ind_z, batch) + dtype=torch.int32).to(device) # n, 4(ind_x, ind_y, ind_z, batch) # test input_sp_tensor = SparseTensor(voxel_features, coordinates) - self = TorchSparseConvModule(4, 4, kernel_size=2, stride=2).cuda() + self = TorchSparseConvModule(4, 4, kernel_size=2, stride=2).to(device) out_features = self(input_sp_tensor) assert out_features.F.shape == torch.Size([4, 4]) -def test_TorchsparseResidualBlock(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_TorchsparseResidualBlock(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') voxel_features = torch.tensor( [[6.56126, 0.9648336, -1.7339306, 0.315], [6.8162713, -2.480431, -1.3616394, 0.36], [11.643568, -4.744306, -1.3580885, 0.16], [23.482342, 6.5036807, 0.5806964, 0.35]], - dtype=torch.float32).cuda() # n, point_features + dtype=torch.float32).to(device) # n, point_features coordinates = torch.tensor( [[12, 819, 131, 0], [16, 750, 136, 0], [16, 705, 232, 1], [35, 930, 469, 1]], - dtype=torch.int32).cuda() # n, 4(ind_x, ind_y, ind_z, batch) + dtype=torch.int32).to(device) # n, 4(ind_x, ind_y, ind_z, batch) # test input_sp_tensor = SparseTensor(voxel_features, coordinates) - sparse_block0 = TorchSparseBasicBlock(4, 16, kernel_size=3).cuda() - sparse_block1 = TorchSparseBottleneck(4, 16, kernel_size=3).cuda() + sparse_block0 = TorchSparseBasicBlock(4, 16, kernel_size=3).to(device) + sparse_block1 = TorchSparseBottleneck(4, 16, kernel_size=3).to(device) # test forward out_features0 = sparse_block0(input_sp_tensor) diff --git a/tests/test_models/test_losses/test_rotated_iou_loss.py b/tests/test_models/test_losses/test_rotated_iou_loss.py index 1f293522ee..4654e955fc 100644 --- a/tests/test_models/test_losses/test_rotated_iou_loss.py +++ 
b/tests/test_models/test_losses/test_rotated_iou_loss.py @@ -2,26 +2,46 @@ import numpy as np import torch +import pytest + +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.models.losses import RotatedIoU3DLoss -def test_rotated_iou_3d_loss(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_rotated_iou_3d_loss(device: str): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): return boxes1 = torch.tensor([[.5, .5, .5, 1., 1., 1., .0], [.5, .5, .5, 1., 1., 1., .0], [.5, .5, .5, 1., 1., 1., .0], [.5, .5, .5, 1., 1., 1., .0], - [.5, .5, .5, 1., 1., 1., .0]]).cuda() + [.5, .5, .5, 1., 1., 1., .0]]).to(device) boxes2 = torch.tensor([[.5, .5, .5, 1., 1., 1., .0], [.5, .5, .5, 1., 1., 2., np.pi / 2], [.5, .5, .5, 1., 1., 1., np.pi / 4], [1., 1., 1., 1., 1., 1., .0], - [-1.5, -1.5, -1.5, 2.5, 2.5, 2.5, .0]]).cuda() + [-1.5, -1.5, -1.5, 2.5, 2.5, 2.5, .0]]).to(device) - expect_ious = 1 - torch.tensor([[1., .5, .7071, 1 / 15, .0]]).cuda() + expect_ious = 1 - torch.tensor([[1., .5, .7071, 1 / 15, .0]]).to(device) ious = RotatedIoU3DLoss(reduction='none')(boxes1, boxes2) assert torch.allclose(ious, expect_ious, atol=1e-4) diff --git a/tests/test_models/test_middle_encoders/test_sparse_encoders.py b/tests/test_models/test_middle_encoders/test_sparse_encoders.py index 698282321f..ea3d27cbbf 100644 --- a/tests/test_models/test_middle_encoders/test_sparse_encoders.py +++ b/tests/test_models/test_middle_encoders/test_sparse_encoders.py @@ -1,12 +1,31 @@ # Copyright (c) OpenMMLab. All rights reserved.
import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_sparse_encoder(): - if not torch.cuda.is_available(): + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_sparse_encoder(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') sparse_encoder_cfg = dict( type='SparseEncoder', @@ -19,16 +38,33 @@ 1)), block_type='basicblock') - sparse_encoder = MODELS.build(sparse_encoder_cfg).cuda() - voxel_features = torch.rand([207842, 5]).cuda() - coors = torch.randint(0, 4, [207842, 4]).cuda() + sparse_encoder = MODELS.build(sparse_encoder_cfg).to(device) + voxel_features = torch.rand([207842, 5]).to(device) + coors = torch.randint(0, 4, [207842, 4]).to(device) ret = sparse_encoder(voxel_features, coors, 4) assert ret.shape == torch.Size([4, 256, 128, 128]) -def test_sparse_encoder_for_ssd(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_sparse_encoder_for_ssd(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') sparse_encoder_for_ssd_cfg = dict( type='SparseEncoderSASSD', @@ -41,9 +77,9 @@ 1)), block_type='basicblock') - sparse_encoder = MODELS.build(sparse_encoder_for_ssd_cfg).cuda() - voxel_features = torch.rand([207842, 5]).cuda() - coors = torch.randint(0, 4, [207842, 4]).cuda() + sparse_encoder = MODELS.build(sparse_encoder_for_ssd_cfg).to(device) + voxel_features = torch.rand([207842, 5]).to(device) + coors = torch.randint(0, 4, [207842, 4]).to(device) ret, _ = sparse_encoder(voxel_features, coors, 4, True) assert ret.shape == torch.Size([4, 256, 128, 128]) diff --git a/tests/test_models/test_middle_encoders/test_sparse_unet.py b/tests/test_models/test_middle_encoders/test_sparse_unet.py index 6fe1e5b85f..83fc25f68c 100644 --- a/tests/test_models/test_middle_encoders/test_sparse_unet.py +++ b/tests/test_models/test_middle_encoders/test_sparse_unet.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved.
import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.models.layers import SparseBasicBlock from mmdet3d.models.layers.spconv import IS_SPCONV2_AVAILABLE @@ -11,11 +12,28 @@ from mmcv.ops import SparseConv3d, SparseInverseConv3d, SubMConv3d -def test_SparseUNet(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_SparseUNet(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') from mmdet3d.models.middle_encoders.sparse_unet import SparseUNet - self = SparseUNet(in_channels=4, sparse_shape=[41, 1600, 1408]).cuda() + self = SparseUNet(in_channels=4, sparse_shape=[41, 1600, 1408]).to(device) # test encoder layers assert len(self.encoder_layers) == 4 @@ -42,11 +60,11 @@ def test_SparseUNet(): [6.8162713, -2.480431, -1.3616394, 0.36], [11.643568, -4.744306, -1.3580885, 0.16], [23.482342, 6.5036807, 0.5806964, 0.35]], - dtype=torch.float32).cuda() # n, point_features + dtype=torch.float32).to(device) # n, point_features coordinates = torch.tensor( [[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232], [1, 35, 930, 469]], - dtype=torch.int32).cuda() # n, 4(batch, ind_x, ind_y, ind_z) + dtype=torch.int32).to(device) # n, 4(batch, ind_x, ind_y, ind_z) unet_ret_dict = self.forward(voxel_features, coordinates, 2) seg_features = unet_ret_dict['seg_features'] diff --git a/tests/test_models/test_necks/test_dla_neck.py b/tests/test_models/test_necks/test_dla_neck.py index 3bde3e1870..f48eb0d9a2 100644 --- a/tests/test_models/test_necks/test_dla_neck.py +++ b/tests/test_models/test_necks/test_dla_neck.py @@ -1,17 +1,36 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import torch +import pytest +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_dla_neck(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_dla_neck(device: str): s = 32 in_channels = [16, 32, 64, 128, 256, 512] feat_sizes = [s // 2**i for i in range(6)] # [32, 16, 8, 4, 2, 1] - if torch.cuda.is_available(): + if is_cuda_available() or is_musa_available(): # Test DLA Neck with DCNv2 on GPU neck_cfg = dict( type='DLANeck', @@ -21,9 +40,9 @@ def test_dla_neck(): norm_cfg=dict(type='GN', num_groups=32)) neck = MODELS.build(neck_cfg) neck.init_weights() - neck.cuda() + neck.to(device) feats = [ - torch.rand(4, in_channels[i], feat_sizes[i], feat_sizes[i]).cuda() + torch.rand(4, in_channels[i], feat_sizes[i], feat_sizes[i]).to(device) for i in range(len(in_channels)) ] outputs = neck(feats) diff --git a/tests/test_models/test_necks/test_imvoxel_neck.py b/tests/test_models/test_necks/test_imvoxel_neck.py index 9d3a071d78..757c16c821 100644 --- a/tests/test_models/test_necks/test_imvoxel_neck.py +++ b/tests/test_models/test_necks/test_imvoxel_neck.py @@ -1,16 +1,34 @@ import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_imvoxel_neck(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_imvoxel_neck(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') neck_cfg = dict( type='OutdoorImVoxelNeck', in_channels=64, out_channels=256) - neck = MODELS.build(neck_cfg).cuda() - inputs = torch.rand([1, 64, 216, 248, 12], device='cuda') + neck = MODELS.build(neck_cfg).to(device) + inputs = torch.rand([1, 64, 216, 248, 12], device=device) outputs = neck(inputs) assert outputs[0].shape == (1, 256, 248, 216) diff --git a/tests/test_models/test_necks/test_pointnet2_fp_neck.py b/tests/test_models/test_necks/test_pointnet2_fp_neck.py index cf5df0b3e8..587de9b1fb 100644 --- a/tests/test_models/test_necks/test_pointnet2_fp_neck.py +++ b/tests/test_models/test_necks/test_pointnet2_fp_neck.py @@ -1,11 +1,29 @@ import pytest import torch +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_pointnet2_fp_neck(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_pointnet2_fp_neck(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() xyzs = [16384, 4096, 1024, 256, 64] @@ -25,10 +43,10 @@ def test_pointnet2_fp_neck(): neck = MODELS.build(neck_cfg) neck.init_weights() - if torch.cuda.is_available(): - sa_xyz = [x.cuda() for x in sa_xyz] - sa_features = [x.cuda() for x in sa_features] - neck.cuda() + if 
is_cuda_available() or is_musa_available(): + sa_xyz = [x.to(device) for x in sa_xyz] + sa_features = [x.to(device) for x in sa_features] + neck.to(device) feats_sa = {'sa_xyz': sa_xyz, 'sa_features': sa_features} outputs = neck(feats_sa) diff --git a/tests/test_models/test_segmentors/test_cylinder3d.py b/tests/test_models/test_segmentors/test_cylinder3d.py index c6e6a0b496..7ec3ec6915 100644 --- a/tests/test_models/test_segmentors/test_cylinder3d.py +++ b/tests/test_models/test_segmentors/test_cylinder3d.py @@ -1,17 +1,27 @@ # Copyright (c) OpenMMLab. All rights reserved. import unittest +import pytest import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestCylinder3D(unittest.TestCase): - def test_cylinder3d(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_cylinder3d(self, _, device): import mmdet3d.models assert hasattr(mmdet3d.models, 'Cylinder3D') @@ -27,12 +37,15 @@ def test_cylinder3d(self): num_classes=1, with_pts_semantic_mask=True) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('pts_semantic_mask', results[0].pred_pts_seg) diff --git a/tests/test_models/test_segmentors/test_minkunet.py b/tests/test_models/test_segmentors/test_minkunet.py index 16312c293e..a8ab623ab8 100644 --- a/tests/test_models/test_segmentors/test_minkunet.py +++ b/tests/test_models/test_segmentors/test_minkunet.py @@ -4,15 +4,24 @@ import pytest import torch from mmengine import DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.registry import MODELS from mmdet3d.testing import (create_detector_inputs, get_detector_cfg, setup_seed) +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestMinkUNet(unittest.TestCase): - def test_minkunet(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_minkunet(self, _, device: str): try: import torchsparse # noqa except ImportError: @@ -31,12 +40,15 @@ def test_minkunet(self): num_classes=19, with_pts_semantic_mask=True) - if torch.cuda.is_available(): - model = model.cuda() + if is_cuda_available() or is_musa_available(): + model = model.to(device) # test simple_test with torch.no_grad(): data = model.data_preprocessor(packed_inputs, True) - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() results = model.forward(**data, mode='predict') self.assertEqual(len(results), 1) self.assertIn('pts_semantic_mask', results[0].pred_pts_seg) diff --git a/tests/test_models/test_segmentors/test_seg3d_tta_model.py 
b/tests/test_models/test_segmentors/test_seg3d_tta_model.py index 24bfa225f3..fffd9bb9e4 100644 --- a/tests/test_models/test_segmentors/test_seg3d_tta_model.py +++ b/tests/test_models/test_segmentors/test_seg3d_tta_model.py @@ -1,18 +1,28 @@ # Copyright (c) OpenMMLab. All rights reserved. from unittest import TestCase +import unittest import torch from mmengine import ConfigDict, DefaultScope +from mmengine.device import is_musa_available, is_cuda_available +from parameterized import parameterized from mmdet3d.models import Seg3DTTAModel from mmdet3d.registry import MODELS from mmdet3d.structures import Det3DDataSample from mmdet3d.testing import get_detector_cfg +from mmdet3d.testing.model_utils import AVAILABLE_DEVICES class TestSeg3DTTAModel(TestCase): - def test_seg3d_tta_model(self): + @classmethod + def setUpClass(cls): + if not (is_cuda_available() or is_musa_available()): + raise unittest.SkipTest("requires CUDA or MUSA support") + + @parameterized.expand(AVAILABLE_DEVICES) + def test_seg3d_tta_model(self, _, device: str): import mmdet3d.models assert hasattr(mmdet3d.models, 'Cylinder3D') @@ -35,6 +45,6 @@ def test_seg3d_tta_model(self): pcd_horizontal_flip=pcd_horizontal_flip_list[i], pcd_vertical_flip=pcd_vertical_flip_list[i])) ]) - if torch.cuda.is_available(): - model.eval().cuda() + if is_cuda_available() or is_musa_available(): + model.eval().to(device) model.test_step(dict(inputs=points, data_samples=data_samples)) diff --git a/tests/test_models/test_task_modules/test_anchor/test_anchor_3d_generator.py b/tests/test_models/test_task_modules/test_anchor/test_anchor_3d_generator.py index 294aa93f13..dabe571419 100644 --- a/tests/test_models/test_task_modules/test_anchor/test_anchor_3d_generator.py +++ b/tests/test_models/test_task_modules/test_anchor/test_anchor_3d_generator.py @@ -7,6 +7,7 @@ """ import torch from mmengine import DefaultScope +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.registry import TASK_UTILS @@ -19,8 +20,10 @@ def test_anchor_3d_range_generator(): DefaultScope.get_instance( 'test_ancho3drange_generator', scope_name='mmdet3d') - if torch.cuda.is_available(): + if is_cuda_available(): device = 'cuda' + elif is_musa_available(): + device = "musa" else: device = 'cpu' anchor_generator_cfg = dict( @@ -60,8 +63,10 @@ def test_aligned_anchor_generator(): DefaultScope.get_instance( 'test_aligned_ancho3drange_generator', scope_name='mmdet3d') - if torch.cuda.is_available(): + if is_cuda_available(): device = 'cuda' + elif is_musa_available(): + device = "musa" else: device = 'cpu' @@ -209,8 +214,10 @@ def test_aligned_anchor_generator_per_cls(): DefaultScope.get_instance( 'test_ancho3drange_generator_percls', scope_name='mmdet3d') - if torch.cuda.is_available(): + if is_cuda_available(): device = 'cuda' + elif is_musa_available(): + device = "musa" else: device = 'cpu' diff --git a/tests/test_models/test_task_modules/test_samplers/test_iou_piecewise_sampler.py b/tests/test_models/test_task_modules/test_samplers/test_iou_piecewise_sampler.py index b996364908..ea2be8f019 100644 --- a/tests/test_models/test_task_modules/test_samplers/test_iou_piecewise_sampler.py +++ b/tests/test_models/test_task_modules/test_samplers/test_iou_piecewise_sampler.py @@ -2,13 +2,31 @@ import pytest import torch from mmengine.structures import InstanceData +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.models.task_modules import IoUNegPiecewiseSampler from mmdet3d.models.task_modules.assigners import Max3DIoUAssigner -def 
test_iou_piecewise_sampler(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_iou_piecewise_sampler(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip() assigner = Max3DIoUAssigner( pos_iou_thr=0.55, @@ -23,11 +41,11 @@ def test_iou_piecewise_sampler(): [5, 5, 5, 15, 15, 15, 0.7], [5, 5, 5, 15, 15, 15, 0.7], [5, 5, 5, 15, 15, 15, 0.7], [32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3], [32, 32, 16, 8, 38, 42, -0.3]], - dtype=torch.float32).cuda() + dtype=torch.float32).to(device) gt_bboxes = torch.tensor( [[0, 0, 0, 10, 10, 9, 0.2], [5, 10, 10, 20, 20, 15, 0.6]], - dtype=torch.float32).cuda() - gt_labels = torch.tensor([1, 1], dtype=torch.int64).cuda() + dtype=torch.float32).to(device) + gt_labels = torch.tensor([1, 1], dtype=torch.int64).to(device) gt_instanses = InstanceData() gt_instanses.bboxes_3d = gt_bboxes gt_instanses.labels_3d = gt_labels diff --git a/tests/test_models/test_voxel_encoders/test_pillar_encoder.py b/tests/test_models/test_voxel_encoders/test_pillar_encoder.py index 8f1f619f80..d587d53f12 100644 --- a/tests/test_models/test_voxel_encoders/test_pillar_encoder.py +++ b/tests/test_models/test_voxel_encoders/test_pillar_encoder.py @@ -1,12 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. import pytest import torch +from mmengine.device import is_cuda_available, is_musa_available from mmdet3d.registry import MODELS def test_pillar_feature_net(): - if not torch.cuda.is_available(): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') pillar_feature_net_cfg = dict( type='PillarFeatureNet', diff --git a/tests/test_models/test_voxel_encoders/test_voxel_encoders.py b/tests/test_models/test_voxel_encoders/test_voxel_encoders.py index f5eb630ab1..cd51c2cb2e 100644 --- a/tests/test_models/test_voxel_encoders/test_voxel_encoders.py +++ b/tests/test_models/test_voxel_encoders/test_voxel_encoders.py @@ -2,12 +2,30 @@ import pytest import torch import torch.nn.functional as F +from mmengine.device import is_musa_available, is_cuda_available from mmdet3d.registry import MODELS -def test_hard_simple_VFE(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_hard_simple_VFE(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') hard_simple_VFE_cfg = dict(type='HardSimpleVFE', num_features=5) hard_simple_VFE = MODELS.build(hard_simple_VFE_cfg) @@ -18,8 +36,25 @@ def test_hard_simple_VFE(): assert outputs.shape == torch.Size([240000, 5]) -def test_seg_VFE(): - if not torch.cuda.is_available(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_seg_VFE(device: str): + if not is_cuda_available() and not 
is_musa_available(): pytest.skip('test requires GPU and torch+cuda') seg_VFE_cfg = dict( type='SegVFE', @@ -29,14 +64,14 @@ def test_seg_VFE(): feat_compression=16, return_point_feats=True) seg_VFE = MODELS.build(seg_VFE_cfg) - seg_VFE = seg_VFE.cuda() - features = torch.rand([240000, 6]).cuda() + seg_VFE = seg_VFE.to(device) + features = torch.rand([240000, 6]).to(device) coors = [] for i in range(4): coor = torch.randint(0, 10, (60000, 3)) coor = F.pad(coor, (1, 0), mode='constant', value=i) coors.append(coor) - coors = torch.cat(coors, dim=0).cuda() + coors = torch.cat(coors, dim=0).to(device) out_features, out_coors, out_point_features = seg_VFE(features, coors) assert out_features.shape[0] == out_coors.shape[0] assert len(out_point_features) == 4 diff --git a/tests/test_structures/test_bbox/test_box3d.py b/tests/test_structures/test_bbox/test_box3d.py index 1fb1893116..3bcb07a4ae 100644 --- a/tests/test_structures/test_bbox/test_box3d.py +++ b/tests/test_structures/test_bbox/test_box3d.py @@ -13,6 +13,7 @@ points_cam2img, rotation_3d_in_axis, xywhr2xyxyr) from mmdet3d.structures.points import CameraPoints, DepthPoints, LiDARPoints +from mmengine.device import is_musa_available, is_cuda_available def test_bbox3d_mapping_back(): @@ -1118,14 +1119,31 @@ def test_camera_boxes3d(): assert torch.allclose(boxes_origin_given.tensor, expected_tensor) -def test_boxes3d_overlaps(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_boxes3d_overlaps(device: str): """Test the iou calculation of boxes in different modes. 
CommandLine: xdoctest tests/test_box3d.py::test_boxes3d_overlaps zero """ - if not torch.cuda.is_available(): - pytest.skip('test requires GPU and torch+cuda') + if not is_cuda_available() and not is_musa_available(): + pytest.skip('test requires GPU and torch+cuda or torch+musa') # Test LiDAR boxes 3D overlaps boxes1_tensor = torch.tensor( @@ -1133,20 +1151,20 @@ def test_boxes3d_overlaps(): [8.9, -2.5, -1.6, 1.54, 4.01, 1.57, -1.5215927], [28.3, 0.5, -1.3, 1.47, 2.23, 1.48, -4.7115927], [31.3, -8.2, -1.6, 1.74, 3.77, 1.48, -0.35]], - device='cuda') + device=device) boxes1 = LiDARInstance3DBoxes(boxes1_tensor) boxes2_tensor = torch.tensor([[1.2, -3.0, -1.9, 1.8, 3.4, 1.7, -1.9], [8.1, -2.9, -1.8, 1.5, 4.1, 1.6, -1.8], [31.3, -8.2, -1.6, 1.74, 3.77, 1.48, -0.35], [20.1, -28.5, -1.9, 1.6, 3.5, 1.4, -5.1]], - device='cuda') + device=device) boxes2 = LiDARInstance3DBoxes(boxes2_tensor) expected_iou_tensor = torch.tensor( [[0.3710, 0.0000, 0.0000, 0.0000], [0.0000, 0.3322, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 1.0000, 0.0000]], - device='cuda') + device=device) overlaps_3d_iou = boxes1.overlaps(boxes1, boxes2) assert torch.allclose( expected_iou_tensor, overlaps_3d_iou, rtol=1e-4, atol=1e-7) @@ -1154,7 +1172,7 @@ def test_boxes3d_overlaps(): expected_iof_tensor = torch.tensor( [[0.5582, 0.0000, 0.0000, 0.0000], [0.0000, 0.5025, 0.0000, 0.0000], [0.0000, 0.0000, 0.0000, 0.0000], [0.0000, 0.0000, 1.0000, 0.0000]], - device='cuda') + device=device) overlaps_3d_iof = boxes1.overlaps(boxes1, boxes2, mode='iof') assert torch.allclose( expected_iof_tensor, overlaps_3d_iof, rtol=1e-4, atol=1e-7) @@ -1186,7 +1204,24 @@ def test_boxes3d_overlaps(): boxes1.overlaps(cam_boxes1, boxes1) -def test_depth_boxes3d(): +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_depth_boxes3d(device: str): # test empty initialization empty_boxes = [] boxes = DepthInstance3DBoxes(empty_boxes) @@ -1429,11 +1464,11 @@ def test_depth_boxes3d(): assert torch.allclose(boxes.corners, expected_tensor, 1e-3) # test points in boxes - if torch.cuda.is_available(): - box_idxs_of_pts = boxes.points_in_boxes_all(points.cuda()) + if is_cuda_available() or is_musa_available(): + box_idxs_of_pts = boxes.points_in_boxes_all(points.to(device)) expected_idxs_of_pts = torch.tensor( [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]], - device='cuda:0', + device=device, dtype=torch.int32) assert torch.all(box_idxs_of_pts == expected_idxs_of_pts) @@ -1679,26 +1714,44 @@ def test_points_cam2img(): assert torch.allclose(point_2d_res, expected_point_2d_res, 1e-3) -def test_points_in_boxes(): - if not torch.cuda.is_available(): + +@pytest.mark.parametrize( + "device", + [ + pytest.param( + "cuda", + marks=pytest.mark.skipif( + not is_cuda_available(), reason="requires CUDA support" + ), + ), + pytest.param( + "musa", + marks=pytest.mark.skipif( + not is_musa_available(), reason="requires MUSA support" + ), + ), + ], +) +def test_points_in_boxes(device: str): + if not is_cuda_available() and not is_musa_available(): pytest.skip('test requires GPU and torch+cuda') lidar_pts = torch.tensor([[1.0, 4.3, 0.1], [1.0, 4.4, 0.1], [1.1, 4.3, 0.1], [0.9, 4.3, 0.1], [1.0, -0.3, 0.1], [1.0, -0.4, 0.1], [2.9, 0.1, 6.0], - [-0.9, 3.9, 6.0]]).cuda() + [-0.9, 3.9, 6.0]]).to(device) lidar_boxes = 
torch.tensor([[1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 6], [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 2], [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 7 * np.pi / 6], [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, -np.pi / 6]], - dtype=torch.float32).cuda() + dtype=torch.float32).to(device) lidar_boxes = LiDARInstance3DBoxes(lidar_boxes) point_indices = lidar_boxes.points_in_boxes_all(lidar_pts) expected_point_indices = torch.tensor( [[1, 0, 1, 1], [0, 0, 0, 0], [1, 0, 1, 0], [0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0]], - dtype=torch.int32).cuda() + dtype=torch.int32).to(device) assert point_indices.shape == torch.Size([8, 4]) assert (point_indices == expected_point_indices).all() @@ -1706,23 +1759,23 @@ def test_points_in_boxes(): 0.1], [1.1, 4.3, 0.1], [0.9, 4.3, 0.1], [1.0, -0.3, 0.1], [1.0, -0.4, 0.1], [2.9, 0.1, 6.0], - [-0.9, 3.9, 6.0]]).cuda() + [-0.9, 3.9, 6.0]]).to(device) lidar_boxes = torch.tensor([[1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 6], [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 2], [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 7 * np.pi / 6], [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, -np.pi / 6]], - dtype=torch.float32).cuda() + dtype=torch.float32).to(device) lidar_boxes = LiDARInstance3DBoxes(lidar_boxes) point_indices = lidar_boxes.points_in_boxes_part(lidar_pts) expected_point_indices = torch.tensor([0, -1, 0, 3, 0, -1, 1, 1], - dtype=torch.int32).cuda() + dtype=torch.int32).to(device) assert point_indices.shape == torch.Size([8]) assert (point_indices == expected_point_indices).all() depth_boxes = torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3], [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]], - dtype=torch.float32).cuda() + dtype=torch.float32).to(device) depth_boxes = DepthInstance3DBoxes(depth_boxes) depth_pts = torch.tensor( [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6], @@ -1730,20 +1783,20 @@ def test_points_in_boxes(): [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [ -16, -18, 9 ], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]]], - dtype=torch.float32).cuda() + dtype=torch.float32).to(device) point_indices = depth_boxes.points_in_boxes_all(depth_pts) expected_point_indices = torch.tensor( [[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]], - dtype=torch.int32).cuda() + dtype=torch.int32).to(device) assert point_indices.shape == torch.Size([15, 2]) assert (point_indices == expected_point_indices).all() point_indices = depth_boxes.points_in_boxes_part(depth_pts) expected_point_indices = torch.tensor( [0, 0, 0, 0, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1], - dtype=torch.int32).cuda() + dtype=torch.int32).to(device) assert point_indices.shape == torch.Size([15]) assert (point_indices == expected_point_indices).all() @@ -1753,7 +1806,7 @@ def test_points_in_boxes(): [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 2], [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 7 * np.pi / 6], [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, -np.pi / 6]], - dtype=torch.float32).cuda() + dtype=torch.float32).to(device) cam_boxes = DepthInstance3DBoxes(depth_boxes).convert_to(Box3DMode.CAM) depth_pts = torch.tensor( [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6], @@ -1762,7 +1815,7 @@ def test_points_in_boxes(): [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4], [1.0, 4.3, 0.1], [1.0, 4.4, 0.1], [1.1, 4.3, 0.1], [0.9, 4.3, 0.1], [1.0, -0.3, 0.1], [1.0, -0.4, 0.1], [2.9, 0.1, 6.0], [-0.9, 3.9, 6.0]], - dtype=torch.float32).cuda() + dtype=torch.float32).to(device) cam_pts = DepthPoints(depth_pts).convert_to(Coord3DMode.CAM).tensor @@ -1776,7 +1829,7 @@ 
def test_points_in_boxes(): [0, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 1, 0], [0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 1, 1], [0, 0, 0, 0, 0, 0], [1, 0, 0, 1, 0, 0], [1, 0, 0, 1, 0, 0]], - dtype=torch.int32).cuda() + dtype=torch.int32).to(device) assert point_indices.shape == torch.Size([23, 6]) assert (point_indices == expected_point_indices).all() @@ -1788,7 +1841,7 @@ def test_points_in_boxes(): 0, 0, 0, 0, 0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1, 2, -1, 2, 5, 2, -1, 0, 0 ], - dtype=torch.int32).cuda() + dtype=torch.int32).to(device) assert point_indices.shape == torch.Size([23]) assert (point_indices == expected_point_indices).all()
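Note on the device parametrization used by the unittest-style cases above: they expand over AVAILABLE_DEVICES imported from mmdet3d.testing.model_utils, whose hunk appears in the diffstat but is not reproduced in this excerpt, and the setUpClass hooks already raise unittest.SkipTest when neither CUDA nor MUSA is available. The sketch below is an assumption, not the patch's actual definition; it shows one way the list could be built so that @parameterized.expand(AVAILABLE_DEVICES) calls each test as test(self, name, device), matching the (self, _, device) signatures used above.

# Hypothetical sketch for mmdet3d/testing/model_utils.py (assumed; the real
# definition lives in the hunk of this patch that is not shown here).
# Each entry is a (display name, device string) tuple consumed by
# @parameterized.expand in the segmentor tests above.
from mmengine.device import is_cuda_available, is_musa_available

AVAILABLE_DEVICES = []
if is_cuda_available():
    AVAILABLE_DEVICES.append(('cuda', 'cuda'))
if is_musa_available():
    AVAILABLE_DEVICES.append(('musa', 'musa'))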