open-edge-platform · kprokofi · Nov 12, 2025 · Nov 12, 2025 · Nov 13, 2025 · Nov 18, 2025
@@ -41,6 +41,12 @@ class OTXHlabelClsModel(OTXModel):
         data_input_params (DataInputParams | None, optional): Parameters for image data preprocessing. If None is given,
             default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "hlabel_classification_model".
+        apply_gpu_transforms (bool, optional): Flag to indicate whether to apply GPU transforms.
+            It is recommended to use GPU transforms. Defaults to True.
+        batch_train_transforms (AugmentationSequential | Compose | None): GPU transforms for training applied directly to the batch.
+            If None is given, default augmentation pipeline for the model will be used.
+        batch_val_transforms (AugmentationSequential | Compose | None): GPU transforms for validation / testing applied directly to the batch.
+            If None is given, default augmentation pipeline for the model will be used. Typically just normalization.
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
         Defaults to DefaultSchedulerCallable.
@@ -55,6 +61,9 @@ def __init__(
         label_info: HLabelInfo,
         data_input_params: DataInputParams | None = None,
         model_name: str = "hlabel_classification_model",
+        apply_gpu_transforms: bool = True,
+        batch_train_transforms: AugmentationSequential | Compose | None = None,
+        batch_val_transforms: AugmentationSequential | Compose | None = None,
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,

@@ -9,6 +9,7 @@
 
 import torch
 from torch import Tensor
+import kornia
 
 from otx.backend.native.exporter.base import OTXModelExporter
 from otx.backend.native.exporter.native import OTXNativeModelExporter
@@ -23,6 +24,10 @@
 from otx.types.export import TaskLevelExportParameters
 from otx.types.label import LabelInfoTypes
 from otx.types.task import OTXTaskType
+from kornia.augmentation.container import AugmentationSequential
+from kornia.augmentation import Normalize
+from kornia.augmentation.auto import AutoAugment
+from torchvision.transforms.v2 import Compose
 
 if TYPE_CHECKING:
     from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
@@ -40,6 +45,12 @@ class OTXMulticlassClsModel(OTXModel):
         data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
             If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "multiclass_classification_model".
+        apply_gpu_transforms (bool, optional): Flag to indicate whether to apply GPU transforms.
+            It is recommended to use GPU transforms. Defaults to True.
+        batch_train_transforms (AugmentationSequential | Compose | None): GPU transforms for training applied directly to the batch.
+            If None is given, default augmentation pipeline for the model will be used.
+        batch_val_transforms (AugmentationSequential | Compose | None): GPU transforms for validation / testing applied directly to the batch.
+            If None is given, default augmentation pipeline for the model will be used. Typically just normalization.
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
         Defaults to DefaultSchedulerCallable.
@@ -52,6 +63,9 @@ def __init__(
         label_info: LabelInfoTypes | int | Sequence,
         data_input_params: DataInputParams | None = None,
         model_name: str = "multiclass_classification_model",
+        apply_gpu_transforms: bool = True,
+        batch_train_transforms: AugmentationSequential | Compose | None = None,
+        batch_val_transforms: AugmentationSequential | Compose | None = None,
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -61,8 +75,10 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.MULTI_CLASS_CLS,
             model_name=model_name,
+            apply_gpu_transforms=apply_gpu_transforms,
+            batch_train_transforms=batch_train_transforms,
+            batch_val_transforms=batch_val_transforms,
             optimizer=optimizer,
             scheduler=scheduler,
             metric=metric,
@@ -121,6 +137,12 @@ def _customize_outputs(
             scores=list(scores),
         )
 
+    @property
+    def _default_train_transforms(self) -> AugmentationSequential:
+        """Build the default batch-level augmentation pipeline used for training.
+
+        Returns:
+            AugmentationSequential: Pipeline applying, in order, ``RandomHorizontalFlip``,
+            ``ColorJiggle(0.1, 0.1, 0.1, 0.1)`` and ``Normalize`` with the mean/std taken from
+            ``self.data_input_params``. A new pipeline object is created on every access.
+        """
+        return AugmentationSequential(
+            kornia.augmentation.RandomHorizontalFlip(),
+            kornia.augmentation.ColorJiggle(0.1, 0.1, 0.1, 0.1),
+            Normalize(self.data_input_params.mean, self.data_input_params.std),
+        )
+
     @property
     def _export_parameters(self) -> TaskLevelExportParameters:
         """Defines parameters required to export a particular model implementation."""
@@ -185,6 +207,10 @@ def forward_explain(self, inputs: OTXDataBatch) -> OTXPredBatch:
             feature_vector=[feature_vector.unsqueeze(0) for feature_vector in outputs["feature_vector"]],
         )
 
+    @property
+    def task(self) -> OTXTaskType:
+        """Return the task type of this model, ``OTXTaskType.MULTI_CLASS_CLS``."""
+        return OTXTaskType.MULTI_CLASS_CLS
+
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(224, 224), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375))
+        """Return the default preprocessing parameters: (224, 224) input size with per-channel mean/std."""
+        # NOTE(review): mean/std equal the removed 0-255 values divided by 255, so images are
+        # presumably expected in the [0, 1] range before normalization — confirm against the data pipeline.
+        return DataInputParams(input_size=(224, 224), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
@@ -8,6 +8,7 @@
 from typing import TYPE_CHECKING, Literal
 
 from torch import Tensor, nn
+import kornia
 
 from otx.backend.native.models.base import DataInputParams, DefaultOptimizerCallable, DefaultSchedulerCallable
 from otx.backend.native.models.classification.backbones.efficientnet import EfficientNetBackbone
@@ -96,3 +97,21 @@ def forward_for_tracing(self, image: Tensor) -> Tensor | dict[str, Tensor]:
             return self.model(images=image, mode="explain")
 
         return self.model(images=image, mode="tensor")
+
+    @property
+    def transforms(self) -> kornia.augmentation.AugmentationSequential:
+        """Build the Kornia augmentation pipeline for the current mode.
+
+        A new pipeline object is constructed on every access.
+
+        Returns:
+            kornia.augmentation.AugmentationSequential: When ``self.training`` is true, a pipeline of
+            ``RandomAffine``, ``ColorJiggle``, ``RandomHorizontalFlip``, ``RandomGaussianBlur`` and
+            ``Normalize`` built with ``same_on_batch=False``. Otherwise a pipeline containing only
+            ``Normalize``. Both use ``data_keys=["input"]`` and the mean/std from ``self.data_input_params``.
+        """
+        augmentation = kornia.augmentation
+        # Normalization is shared by both modes, so build it once.
+        normalize = augmentation.Normalize(self.data_input_params.mean, self.data_input_params.std)
+        if not self.training:
+            return augmentation.AugmentationSequential(normalize, data_keys=["input"])
+        return augmentation.AugmentationSequential(
+            augmentation.RandomAffine(degrees=10.0, translate=[0.1, 0.1], scale=[0.5, 1.5], shear=2.0),
+            augmentation.ColorJiggle(0.1, 0.1, 0.1, 0.1),
+            augmentation.RandomHorizontalFlip(),
+            augmentation.RandomGaussianBlur(5, (0.1, 2.0)),
+            normalize,
+            data_keys=["input"],
+            same_on_batch=False,
+        )
@@ -38,6 +38,12 @@ class OTXMultilabelClsModel(OTXModel):
             if `Sequence` is given, label info will be constructed from the sequence of label names.
         data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
         model_name (str, optional): Name of the model. Defaults to "multilabel_classification_model".
+        apply_gpu_transforms (bool, optional): Flag to indicate whether to apply GPU transforms.
+            It is recommended to use GPU transforms. Defaults to True.
+        batch_train_transforms (AugmentationSequential | Compose | None): GPU transforms for training applied directly to the batch.
+            If None is given, default augmentation pipeline for the model will be used.
+        batch_val_transforms (AugmentationSequential | Compose | None): GPU transforms for validation / testing applied directly to the batch.
+            If None is given, default augmentation pipeline for the model will be used. Typically just normalization.
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
         Defaults to DefaultSchedulerCallable.
@@ -50,6 +56,9 @@ def __init__(
         label_info: LabelInfoTypes | Sequence,
         data_input_params: DataInputParams | None = None,
         model_name: str = "multiclass_classification_model",
+        apply_gpu_transforms: bool = True,
+        batch_train_transforms: AugmentationSequential | Compose | None = None,
+        batch_val_transforms: AugmentationSequential | Compose | None = None,
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -59,8 +68,10 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.MULTI_LABEL_CLS,
             model_name=model_name,
+            apply_gpu_transforms=apply_gpu_transforms,
+            batch_train_transforms=batch_train_transforms,
+            batch_val_transforms=batch_val_transforms,
             optimizer=optimizer,
             scheduler=scheduler,
             metric=metric,

@@ -6,6 +6,7 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING, ClassVar, Literal
+import kornia as K
 
 from otx.backend.native.exporter.base import OTXModelExporter
 from otx.backend.native.exporter.native import OTXNativeModelExporter
@@ -203,4 +204,4 @@ def _exporter(self) -> OTXModelExporter:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(800, 992), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))
+        return DataInputParams(input_size=(800, 992), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))
@@ -15,6 +15,9 @@
 import torch
 from torchmetrics import Metric, MetricCollection
 from torchvision import tv_tensors
+import kornia
+from kornia.geometry.boxes import Boxes
+from kornia.augmentation.container import AugmentationSequential
 
 from otx.backend.native.models.base import DataInputParams, DefaultOptimizerCallable, DefaultSchedulerCallable, OTXModel
 from otx.backend.native.models.utils.utils import InstanceData
@@ -71,6 +74,9 @@ def __init__(
         label_info: LabelInfoTypes | int | Sequence,
         data_input_params: DataInputParams | dict | None = None,
         model_name: str = "otx_detection_model",
+        apply_gpu_transforms: bool = True,
+        batch_train_transforms: AugmentationSequential | Compose | None = None,
+        batch_val_transforms: AugmentationSequential | Compose | None = None,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
         metric: MetricCallable = MeanAveragePrecisionFMeasureCallable,
@@ -80,9 +86,11 @@ def __init__(
     ) -> None:
         super().__init__(
             label_info=label_info,
-            model_name=model_name,
-            task=OTXTaskType.DETECTION,
             data_input_params=data_input_params,
+            model_name=model_name,
+            apply_gpu_transforms=apply_gpu_transforms,
+            batch_train_transforms=batch_train_transforms,
+            batch_val_transforms=batch_val_transforms,
             optimizer=optimizer,
             scheduler=scheduler,
             metric=metric,
@@ -187,7 +195,6 @@ def _customize_inputs(
 
         inputs["entity"] = entity
         inputs["mode"] = "loss" if self.training else "predict"
-
         return inputs
 
     def _customize_outputs(
@@ -546,6 +553,19 @@ def get_num_anchors(self) -> list[int]:
 
         return [1] * 10
 
+    @staticmethod
+    @torch.no_grad()
+    def _apply_batch_augmentations(
+        augmentations_pipeline: AugmentationSequential | Compose | None,
+        batch: OTXDataBatch,
+    ) -> None:
+        """Run a batch-level augmentation pipeline over the images and boxes of ``batch`` in place.
+
+        Args:
+            augmentations_pipeline: Pipeline called as ``pipeline(images, boxes)`` and expected to
+                return the augmented ``(images, boxes)`` pair. When None, ``batch`` is left untouched.
+            batch: Batch whose ``images`` and ``bboxes`` attributes are read and then overwritten.
+        """
+        if augmentations_pipeline is None:
+            return
+        # The pipeline receives the boxes wrapped in a Kornia ``Boxes`` container (xyxy mode).
+        wrapped_boxes = Boxes.from_tensor(batch.bboxes, mode="xyxy")
+        batch.images, augmented_boxes = augmentations_pipeline(batch.images, wrapped_boxes)
+        # Unwrap back to xyxy before storing on the batch.
+        batch.bboxes = augmented_boxes.to_tensor(mode="xyxy")
+
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))
+        """Return the default preprocessing parameters: (640, 640) input size, zero mean and unit std."""
+        # NOTE(review): the removed default used std=255; with std=1 and mean=0 a (x - mean) / std
+        # normalization no longer rescales pixels, so the [0, 1] scaling presumably happens elsewhere — confirm.
+        return DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))
+
+    @property
+    def task(self) -> OTXTaskType:
+        """Return the task type of this model, ``OTXTaskType.DETECTION``."""
+        return OTXTaskType.DETECTION
@@ -17,6 +17,7 @@
 from otx.backend.native.models.utils.utils import load_checkpoint
 from otx.config.data import TileConfig
 from otx.metrics.fmeasure import MeanAveragePrecisionFMeasureCallable
+import kornia
 
 if TYPE_CHECKING:
     from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
@@ -174,3 +175,17 @@ def load_state_dict(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
             ckpt.pop("model.decoder.anchors")
             ckpt.pop("model.decoder.valid_mask")
             return super().load_state_dict(ckpt, *args, strict=False, **kwargs)
+
+    @property
+    def transforms(self):
+        """Build the Kornia augmentation pipeline for the current mode.
+
+        In training mode (``self.training`` is true) the pipeline is ``RandomHorizontalFlip`` followed
+        by ``Normalize``, built with ``same_on_batch=False``; otherwise it contains ``Normalize`` only.
+        Both variants use ``data_keys=["input", "bbox"]`` and the mean/std from ``self.data_input_params``.
+        A new pipeline object is constructed on every access.
+        """
+        aug = kornia.augmentation
+        normalize = aug.Normalize(self.data_input_params.mean, self.data_input_params.std)
+        if not self.training:
+            return aug.AugmentationSequential(normalize, data_keys=["input", "bbox"])
+        return aug.AugmentationSequential(
+            aug.RandomHorizontalFlip(),
+            normalize,
+            data_keys=["input", "bbox"],
+            same_on_batch=False,
+        )
@@ -18,6 +18,8 @@
 from torchmetrics import Metric, MetricCollection
 from torchvision import tv_tensors
 from torchvision.models.detection.image_list import ImageList
+from kornia.geometry.boxes import Boxes
+from kornia.augmentation.container import AugmentationSequential
 
 from otx.backend.native.models.base import DataInputParams, DefaultOptimizerCallable, DefaultSchedulerCallable, OTXModel
 from otx.backend.native.models.instance_segmentation.segmentors.maskrcnn_tv import MaskRCNN
@@ -61,6 +63,12 @@ class OTXInstanceSegModel(OTXModel):
         data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
             If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "inst_segm_model".
+        apply_gpu_transforms (bool, optional): Flag to indicate whether to apply GPU transforms.
+            It is recommended to use GPU transforms. Defaults to True.
+        batch_train_transforms (AugmentationSequential | Compose | None): GPU transforms for training applied directly to the batch.
+            If None is given, default augmentation pipeline for the model will be used.
+        batch_val_transforms (AugmentationSequential | Compose | None): GPU transforms for validation / testing applied directly to the batch.
+            If None is given, default augmentation pipeline for the model will be used. Typically just normalization.
         optimizer (OptimizerCallable, optional): Optimizer for the model. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Scheduler for the model.
             Defaults to DefaultSchedulerCallable.
@@ -76,6 +84,9 @@ def __init__(
         label_info: LabelInfoTypes | int | Sequence,
         data_input_params: DataInputParams | None = None,
         model_name: str = "inst_segm_model",
+        apply_gpu_transforms: bool = True,
+        batch_train_transforms: AugmentationSequential | Compose | None = None,
+        batch_val_transforms: AugmentationSequential | Compose | None = None,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
         metric: MetricCallable = MaskRLEMeanAPFMeasureCallable,
@@ -85,8 +96,10 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.INSTANCE_SEGMENTATION,
             model_name=model_name,
+            apply_gpu_transforms=apply_gpu_transforms,
+            batch_train_transforms=batch_train_transforms,
+            batch_val_transforms=batch_val_transforms,
             optimizer=optimizer,
             scheduler=scheduler,
             metric=metric,
@@ -616,6 +629,22 @@ def _restore_model_forward(self) -> None:
         self.model.forward = func_type(self.original_model_forward, self.model)
         self.original_model_forward = None
 
+    @staticmethod
+    @torch.no_grad()
+    def _apply_batch_augmentations(
+        augmentations_pipeline: AugmentationSequential | Compose | None,
+        batch: OTXDataBatch,
+    ) -> None:
+        """Run a batch-level augmentation pipeline over images, boxes and masks of ``batch`` in place.
+
+        Args:
+            augmentations_pipeline: Pipeline called as ``pipeline(images, boxes, masks)`` and expected
+                to return the augmented ``(images, boxes, masks)`` triple. When None, ``batch`` is
+                left untouched.
+            batch: Batch whose ``images``, ``bboxes`` and ``masks`` attributes are read and then
+                overwritten with the augmented values.
+        """
+        if augmentations_pipeline is None:
+            return
+        # The pipeline receives the boxes wrapped in a Kornia ``Boxes`` container (xyxy mode).
+        kornia_boxes = Boxes.from_tensor(batch.bboxes, mode="xyxy")
+        batch.images, kornia_boxes, masks = augmentations_pipeline(batch.images, kornia_boxes, batch.masks)
+        # Unwrap back to xyxy before storing on the batch.
+        batch.bboxes = kornia_boxes.to_tensor(mode="xyxy")
+        batch.masks = masks
+
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(1024, 1024), mean=(103.53, 116.28, 123.675), std=(57.375, 57.12, 58.395))
+        """Return the default preprocessing parameters: (1024, 1024) input size with per-channel mean/std."""
+        # NOTE(review): besides dividing the removed values by 255, the per-channel order of mean/std is
+        # reversed relative to the removed line (103.53 / 255 = 0.406 was first, is now last) — confirm that
+        # inputs arrive in the matching channel order and in the [0, 1] range.
+        return DataInputParams(input_size=(1024, 1024), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
+
+    @property
+    def task(self) -> OTXTaskType:
+        """Return the task type of this model, ``OTXTaskType.INSTANCE_SEGMENTATION``."""
+        return OTXTaskType.INSTANCE_SEGMENTATION