Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 155 additions & 44 deletions library/src/otx/backend/native/models/base.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ class OTXHlabelClsModel(OTXModel):
data_input_params (DataInputParams | None, optional): Parameters for image data preprocessing. If None is given,
default parameters for the specific model will be used.
model_name (str, optional): Name of the model. Defaults to "hlabel_classification_model".
apply_gpu_transforms (bool, optional): Flag to indicate whether to apply GPU transforms.
It is recommended to use GPU transforms. Defaults to True.
batch_train_transforms (AugmentationSequential | Compose | None): GPU transforms for training applied directly to the batch.
If None is given, default augmentation pipeline for the model will be used.
batch_val_transforms (AugmentationSequential | Compose | None): GPU transforms for validation / testing applied directly to the batch.
If None is given, default augmentation pipeline for the model will be used. Typically just normalization.
optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
Defaults to DefaultSchedulerCallable.
Expand All @@ -55,6 +61,9 @@ def __init__(
label_info: HLabelInfo,
data_input_params: DataInputParams | None = None,
model_name: str = "hlabel_classification_model",
apply_gpu_transforms: bool = True,
batch_train_transforms: AugmentationSequential | Compose | None = None,
batch_val_transforms: AugmentationSequential | Compose | None = None,
freeze_backbone: bool = False,
optimizer: OptimizerCallable = DefaultOptimizerCallable,
scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import torch
from torch import Tensor
import kornia

from otx.backend.native.exporter.base import OTXModelExporter
from otx.backend.native.exporter.native import OTXNativeModelExporter
Expand All @@ -23,6 +24,10 @@
from otx.types.export import TaskLevelExportParameters
from otx.types.label import LabelInfoTypes
from otx.types.task import OTXTaskType
from kornia.augmentation.container import AugmentationSequential
from kornia.augmentation import Normalize
from kornia.augmentation.auto import AutoAugment
from torchvision.transforms.v2 import Compose

if TYPE_CHECKING:
from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
Expand All @@ -40,6 +45,12 @@ class OTXMulticlassClsModel(OTXModel):
data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
If None is given, default parameters for the specific model will be used.
model_name (str, optional): Name of the model. Defaults to "multiclass_classification_model".
apply_gpu_transforms (bool, optional): Flag to indicate whether to apply GPU transforms.
It is recommended to use GPU transforms. Defaults to True.
batch_train_transforms (AugmentationSequential | Compose | None): GPU transforms for training applied directly to the batch.
If None is given, default augmentation pipeline for the model will be used.
batch_val_transforms (AugmentationSequential | Compose | None): GPU transforms for validation / testing applied directly to the batch.
If None is given, default augmentation pipeline for the model will be used. Typically just normalization.
optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
Defaults to DefaultSchedulerCallable.
Expand All @@ -52,6 +63,9 @@ def __init__(
label_info: LabelInfoTypes | int | Sequence,
data_input_params: DataInputParams | None = None,
model_name: str = "multiclass_classification_model",
apply_gpu_transforms: bool = True,
batch_train_transforms: AugmentationSequential | Compose | None = None,
batch_val_transforms: AugmentationSequential | Compose | None = None,
freeze_backbone: bool = False,
optimizer: OptimizerCallable = DefaultOptimizerCallable,
scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
Expand All @@ -61,8 +75,10 @@ def __init__(
super().__init__(
label_info=label_info,
data_input_params=data_input_params,
task=OTXTaskType.MULTI_CLASS_CLS,
model_name=model_name,
apply_gpu_transforms=apply_gpu_transforms,
batch_train_transforms=batch_train_transforms,
batch_val_transforms=batch_val_transforms,
optimizer=optimizer,
scheduler=scheduler,
metric=metric,
Expand Down Expand Up @@ -121,6 +137,12 @@ def _customize_outputs(
scores=list(scores),
)

@property
def _default_train_transforms(self):
    """Build the default training augmentation pipeline.

    The pipeline is a kornia ``AugmentationSequential`` made of a random
    horizontal flip, a colour jiggle (all four factors set to 0.1) and a
    normalization step that uses ``self.data_input_params.mean`` / ``.std``.
    A new pipeline object is constructed on every access.
    """
    flip = kornia.augmentation.RandomHorizontalFlip()
    jiggle = kornia.augmentation.ColorJiggle(0.1, 0.1, 0.1, 0.1)
    normalize = Normalize(self.data_input_params.mean, self.data_input_params.std)
    return AugmentationSequential(flip, jiggle, normalize)

@property
def _export_parameters(self) -> TaskLevelExportParameters:
"""Defines parameters required to export a particular model implementation."""
Expand Down Expand Up @@ -185,6 +207,10 @@ def forward_explain(self, inputs: OTXDataBatch) -> OTXPredBatch:
feature_vector=[feature_vector.unsqueeze(0) for feature_vector in outputs["feature_vector"]],
)

@property
def task(self) -> OTXTaskType:
    """Return the task type of this model: ``OTXTaskType.MULTI_CLASS_CLS``."""
    return OTXTaskType.MULTI_CLASS_CLS

# Default preprocessing parameters (input size, mean, std) used by this model.
# NOTE(review): two consecutive `return` statements follow. Only the first one can
# execute, so the second (mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) is
# unreachable as written. This looks like a removed/added pair of diff lines captured
# together - TODO confirm which set of values is intended and delete the other line.
@property
def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
return DataInputParams(input_size=(224, 224), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375))
return DataInputParams(input_size=(224, 224), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import TYPE_CHECKING, Literal

from torch import Tensor, nn
import kornia

from otx.backend.native.models.base import DataInputParams, DefaultOptimizerCallable, DefaultSchedulerCallable
from otx.backend.native.models.classification.backbones.efficientnet import EfficientNetBackbone
Expand Down Expand Up @@ -96,3 +97,21 @@ def forward_for_tracing(self, image: Tensor) -> Tensor | dict[str, Tensor]:
return self.model(images=image, mode="explain")

return self.model(images=image, mode="tensor")

@property
def transforms(self):
    """Return the kornia augmentation pipeline for the current mode.

    When ``self.training`` is false the pipeline only normalizes the input with
    ``self.data_input_params.mean`` / ``.std``. When it is true the pipeline
    applies a random affine transform, colour jiggle, random horizontal flip and
    random Gaussian blur before the same normalization, with
    ``same_on_batch=False``. Both pipelines use ``data_keys=["input"]``.

    A new pipeline object is constructed on every access.
    """
    aug = kornia.augmentation

    # Evaluation mode: normalization only.
    if not self.training:
        return aug.AugmentationSequential(
            aug.Normalize(self.data_input_params.mean, self.data_input_params.std),
            data_keys=["input"],
        )

    # Training mode: geometric and photometric augmentations, then normalization.
    train_ops = [
        aug.RandomAffine(degrees=10.0, translate=[0.1, 0.1], scale=[0.5, 1.5], shear=2.0),
        aug.ColorJiggle(0.1, 0.1, 0.1, 0.1),
        aug.RandomHorizontalFlip(),
        aug.RandomGaussianBlur(5, (0.1, 2.0)),
        aug.Normalize(self.data_input_params.mean, self.data_input_params.std),
    ]
    return aug.AugmentationSequential(*train_ops, data_keys=["input"], same_on_batch=False)
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ class OTXMultilabelClsModel(OTXModel):
if `Sequence` is given, label info will be constructed from the sequence of label names.
data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
model_name (str, optional): Name of the model. Defaults to "multilabel_classification_model".
apply_gpu_transforms (bool, optional): Flag to indicate whether to apply GPU transforms.
It is recommended to use GPU transforms. Defaults to True.
batch_train_transforms (AugmentationSequential | Compose | None): GPU transforms for training applied directly to the batch.
If None is given, default augmentation pipeline for the model will be used.
batch_val_transforms (AugmentationSequential | Compose | None): GPU transforms for validation / testing applied directly to the batch.
If None is given, default augmentation pipeline for the model will be used. Typically just normalization.
optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
Defaults to DefaultSchedulerCallable.
Expand All @@ -50,6 +56,9 @@ def __init__(
label_info: LabelInfoTypes | Sequence,
data_input_params: DataInputParams | None = None,
model_name: str = "multiclass_classification_model",
apply_gpu_transforms: bool = True,
batch_train_transforms: AugmentationSequential | Compose | None = None,
batch_val_transforms: AugmentationSequential | Compose | None = None,
freeze_backbone: bool = False,
optimizer: OptimizerCallable = DefaultOptimizerCallable,
scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
Expand All @@ -59,8 +68,10 @@ def __init__(
super().__init__(
label_info=label_info,
data_input_params=data_input_params,
task=OTXTaskType.MULTI_LABEL_CLS,
model_name=model_name,
apply_gpu_transforms=apply_gpu_transforms,
batch_train_transforms=batch_train_transforms,
batch_val_transforms=batch_val_transforms,
optimizer=optimizer,
scheduler=scheduler,
metric=metric,
Expand Down
3 changes: 2 additions & 1 deletion library/src/otx/backend/native/models/detection/atss.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING, ClassVar, Literal
import kornia as K

from otx.backend.native.exporter.base import OTXModelExporter
from otx.backend.native.exporter.native import OTXNativeModelExporter
Expand Down Expand Up @@ -203,4 +204,4 @@ def _exporter(self) -> OTXModelExporter:

# Default preprocessing parameters (input size, mean, std) used by this model.
# NOTE(review): two consecutive `return` statements follow. Only the first one can
# execute, so the second (std=(1.0, 1.0, 1.0)) is unreachable as written. This looks
# like a removed/added pair of diff lines captured together - TODO confirm which std
# is intended and delete the other line.
@property
def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
return DataInputParams(input_size=(800, 992), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))
return DataInputParams(input_size=(800, 992), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))
28 changes: 24 additions & 4 deletions library/src/otx/backend/native/models/detection/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
import torch
from torchmetrics import Metric, MetricCollection
from torchvision import tv_tensors
import kornia
from kornia.geometry.boxes import Boxes
from kornia.augmentation.container import AugmentationSequential

from otx.backend.native.models.base import DataInputParams, DefaultOptimizerCallable, DefaultSchedulerCallable, OTXModel
from otx.backend.native.models.utils.utils import InstanceData
Expand Down Expand Up @@ -71,6 +74,9 @@ def __init__(
label_info: LabelInfoTypes | int | Sequence,
data_input_params: DataInputParams | dict | None = None,
model_name: str = "otx_detection_model",
apply_gpu_transforms: bool = True,
batch_train_transforms: AugmentationSequential | Compose | None = None,
batch_val_transforms: AugmentationSequential | Compose | None = None,
optimizer: OptimizerCallable = DefaultOptimizerCallable,
scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
metric: MetricCallable = MeanAveragePrecisionFMeasureCallable,
Expand All @@ -80,9 +86,11 @@ def __init__(
) -> None:
super().__init__(
label_info=label_info,
model_name=model_name,
task=OTXTaskType.DETECTION,
data_input_params=data_input_params,
model_name=model_name,
apply_gpu_transforms=apply_gpu_transforms,
batch_train_transforms=batch_train_transforms,
batch_val_transforms=batch_val_transforms,
optimizer=optimizer,
scheduler=scheduler,
metric=metric,
Expand Down Expand Up @@ -187,7 +195,6 @@ def _customize_inputs(

inputs["entity"] = entity
inputs["mode"] = "loss" if self.training else "predict"

return inputs

def _customize_outputs(
Expand Down Expand Up @@ -546,6 +553,19 @@ def get_num_anchors(self) -> list[int]:

return [1] * 10

@staticmethod
@torch.no_grad()
def _apply_batch_augmentations(
    augmentations_pipeline: AugmentationSequential | Compose | None,
    batch: OTXDataBatch,
) -> None:
    """Apply a batch-level augmentation pipeline to images and boxes in place.

    Runs with gradient tracking disabled. Does nothing when
    ``augmentations_pipeline`` is ``None``.

    Args:
        augmentations_pipeline: Callable pipeline invoked as
            ``pipeline(images, boxes)`` and expected to return the augmented
            ``(images, boxes)`` pair, or ``None`` to skip augmentation.
        batch: Batch whose ``images`` and ``bboxes`` attributes are read and
            then overwritten with the augmented results.
    """
    if augmentations_pipeline is None:
        return

    # Wrap the xyxy boxes in a kornia Boxes container before handing them to the pipeline.
    boxes = Boxes.from_tensor(batch.bboxes, mode="xyxy")
    augmented_images, augmented_boxes = augmentations_pipeline(batch.images, boxes)

    batch.images = augmented_images
    # Convert back from the kornia container to xyxy tensors.
    batch.bboxes = augmented_boxes.to_tensor(mode="xyxy")

# Default preprocessing parameters (input size, mean, std) used by this model.
# NOTE(review): two consecutive `return` statements follow. Only the first one can
# execute, so the second (std=(1.0, 1.0, 1.0)) is unreachable as written. This looks
# like a removed/added pair of diff lines captured together - TODO confirm which std
# is intended and delete the other line.
@property
def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
return DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))
return DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))

@property
def task(self) -> OTXTaskType:
    """Return the task type of this model: ``OTXTaskType.DETECTION``."""
    return OTXTaskType.DETECTION
15 changes: 15 additions & 0 deletions library/src/otx/backend/native/models/detection/d_fine.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from otx.backend.native.models.utils.utils import load_checkpoint
from otx.config.data import TileConfig
from otx.metrics.fmeasure import MeanAveragePrecisionFMeasureCallable
import kornia

if TYPE_CHECKING:
from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
Expand Down Expand Up @@ -174,3 +175,17 @@ def load_state_dict(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
ckpt.pop("model.decoder.anchors")
ckpt.pop("model.decoder.valid_mask")
return super().load_state_dict(ckpt, *args, strict=False, **kwargs)

@property
def transforms(self):
    """Return the kornia augmentation pipeline for the current mode.

    When ``self.training`` is false the pipeline only normalizes the input with
    ``self.data_input_params.mean`` / ``.std``. When it is true a random
    horizontal flip is applied before the normalization, with
    ``same_on_batch=False``. Both pipelines are declared with
    ``data_keys=["input", "bbox"]``.

    A new pipeline object is constructed on every access.
    """
    aug = kornia.augmentation

    # Evaluation mode: normalization only.
    if not self.training:
        return aug.AugmentationSequential(
            aug.Normalize(self.data_input_params.mean, self.data_input_params.std),
            data_keys=["input", "bbox"],
        )

    # Training mode: flip first, then normalize.
    train_ops = [
        aug.RandomHorizontalFlip(),
        aug.Normalize(self.data_input_params.mean, self.data_input_params.std),
    ]
    return aug.AugmentationSequential(*train_ops, data_keys=["input", "bbox"], same_on_batch=False)
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from torchmetrics import Metric, MetricCollection
from torchvision import tv_tensors
from torchvision.models.detection.image_list import ImageList
from kornia.geometry.boxes import Boxes
from kornia.augmentation.container import AugmentationSequential

from otx.backend.native.models.base import DataInputParams, DefaultOptimizerCallable, DefaultSchedulerCallable, OTXModel
from otx.backend.native.models.instance_segmentation.segmentors.maskrcnn_tv import MaskRCNN
Expand Down Expand Up @@ -61,6 +63,12 @@ class OTXInstanceSegModel(OTXModel):
data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
If None is given, default parameters for the specific model will be used.
model_name (str, optional): Name of the model. Defaults to "inst_segm_model".
apply_gpu_transforms (bool, optional): Flag to indicate whether to apply GPU transforms.
It is recommended to use GPU transforms. Defaults to True.
batch_train_transforms (AugmentationSequential | Compose | None): GPU transforms for training applied directly to the batch.
If None is given, default augmentation pipeline for the model will be used.
batch_val_transforms (AugmentationSequential | Compose | None): GPU transforms for validation / testing applied directly to the batch.
If None is given, default augmentation pipeline for the model will be used. Typically just normalization.
optimizer (OptimizerCallable, optional): Optimizer for the model. Defaults to DefaultOptimizerCallable.
scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Scheduler for the model.
Defaults to DefaultSchedulerCallable.
Expand All @@ -76,6 +84,9 @@ def __init__(
label_info: LabelInfoTypes | int | Sequence,
data_input_params: DataInputParams | None = None,
model_name: str = "inst_segm_model",
apply_gpu_transforms: bool = True,
batch_train_transforms: AugmentationSequential | Compose | None = None,
batch_val_transforms: AugmentationSequential | Compose | None = None,
optimizer: OptimizerCallable = DefaultOptimizerCallable,
scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
metric: MetricCallable = MaskRLEMeanAPFMeasureCallable,
Expand All @@ -85,8 +96,10 @@ def __init__(
super().__init__(
label_info=label_info,
data_input_params=data_input_params,
task=OTXTaskType.INSTANCE_SEGMENTATION,
model_name=model_name,
apply_gpu_transforms=apply_gpu_transforms,
batch_train_transforms=batch_train_transforms,
batch_val_transforms=batch_val_transforms,
optimizer=optimizer,
scheduler=scheduler,
metric=metric,
Expand Down Expand Up @@ -616,6 +629,22 @@ def _restore_model_forward(self) -> None:
self.model.forward = func_type(self.original_model_forward, self.model)
self.original_model_forward = None

@staticmethod
@torch.no_grad()
def _apply_batch_augmentations(
    augmentations_pipeline: AugmentationSequential | Compose | None,
    batch: OTXDataBatch,
) -> None:
    """Apply a batch-level augmentation pipeline to images, boxes and masks in place.

    Runs with gradient tracking disabled. Does nothing when
    ``augmentations_pipeline`` is ``None``.

    Args:
        augmentations_pipeline: Callable pipeline invoked as
            ``pipeline(images, boxes, masks)`` and expected to return the
            augmented ``(images, boxes, masks)`` triple, or ``None`` to skip
            augmentation.
        batch: Batch whose ``images``, ``bboxes`` and ``masks`` attributes are
            read and then overwritten with the augmented results.
    """
    if augmentations_pipeline is None:
        return

    # Wrap the xyxy boxes in a kornia Boxes container before handing them to the pipeline.
    kornia_boxes = Boxes.from_tensor(batch.bboxes, mode="xyxy")
    # The leftover `breakpoint()` debugger calls around this step were removed:
    # they would stop execution on every batch.
    batch.images, kornia_boxes, masks = augmentations_pipeline(batch.images, kornia_boxes, batch.masks)
    # Convert back from the kornia container to xyxy tensors.
    batch.bboxes = kornia_boxes.to_tensor(mode="xyxy")
    batch.masks = masks

# Default preprocessing parameters (input size, mean, std) used by this model.
# NOTE(review): two consecutive `return` statements follow. Only the first one can
# execute, so the second (mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) is
# unreachable as written. The two lines also list the channels in opposite order
# (103.53 first vs. 0.485 first). This looks like a removed/added pair of diff lines
# captured together - TODO confirm which set of values is intended and delete the other.
@property
def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
return DataInputParams(input_size=(1024, 1024), mean=(103.53, 116.28, 123.675), std=(57.375, 57.12, 58.395))
return DataInputParams(input_size=(1024, 1024), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

@property
def task(self) -> OTXTaskType:
    """Return the task type of this model: ``OTXTaskType.INSTANCE_SEGMENTATION``."""
    return OTXTaskType.INSTANCE_SEGMENTATION
Loading
Loading