From 84379b59041d0554253130b2b6999124bd4bae9e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 27 Jun 2025 10:41:07 +0100 Subject: [PATCH 1/7] Add clamping_mode parameter to BoundingBoxes constructor --- torchvision/tv_tensors/_bounding_boxes.py | 27 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py index e661eaf8d73..15fa37252c8 100644 --- a/torchvision/tv_tensors/_bounding_boxes.py +++ b/torchvision/tv_tensors/_bounding_boxes.py @@ -3,7 +3,7 @@ from collections.abc import Mapping, Sequence from enum import Enum -from typing import Any +from typing import Any, Literal import torch from torch.utils._pytree import tree_flatten @@ -46,6 +46,9 @@ def is_rotated_bounding_format(format: BoundingBoxFormat) -> bool: ) +CLAMPING_MODE_TYPE = Literal["hard", "soft", "none"] + + class BoundingBoxes(TVTensor): """:class:`torch.Tensor` subclass for bounding boxes with shape ``[N, K]``. 
@@ -72,9 +75,10 @@ class BoundingBoxes(TVTensor): format: BoundingBoxFormat canvas_size: tuple[int, int] + clamping_mode: CLAMPING_MODE_TYPE @classmethod - def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat | str, canvas_size: tuple[int, int], check_dims: bool = True) -> BoundingBoxes: # type: ignore[override] + def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat | str, canvas_size: tuple[int, int], clamping_mode: CLAMPING_MODE_TYPE = "soft", check_dims: bool = True) -> BoundingBoxes: # type: ignore[override] if check_dims: if tensor.ndim == 1: tensor = tensor.unsqueeze(0) @@ -85,6 +89,7 @@ def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat | str, canvas_ bounding_boxes = tensor.as_subclass(cls) bounding_boxes.format = format bounding_boxes.canvas_size = canvas_size + bounding_boxes.clamping_mode = clamping_mode return bounding_boxes def __new__( @@ -93,6 +98,7 @@ def __new__( *, format: BoundingBoxFormat | str, canvas_size: tuple[int, int], + clamping_mode: CLAMPING_MODE_TYPE = "soft", dtype: torch.dtype | None = None, device: torch.device | str | int | None = None, requires_grad: bool | None = None, @@ -114,16 +120,25 @@ def _wrap_output( # something like some_xyxy_bbox + some_xywh_bbox; we don't guard against those cases. 
flat_params, _ = tree_flatten(args + (tuple(kwargs.values()) if kwargs else ())) # type: ignore[operator] first_bbox_from_args = next(x for x in flat_params if isinstance(x, BoundingBoxes)) - format, canvas_size = first_bbox_from_args.format, first_bbox_from_args.canvas_size + format, canvas_size, clamping_mode = ( + first_bbox_from_args.format, + first_bbox_from_args.canvas_size, + first_bbox_from_args.clamping_mode, + ) if isinstance(output, torch.Tensor) and not isinstance(output, BoundingBoxes): - output = BoundingBoxes._wrap(output, format=format, canvas_size=canvas_size, check_dims=False) + output = BoundingBoxes._wrap( + output, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode, check_dims=False + ) elif isinstance(output, (tuple, list)): # This branch exists for chunk() and unbind() output = type(output)( - BoundingBoxes._wrap(part, format=format, canvas_size=canvas_size, check_dims=False) for part in output + BoundingBoxes._wrap( + part, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode, check_dims=False + ) + for part in output ) return output def __repr__(self, *, tensor_contents: Any = None) -> str: # type: ignore[override] - return self._make_repr(format=self.format, canvas_size=self.canvas_size) + return self._make_repr(format=self.format, canvas_size=self.canvas_size, clamping_mode=self.clamping_mode) From da7f360b125c478b4fdecf45aef4df0aa7d3ff83 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 27 Jun 2025 13:28:34 +0100 Subject: [PATCH 2/7] Add clamping_mode parameter to clamp_bounding_boxes functional and class --- test/common_utils.py | 3 +- test/test_transforms_v2.py | 52 +++++++++++++++++-- torchvision/transforms/v2/_meta.py | 9 +++- torchvision/transforms/v2/functional/_meta.py | 27 +++++++--- torchvision/tv_tensors/_bounding_boxes.py | 15 ++++-- 5 files changed, 88 insertions(+), 18 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 9da3cf52d1c..1461d1adff2 100644 --- 
a/test/common_utils.py +++ b/test/common_utils.py @@ -410,6 +410,7 @@ def make_bounding_boxes( canvas_size=DEFAULT_SIZE, *, format=tv_tensors.BoundingBoxFormat.XYXY, + clamping_mode="soft", num_boxes=1, dtype=None, device="cpu", @@ -474,7 +475,7 @@ def sample_position(values, max_value): # numerical issues during the testing buffer = 4 out_boxes = clamp_bounding_boxes( - out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer) + out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer), clamping_mode=clamping_mode ) if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR: out_boxes[:, :2] += buffer // 2 diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 7e667586ac1..75782cf2a7d 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -5506,20 +5506,23 @@ def test_correctness_image(self, mean, std, dtype, fn): class TestClampBoundingBoxes: @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("clamping_mode", ("hard", "none")) # TODOBB add soft @pytest.mark.parametrize("dtype", [torch.int64, torch.float32]) @pytest.mark.parametrize("device", cpu_and_cuda()) - def test_kernel(self, format, dtype, device): - bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) + def test_kernel(self, format, clamping_mode, dtype, device): + bounding_boxes = make_bounding_boxes(format=format, clamping_mode=clamping_mode, dtype=dtype, device=device) check_kernel( F.clamp_bounding_boxes, bounding_boxes, format=bounding_boxes.format, canvas_size=bounding_boxes.canvas_size, + clamping_mode=clamping_mode, ) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) - def test_functional(self, format): - check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format)) + @pytest.mark.parametrize("clamping_mode", ("hard", "none")) # TODOBB add soft + def 
test_functional(self, format, clamping_mode): + check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format, clamping_mode=clamping_mode)) def test_errors(self): input_tv_tensor = make_bounding_boxes() @@ -5540,6 +5543,47 @@ def test_errors(self): def test_transform(self): check_transform(transforms.ClampBoundingBoxes(), make_bounding_boxes()) + + @pytest.mark.parametrize("rotated", (True, False)) + @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none")) + @pytest.mark.parametrize("clamping_mode", ("hard", "none", None)) # TODOBB add soft here. + @pytest.mark.parametrize("pass_pure_tensor", (True, False)) + @pytest.mark.parametrize("fn", [F.clamp_bounding_boxes, transform_cls_to_functional(transforms.ClampBoundingBoxes)]) + def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode, pass_pure_tensor, fn): + # This test checks 2 things: + # - That passing clamping_mode=None to the clamp_bounding_boxes + # functional (or to the class) relies on the box's `.clamping_mode` + # attribute + # - That clamping happens when it should, and only when it should, i.e. + # when the clamping mode is not "none". It doesn't validate the + # numerical results, only that clamping happened. For that, we create + # a large 100x100 box inside of a small 10x10 image. 
+ + if pass_pure_tensor and fn is not F.clamp_bounding_boxes: + # Only the functional supports pure tensors, not the class + return + if pass_pure_tensor and clamping_mode is None: + # cannot leave clamping_mode=None when passing pure tensor + return + + if rotated: + boxes = tv_tensors.BoundingBoxes([0, 0, 100, 100, 0], format="XYWHR", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode) + expected_clamped_output = torch.tensor([[0, 0, 10, 10, 0]]) + else: + boxes = tv_tensors.BoundingBoxes([0, 100, 0, 100], format="XYXY", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode) + expected_clamped_output = torch.tensor([[0, 10, 0, 10]]) + + if pass_pure_tensor: + out = fn(boxes.as_subclass(torch.Tensor), format=boxes.format, canvas_size=boxes.canvas_size, clamping_mode=clamping_mode) + else: + out = fn(boxes, clamping_mode=clamping_mode) + + clamping_mode_prevailing = constructor_clamping_mode if clamping_mode is None else clamping_mode + if clamping_mode_prevailing == "none": + assert_equal(boxes, out) # should be a pass-through + else: + assert_equal(out, expected_clamped_output) + class TestClampKeyPoints: diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py index 1e3d9be2f28..7a950492ced 100644 --- a/torchvision/transforms/v2/_meta.py +++ b/torchvision/transforms/v2/_meta.py @@ -2,6 +2,7 @@ from torchvision import tv_tensors from torchvision.transforms.v2 import functional as F, Transform +from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE class ConvertBoundingBoxFormat(Transform): @@ -28,12 +29,18 @@ class ClampBoundingBoxes(Transform): The clamping is done according to the bounding boxes' ``canvas_size`` meta-data. + Args: + clamping_mode: TODOBB more docs. Default is None which relies on the input box' .clamping_mode attribute. 
+ """ + def __init__(self, clamping_mode: CLAMPING_MODE_TYPE = None) -> None: + super().__init__() + self.clamping_mode = clamping_mode _transformed_types = (tv_tensors.BoundingBoxes,) def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes: - return F.clamp_bounding_boxes(inpt) # type: ignore[return-value] + return F.clamp_bounding_boxes(inpt, clamping_mode=self.clamping_mode) # type: ignore[return-value] class ClampKeyPoints(Transform): diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py index 1729aa4bbaf..1de78cd9a46 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -5,6 +5,7 @@ from torchvision import tv_tensors from torchvision.transforms import _functional_pil as _FP from torchvision.tv_tensors import BoundingBoxFormat +from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE from torchvision.utils import _log_api_usage_once @@ -370,8 +371,11 @@ def convert_bounding_box_format( def _clamp_bounding_boxes( - bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int] + bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int], + clamping_mode: Optional[CLAMPING_MODE_TYPE], # TODOBB shouldn't be Optional ) -> torch.Tensor: + if clamping_mode is not None and clamping_mode == "none": + return bounding_boxes.clone() # TODO: Investigate if it makes sense from a performance perspective to have an implementation for every # BoundingBoxFormat instead of converting back and forth in_dtype = bounding_boxes.dtype @@ -477,7 +481,8 @@ def _clamp_along_y_axis( def _clamp_rotated_bounding_boxes( - bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int] + bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int], + clamping_mode: Optional[CLAMPING_MODE_TYPE], # TODOBB shouldn't be Optional 
) -> torch.Tensor: """ Clamp rotated bounding boxes to ensure they stay within the canvas boundaries. @@ -499,6 +504,8 @@ def _clamp_rotated_bounding_boxes( Returns: torch.Tensor: Clamped bounding boxes in the original format and shape """ + if clamping_mode is not None and clamping_mode == "none": + return bounding_boxes.clone() original_shape = bounding_boxes.shape dtype = bounding_boxes.dtype acceptable_dtypes = [torch.float64] # Ensure consistency between CPU and GPU. @@ -536,6 +543,7 @@ def clamp_bounding_boxes( inpt: torch.Tensor, format: Optional[BoundingBoxFormat] = None, canvas_size: Optional[tuple[int, int]] = None, + clamping_mode: Optional[CLAMPING_MODE_TYPE] = None, ) -> torch.Tensor: """See :func:`~torchvision.transforms.v2.ClampBoundingBoxes` for details.""" if not torch.jit.is_scripting(): @@ -543,22 +551,25 @@ def clamp_bounding_boxes( if torch.jit.is_scripting() or is_pure_tensor(inpt): - if format is None or canvas_size is None: - raise ValueError("For pure tensor inputs, `format` and `canvas_size` have to be passed.") + # TODOBB + if format is None or canvas_size is None or clamping_mode is None: + raise ValueError("For pure tensor inputs, `format`, `canvas_size` and `clamping_mode` have to be passed.") if tv_tensors.is_rotated_bounding_format(format): - return _clamp_rotated_bounding_boxes(inpt, format=format, canvas_size=canvas_size) + return _clamp_rotated_bounding_boxes(inpt, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode) else: - return _clamp_bounding_boxes(inpt, format=format, canvas_size=canvas_size) + return _clamp_bounding_boxes(inpt, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode) elif isinstance(inpt, tv_tensors.BoundingBoxes): if format is not None or canvas_size is not None: raise ValueError("For bounding box tv_tensor inputs, `format` and `canvas_size` must not be passed.") + if clamping_mode is None: + clamping_mode = inpt.clamping_mode if 
tv_tensors.is_rotated_bounding_format(inpt.format): output = _clamp_rotated_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size + inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, clamping_mode=clamping_mode ) else: output = _clamp_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size + inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, clamping_mode=clamping_mode ) return tv_tensors.wrap(output, like=inpt) else: diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py index 15fa37252c8..7c4d28c7c41 100644 --- a/torchvision/tv_tensors/_bounding_boxes.py +++ b/torchvision/tv_tensors/_bounding_boxes.py @@ -3,7 +3,7 @@ from collections.abc import Mapping, Sequence from enum import Enum -from typing import Any, Literal +from typing import Any import torch from torch.utils._pytree import tree_flatten @@ -46,7 +46,12 @@ def is_rotated_bounding_format(format: BoundingBoxFormat) -> bool: ) -CLAMPING_MODE_TYPE = Literal["hard", "soft", "none"] +# TODOBB consider making this a Literal instead. Tried briefly and got +# torchscript errors, leaving to str for now. +# CLAMPING_MODE_TYPE = Literal["hard", "soft", "none"] +CLAMPING_MODE_TYPE = str + +# TODOBB All docs. Add any new API to rst files, add tutorial[s]. class BoundingBoxes(TVTensor): @@ -65,6 +70,7 @@ class BoundingBoxes(TVTensor): data: Any data that can be turned into a tensor with :func:`torch.as_tensor`. format (BoundingBoxFormat, str): Format of the bounding box. canvas_size (two-tuple of ints): Height and width of the corresponding image or video. + clamping_mode: TODOBB dtype (torch.dtype, optional): Desired data type of the bounding box. If omitted, will be inferred from ``data``. device (torch.device, optional): Desired device of the bounding box. 
If omitted and ``data`` is a @@ -89,6 +95,7 @@ def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat | str, canvas_ bounding_boxes = tensor.as_subclass(cls) bounding_boxes.format = format bounding_boxes.canvas_size = canvas_size + # TODOBB validate values bounding_boxes.clamping_mode = clamping_mode return bounding_boxes @@ -98,13 +105,13 @@ def __new__( *, format: BoundingBoxFormat | str, canvas_size: tuple[int, int], - clamping_mode: CLAMPING_MODE_TYPE = "soft", + clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB change default to soft! dtype: torch.dtype | None = None, device: torch.device | str | int | None = None, requires_grad: bool | None = None, ) -> BoundingBoxes: tensor = cls._to_tensor(data, dtype=dtype, device=device, requires_grad=requires_grad) - return cls._wrap(tensor, format=format, canvas_size=canvas_size) + return cls._wrap(tensor, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode) @classmethod def _wrap_output( From 335f84043c559d00f5c88c1012824bd2ed23ce80 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 27 Jun 2025 13:33:07 +0100 Subject: [PATCH 3/7] Fix a test --- test/test_transforms_v2.py | 5 +++-- torchvision/transforms/v2/functional/_meta.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 75782cf2a7d..483e4e48c0a 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -5529,9 +5529,10 @@ def test_errors(self): input_pure_tensor = input_tv_tensor.as_subclass(torch.Tensor) format, canvas_size = input_tv_tensor.format, input_tv_tensor.canvas_size - for format_, canvas_size_ in [(None, None), (format, None), (None, canvas_size)]: + for format_, canvas_size_, clamping_mode_ in itertools.product( + (format, None), (canvas_size, None), (input_tv_tensor.clamping_mode, None)): with pytest.raises( - ValueError, match="For pure tensor inputs, `format` and `canvas_size` have to be passed." 
+ ValueError, match="For pure tensor inputs, `format`, `canvas_size` and `clamping_mode` have to be passed." ): F.clamp_bounding_boxes(input_pure_tensor, format=format_, canvas_size=canvas_size_) diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py index 1de78cd9a46..656ce839250 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -551,7 +551,6 @@ def clamp_bounding_boxes( if torch.jit.is_scripting() or is_pure_tensor(inpt): - # TODOBB if format is None or canvas_size is None or clamping_mode is None: raise ValueError("For pure tensor inputs, `format`, `canvas_size` and `clamping_mode` have to be passed.") if tv_tensors.is_rotated_bounding_format(format): From 51ca83e00c7480771454e74e676fa7a30485be93 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 27 Jun 2025 14:03:27 +0100 Subject: [PATCH 4/7] Fix some tests --- test/test_transforms_v2.py | 8 +++-- .../transforms/v2/functional/_geometry.py | 29 ++++++++++++++----- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 483e4e48c0a..13f223d2b54 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -492,6 +492,7 @@ def adapt_fill(value, *, dtype): def reference_affine_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True): format = bounding_boxes.format canvas_size = new_canvas_size or bounding_boxes.canvas_size + clamping_mode = bounding_boxes.clamping_mode def affine_bounding_boxes(bounding_boxes): dtype = bounding_boxes.dtype @@ -535,6 +536,7 @@ def affine_bounding_boxes(bounding_boxes): output, format=format, canvas_size=canvas_size, + clamping_mode=clamping_mode, ) else: # We leave the bounding box as float64 so the caller gets the full precision to perform any additional @@ -557,6 +559,7 @@ def reference_affine_rotated_bounding_boxes_helper( ): format = 
bounding_boxes.format canvas_size = new_canvas_size or bounding_boxes.canvas_size + clamping_mode = bounding_boxes.clamping_mode def affine_rotated_bounding_boxes(bounding_boxes): dtype = bounding_boxes.dtype @@ -618,6 +621,7 @@ def affine_rotated_bounding_boxes(bounding_boxes): output.to(dtype=dtype, device=device), format=format, canvas_size=canvas_size, + clamping_mode=clamping_mode, ) if clamp else output.to(dtype=output.dtype, device=device) @@ -831,7 +835,6 @@ def test_functional(self, size, make_input): (F.resize_image, torch.Tensor), (F._geometry._resize_image_pil, PIL.Image.Image), (F.resize_image, tv_tensors.Image), - (F.resize_bounding_boxes, tv_tensors.BoundingBoxes), (F.resize_mask, tv_tensors.Mask), (F.resize_video, tv_tensors.Video), (F.resize_keypoints, tv_tensors.KeyPoints), @@ -3289,7 +3292,6 @@ def test_functional(self, make_input): (F.elastic_image, torch.Tensor), (F._geometry._elastic_image_pil, PIL.Image.Image), (F.elastic_image, tv_tensors.Image), - (F.elastic_bounding_boxes, tv_tensors.BoundingBoxes), (F.elastic_mask, tv_tensors.Mask), (F.elastic_video, tv_tensors.Video), (F.elastic_keypoints, tv_tensors.KeyPoints), @@ -5126,6 +5128,7 @@ def test_image_functional_correctness(self, coefficients, interpolation, fill): def _reference_perspective_bounding_boxes(self, bounding_boxes, *, startpoints, endpoints): format = bounding_boxes.format canvas_size = bounding_boxes.canvas_size + clamping_mode = bounding_boxes.clamping_mode dtype = bounding_boxes.dtype device = bounding_boxes.device is_rotated = tv_tensors.is_rotated_bounding_format(format) @@ -5226,6 +5229,7 @@ def perspective_bounding_boxes(bounding_boxes): output, format=format, canvas_size=canvas_size, + clamping_mode=clamping_mode, ).to(dtype=dtype, device=device) return tv_tensors.BoundingBoxes( diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 7e9766bdaf5..09b0d9b98e6 100644 --- 
a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -9,6 +9,7 @@ from torch.nn.functional import grid_sample, interpolate, pad as torch_pad from torchvision import tv_tensors +from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE from torchvision.transforms import _functional_pil as _FP from torchvision.transforms._functional_tensor import _pad_symmetric from torchvision.transforms.functional import ( @@ -521,6 +522,7 @@ def resize_bounding_boxes( size: Optional[list[int]], max_size: Optional[int] = None, format: tv_tensors.BoundingBoxFormat = tv_tensors.BoundingBoxFormat.XYXY, + clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft ) -> tuple[torch.Tensor, tuple[int, int]]: # We set the default format as `tv_tensors.BoundingBoxFormat.XYXY` # to ensure backward compatibility. @@ -546,7 +548,7 @@ def resize_bounding_boxes( transformed_points = xyxyxyxy_boxes.mul(ratios) out_bboxes = _parallelogram_to_bounding_boxes(transformed_points) out_bboxes = clamp_bounding_boxes( - out_bboxes, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, canvas_size=(new_height, new_width) + out_bboxes, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, canvas_size=(new_height, new_width), clamping_mode=clamping_mode, ) return ( convert_bounding_box_format( @@ -572,7 +574,7 @@ def _resize_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, size: Optional[list[int]], max_size: Optional[int] = None, **kwargs: Any ) -> tv_tensors.BoundingBoxes: output, canvas_size = resize_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, size=size, max_size=max_size + inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, size=size, max_size=max_size, clamping_mode=inpt.clamping_mode ) return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size) @@ -1098,6 +1100,7 @@ def _affine_bounding_boxes_with_expand( shear: list[float], center: 
Optional[list[float]] = None, expand: bool = False, + clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft ) -> tuple[torch.Tensor, tuple[int, int]]: if bounding_boxes.numel() == 0: return bounding_boxes, canvas_size @@ -1176,7 +1179,7 @@ def _affine_bounding_boxes_with_expand( new_width, new_height = _compute_affine_output_size(affine_vector, width, height) canvas_size = (new_height, new_width) - out_bboxes = clamp_bounding_boxes(out_bboxes, format=intermediate_format, canvas_size=canvas_size) + out_bboxes = clamp_bounding_boxes(out_bboxes, format=intermediate_format, canvas_size=canvas_size, clamping_mode=clamping_mode) out_bboxes = convert_bounding_box_format( out_bboxes, old_format=intermediate_format, new_format=format, inplace=True ).reshape(original_shape) @@ -1197,6 +1200,7 @@ def affine_bounding_boxes( scale: float, shear: list[float], center: Optional[list[float]] = None, + clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft ) -> torch.Tensor: out_box, _ = _affine_bounding_boxes_with_expand( bounding_boxes, @@ -1208,6 +1212,7 @@ def affine_bounding_boxes( shear=shear, center=center, expand=False, + clamping_mode=clamping_mode, ) return out_box @@ -1231,6 +1236,7 @@ def _affine_bounding_boxes_dispatch( scale=scale, shear=shear, center=center, + clamping_mode=inpt.clamping_mode, ) return tv_tensors.wrap(output, like=inpt) @@ -1723,6 +1729,7 @@ def pad_bounding_boxes( canvas_size: tuple[int, int], padding: list[int], padding_mode: str = "constant", + clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft ) -> tuple[torch.Tensor, tuple[int, int]]: if padding_mode not in ["constant"]: # TODO: add support of other padding modes @@ -1745,7 +1752,7 @@ def pad_bounding_boxes( width += left + right canvas_size = (height, width) - return clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size), canvas_size + return clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode), canvas_size 
@_register_kernel_internal(pad, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) @@ -1758,6 +1765,7 @@ def _pad_bounding_boxes_dispatch( canvas_size=inpt.canvas_size, padding=padding, padding_mode=padding_mode, + clamping_mode=inpt.clamping_mode ) return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size) @@ -1836,6 +1844,7 @@ def crop_bounding_boxes( left: int, height: int, width: int, + clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft ) -> tuple[torch.Tensor, tuple[int, int]]: # Crop or implicit pad if left and/or top have negative values: @@ -1854,7 +1863,7 @@ def crop_bounding_boxes( if format == tv_tensors.BoundingBoxFormat.XYXYXYXY: bounding_boxes = _parallelogram_to_bounding_boxes(bounding_boxes) - return clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size), canvas_size + return clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode), canvas_size @_register_kernel_internal(crop, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) @@ -1862,7 +1871,7 @@ def _crop_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, top: int, left: int, height: int, width: int ) -> tv_tensors.BoundingBoxes: output, canvas_size = crop_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, top=top, left=left, height=height, width=width + inpt.as_subclass(torch.Tensor), format=inpt.format, top=top, left=left, height=height, width=width, clamping_mode=inpt.clamping_mode ) return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size) @@ -2066,6 +2075,7 @@ def perspective_bounding_boxes( startpoints: Optional[list[list[int]]], endpoints: Optional[list[list[int]]], coefficients: Optional[list[float]] = None, + clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft ) -> torch.Tensor: if bounding_boxes.numel() == 0: return bounding_boxes @@ -2130,7 +2140,7 @@ def perspective_bounding_boxes( out_bbox_mins, out_bbox_maxs = torch.aminmax(transformed_points, dim=1) 
out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1) - out_bboxes = clamp_bounding_boxes(out_bboxes, format=intermediate_format, canvas_size=canvas_size) + out_bboxes = clamp_bounding_boxes(out_bboxes, format=intermediate_format, canvas_size=canvas_size, clamping_mode=clamping_mode) out_bboxes = convert_bounding_box_format( out_bboxes, old_format=intermediate_format, new_format=format, inplace=True @@ -2185,6 +2195,7 @@ def _perspective_bounding_boxes_dispatch( startpoints=startpoints, endpoints=endpoints, coefficients=coefficients, + clamping_mode=inpt.clamping_mode, ) return tv_tensors.wrap(output, like=inpt) @@ -2377,6 +2388,7 @@ def elastic_bounding_boxes( format: tv_tensors.BoundingBoxFormat, canvas_size: tuple[int, int], displacement: torch.Tensor, + clamping_mode: CLAMPING_MODE_TYPE = "hard", # TODOBB soft ) -> torch.Tensor: expected_shape = (1, canvas_size[0], canvas_size[1], 2) if not isinstance(displacement, torch.Tensor): @@ -2432,6 +2444,7 @@ def elastic_bounding_boxes( out_bboxes, format=intermediate_format, canvas_size=canvas_size, + clamping_mode=clamping_mode ) return convert_bounding_box_format( @@ -2444,7 +2457,7 @@ def _elastic_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, displacement: torch.Tensor, **kwargs ) -> tv_tensors.BoundingBoxes: output = elastic_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, displacement=displacement + inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, displacement=displacement, clamping_mode=inpt.clamping_mode ) return tv_tensors.wrap(output, like=inpt) From 01d34521353e966db2eb75ac7074e695fb69c6b3 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 27 Jun 2025 14:45:18 +0100 Subject: [PATCH 5/7] Add SetClampingMode transform --- test/common_utils.py | 4 +-- test/test_transforms_v2.py | 49 +++++++++++++++++++++++++++ torchvision/transforms/v2/__init__.py | 2 +- torchvision/transforms/v2/_meta.py | 21 ++++++++++-- 
torchvision/tv_tensors/__init__.py | 3 +- 5 files changed, 72 insertions(+), 7 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 1461d1adff2..dbc567353c2 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -410,7 +410,7 @@ def make_bounding_boxes( canvas_size=DEFAULT_SIZE, *, format=tv_tensors.BoundingBoxFormat.XYXY, - clamping_mode="soft", + clamping_mode="hard", # TODOBB num_boxes=1, dtype=None, device="cpu", @@ -481,7 +481,7 @@ def sample_position(values, max_value): out_boxes[:, :2] += buffer // 2 elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY: out_boxes[:, :] += buffer // 2 - return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size) + return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode) def make_detection_masks(size=DEFAULT_SIZE, *, num_masks=1, dtype=None, device="cpu"): diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 13f223d2b54..d64858387a1 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -5589,6 +5589,54 @@ def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode, else: assert_equal(out, expected_clamped_output) +class TestSetClampingMode: + + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none")) # TODOBB add soft + @pytest.mark.parametrize("desired_clamping_mode", ("hard", "none")) # TODOBB add soft + def test_setter(self, format, constructor_clamping_mode, desired_clamping_mode): + + in_boxes = make_bounding_boxes(format=format, clamping_mode=constructor_clamping_mode) + out_boxes = transforms.SetClampingMode(clamping_mode=desired_clamping_mode)(in_boxes) + + assert in_boxes.clamping_mode == constructor_clamping_mode # input is unchanged: no leak + assert out_boxes.clamping_mode == desired_clamping_mode + + @pytest.mark.parametrize("format", 
list(tv_tensors.BoundingBoxFormat)) + @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none")) # TODOBB add soft + def test_pipeline_no_leak(self, format, constructor_clamping_mode): + + class AssertClampingMode(transforms.Transform): + def __init__(self, expected_clamping_mode): + super().__init__() + self.expected_clamping_mode = expected_clamping_mode + + _transformed_types = (tv_tensors.BoundingBoxes,) + + def transform(self, inpt, _): + assert inpt.clamping_mode == self.expected_clamping_mode + return inpt + + t = transforms.Compose( + [ + transforms.SetClampingMode("none"), + AssertClampingMode("none"), + transforms.SetClampingMode("hard"), + AssertClampingMode("hard"), + transforms.SetClampingMode("none"), + AssertClampingMode("none"), + transforms.ClampBoundingBoxes("hard") + ] + ) + + in_boxes = make_bounding_boxes(format=format, clamping_mode=constructor_clamping_mode) + out_boxes = t(in_boxes) + + assert in_boxes.clamping_mode == constructor_clamping_mode # input is unchanged: no leak + + # assert that the output boxes clamping_mode is the one set by the last SetClampingMode. + # ClampBoundingBoxes doesn't set clamping_mode. 
+ assert out_boxes.clamping_mode == "none" class TestClampKeyPoints: @@ -7376,3 +7424,4 @@ def test_different_sizes(self, make_input1, make_input2, query): def test_no_valid_input(self, query): with pytest.raises(TypeError, match="No image"): query(["blah"]) + diff --git a/torchvision/transforms/v2/__init__.py b/torchvision/transforms/v2/__init__.py index 82a131d6fbc..408065dab94 100644 --- a/torchvision/transforms/v2/__init__.py +++ b/torchvision/transforms/v2/__init__.py @@ -41,7 +41,7 @@ ScaleJitter, TenCrop, ) -from ._meta import ClampBoundingBoxes, ClampKeyPoints, ConvertBoundingBoxFormat +from ._meta import ClampBoundingBoxes, ClampKeyPoints, ConvertBoundingBoxFormat, SetClampingMode from ._misc import ( ConvertImageDtype, GaussianBlur, diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py index 7a950492ced..d9b5331b5af 100644 --- a/torchvision/transforms/v2/_meta.py +++ b/torchvision/transforms/v2/_meta.py @@ -1,4 +1,4 @@ -from typing import Any, Union +from typing import Any, Union, Optional from torchvision import tv_tensors from torchvision.transforms.v2 import functional as F, Transform @@ -30,10 +30,10 @@ class ClampBoundingBoxes(Transform): The clamping is done according to the bounding boxes' ``canvas_size`` meta-data. Args: - clamping_mode: TODOBB more docs. Default is None which relies on the input box' .clamping_mode attribute. + clamping_mode: TODOBB more docs. Default is None which relies on the input box' clamping_mode attribute. 
""" - def __init__(self, clamping_mode: CLAMPING_MODE_TYPE = None) -> None: + def __init__(self, clamping_mode: Optional[CLAMPING_MODE_TYPE] = None) -> None: super().__init__() self.clamping_mode = clamping_mode @@ -53,3 +53,18 @@ class ClampKeyPoints(Transform): def transform(self, inpt: tv_tensors.KeyPoints, params: dict[str, Any]) -> tv_tensors.KeyPoints: return F.clamp_keypoints(inpt) # type: ignore[return-value] + + +class SetClampingMode(Transform): + """TODOBB""" + def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None: + super().__init__() + # TODOBB validate mode + self.clamping_mode = clamping_mode + + _transformed_types = (tv_tensors.BoundingBoxes,) + + def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes: + out = inpt.clone() + out.clamping_mode = self.clamping_mode + return out \ No newline at end of file diff --git a/torchvision/tv_tensors/__init__.py b/torchvision/tv_tensors/__init__.py index 40cd70d16cb..744e5241135 100644 --- a/torchvision/tv_tensors/__init__.py +++ b/torchvision/tv_tensors/__init__.py @@ -23,7 +23,7 @@ def wrap(wrappee, *, like, **kwargs): wrappee (Tensor): The tensor to convert. like (:class:`~torchvision.tv_tensors.TVTensor`): The reference. ``wrappee`` will be converted into the same subclass as ``like``. - kwargs: Can contain "format" and "canvas_size" if ``like`` is a :class:`~torchvision.tv_tensor.BoundingBoxes`. + kwargs: Can contain "format", "canvas_size" and "clamping_mode" if ``like`` is a :class:`~torchvision.tv_tensor.BoundingBoxes`. Ignored otherwise. 
""" if isinstance(like, BoundingBoxes): @@ -31,6 +31,7 @@ def wrap(wrappee, *, like, **kwargs): wrappee, format=kwargs.get("format", like.format), canvas_size=kwargs.get("canvas_size", like.canvas_size), + clamping_mode=kwargs.get("clamping_mode", like.clamping_mode), ) elif isinstance(like, KeyPoints): return KeyPoints._wrap(wrappee, canvas_size=kwargs.get("canvas_size", like.canvas_size)) From faffedf7e6413247a97d1c14d68802d441de2b42 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 27 Jun 2025 14:46:57 +0100 Subject: [PATCH 6/7] Lint --- test/common_utils.py | 5 +- test/test_transforms_v2.py | 35 +++++++----- torchvision/transforms/v2/_meta.py | 8 +-- .../transforms/v2/functional/_geometry.py | 53 ++++++++++++++----- torchvision/transforms/v2/functional/_meta.py | 24 ++++++--- torchvision/tv_tensors/_bounding_boxes.py | 2 +- 6 files changed, 89 insertions(+), 38 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index dbc567353c2..9af40cec878 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -475,7 +475,10 @@ def sample_position(values, max_value): # numerical issues during the testing buffer = 4 out_boxes = clamp_bounding_boxes( - out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer), clamping_mode=clamping_mode + out_boxes, + format=format, + canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer), + clamping_mode=clamping_mode, ) if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR: out_boxes[:, :2] += buffer // 2 diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index d64858387a1..dd774672273 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -5524,7 +5524,7 @@ def test_kernel(self, format, clamping_mode, dtype, device): ) @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) - @pytest.mark.parametrize("clamping_mode", ("hard", "none")) # TODOBB add soft + 
@pytest.mark.parametrize("clamping_mode", ("hard", "none")) # TODOBB add soft def test_functional(self, format, clamping_mode): check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format, clamping_mode=clamping_mode)) @@ -5534,9 +5534,11 @@ def test_errors(self): format, canvas_size = input_tv_tensor.format, input_tv_tensor.canvas_size for format_, canvas_size_, clamping_mode_ in itertools.product( - (format, None), (canvas_size, None), (input_tv_tensor.clamping_mode, None)): + (format, None), (canvas_size, None), (input_tv_tensor.clamping_mode, None) + ): with pytest.raises( - ValueError, match="For pure tensor inputs, `format`, `canvas_size` and `clamping_mode` have to be passed." + ValueError, + match="For pure tensor inputs, `format`, `canvas_size` and `clamping_mode` have to be passed.", ): F.clamp_bounding_boxes(input_pure_tensor, format=format_, canvas_size=canvas_size_) @@ -5548,7 +5550,7 @@ def test_errors(self): def test_transform(self): check_transform(transforms.ClampBoundingBoxes(), make_bounding_boxes()) - + @pytest.mark.parametrize("rotated", (True, False)) @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none")) @pytest.mark.parametrize("clamping_mode", ("hard", "none", None)) # TODOBB add soft here. 
@@ -5572,14 +5574,23 @@ def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode, return if rotated: - boxes = tv_tensors.BoundingBoxes([0, 0, 100, 100, 0], format="XYWHR", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode) + boxes = tv_tensors.BoundingBoxes( + [0, 0, 100, 100, 0], format="XYWHR", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode + ) expected_clamped_output = torch.tensor([[0, 0, 10, 10, 0]]) else: - boxes = tv_tensors.BoundingBoxes([0, 100, 0, 100], format="XYXY", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode) + boxes = tv_tensors.BoundingBoxes( + [0, 100, 0, 100], format="XYXY", canvas_size=(10, 10), clamping_mode=constructor_clamping_mode + ) expected_clamped_output = torch.tensor([[0, 10, 0, 10]]) if pass_pure_tensor: - out = fn(boxes.as_subclass(torch.Tensor), format=boxes.format, canvas_size=boxes.canvas_size, clamping_mode=clamping_mode) + out = fn( + boxes.as_subclass(torch.Tensor), + format=boxes.format, + canvas_size=boxes.canvas_size, + clamping_mode=clamping_mode, + ) else: out = fn(boxes, clamping_mode=clamping_mode) @@ -5589,8 +5600,8 @@ def test_clamping_mode(self, rotated, constructor_clamping_mode, clamping_mode, else: assert_equal(out, expected_clamped_output) -class TestSetClampingMode: +class TestSetClampingMode: @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none")) # TODOBB add soft @pytest.mark.parametrize("desired_clamping_mode", ("hard", "none")) # TODOBB add soft @@ -5601,18 +5612,17 @@ def test_setter(self, format, constructor_clamping_mode, desired_clamping_mode): assert in_boxes.clamping_mode == constructor_clamping_mode # input is unchanged: no leak assert out_boxes.clamping_mode == desired_clamping_mode - + @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) @pytest.mark.parametrize("constructor_clamping_mode", ("hard", "none")) # TODOBB add soft def 
test_pipeline_no_leak(self, format, constructor_clamping_mode): - class AssertClampingMode(transforms.Transform): def __init__(self, expected_clamping_mode): super().__init__() self.expected_clamping_mode = expected_clamping_mode _transformed_types = (tv_tensors.BoundingBoxes,) - + def transform(self, inpt, _): assert inpt.clamping_mode == self.expected_clamping_mode return inpt @@ -5625,7 +5635,7 @@ def transform(self, inpt, _): AssertClampingMode("hard"), transforms.SetClampingMode("none"), AssertClampingMode("none"), - transforms.ClampBoundingBoxes("hard") + transforms.ClampBoundingBoxes("hard"), ] ) @@ -7424,4 +7434,3 @@ def test_different_sizes(self, make_input1, make_input2, query): def test_no_valid_input(self, query): with pytest.raises(TypeError, match="No image"): query(["blah"]) - diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py index d9b5331b5af..4ed41bd6edc 100644 --- a/torchvision/transforms/v2/_meta.py +++ b/torchvision/transforms/v2/_meta.py @@ -1,8 +1,8 @@ -from typing import Any, Union, Optional +from typing import Any, Optional, Union from torchvision import tv_tensors from torchvision.transforms.v2 import functional as F, Transform -from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE +from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE class ConvertBoundingBoxFormat(Transform): @@ -33,6 +33,7 @@ class ClampBoundingBoxes(Transform): clamping_mode: TODOBB more docs. Default is None which relies on the input box' clamping_mode attribute. 
""" + def __init__(self, clamping_mode: Optional[CLAMPING_MODE_TYPE] = None) -> None: super().__init__() self.clamping_mode = clamping_mode @@ -57,6 +58,7 @@ def transform(self, inpt: tv_tensors.KeyPoints, params: dict[str, Any]) -> tv_te class SetClampingMode(Transform): """TODOBB""" + def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None: super().__init__() # TODOBB validate mode @@ -67,4 +69,4 @@ def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None: def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes: out = inpt.clone() out.clamping_mode = self.clamping_mode - return out \ No newline at end of file + return out diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 09b0d9b98e6..aff96d0a7e8 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -9,7 +9,6 @@ from torch.nn.functional import grid_sample, interpolate, pad as torch_pad from torchvision import tv_tensors -from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE from torchvision.transforms import _functional_pil as _FP from torchvision.transforms._functional_tensor import _pad_symmetric from torchvision.transforms.functional import ( @@ -21,6 +20,7 @@ pil_to_tensor, to_pil_image, ) +from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE from torchvision.utils import _log_api_usage_once @@ -548,7 +548,10 @@ def resize_bounding_boxes( transformed_points = xyxyxyxy_boxes.mul(ratios) out_bboxes = _parallelogram_to_bounding_boxes(transformed_points) out_bboxes = clamp_bounding_boxes( - out_bboxes, format=tv_tensors.BoundingBoxFormat.XYXYXYXY, canvas_size=(new_height, new_width), clamping_mode=clamping_mode, + out_bboxes, + format=tv_tensors.BoundingBoxFormat.XYXYXYXY, + canvas_size=(new_height, new_width), + clamping_mode=clamping_mode, ) return ( convert_bounding_box_format( @@ 
-574,7 +577,12 @@ def _resize_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, size: Optional[list[int]], max_size: Optional[int] = None, **kwargs: Any ) -> tv_tensors.BoundingBoxes: output, canvas_size = resize_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, size=size, max_size=max_size, clamping_mode=inpt.clamping_mode + inpt.as_subclass(torch.Tensor), + format=inpt.format, + canvas_size=inpt.canvas_size, + size=size, + max_size=max_size, + clamping_mode=inpt.clamping_mode, ) return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size) @@ -1179,7 +1187,9 @@ def _affine_bounding_boxes_with_expand( new_width, new_height = _compute_affine_output_size(affine_vector, width, height) canvas_size = (new_height, new_width) - out_bboxes = clamp_bounding_boxes(out_bboxes, format=intermediate_format, canvas_size=canvas_size, clamping_mode=clamping_mode) + out_bboxes = clamp_bounding_boxes( + out_bboxes, format=intermediate_format, canvas_size=canvas_size, clamping_mode=clamping_mode + ) out_bboxes = convert_bounding_box_format( out_bboxes, old_format=intermediate_format, new_format=format, inplace=True ).reshape(original_shape) @@ -1752,7 +1762,10 @@ def pad_bounding_boxes( width += left + right canvas_size = (height, width) - return clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode), canvas_size + return ( + clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode), + canvas_size, + ) @_register_kernel_internal(pad, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) @@ -1765,7 +1778,7 @@ def _pad_bounding_boxes_dispatch( canvas_size=inpt.canvas_size, padding=padding, padding_mode=padding_mode, - clamping_mode=inpt.clamping_mode + clamping_mode=inpt.clamping_mode, ) return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size) @@ -1863,7 +1876,10 @@ def crop_bounding_boxes( if format == 
tv_tensors.BoundingBoxFormat.XYXYXYXY: bounding_boxes = _parallelogram_to_bounding_boxes(bounding_boxes) - return clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode), canvas_size + return ( + clamp_bounding_boxes(bounding_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode), + canvas_size, + ) @_register_kernel_internal(crop, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) @@ -1871,7 +1887,13 @@ def _crop_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, top: int, left: int, height: int, width: int ) -> tv_tensors.BoundingBoxes: output, canvas_size = crop_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, top=top, left=left, height=height, width=width, clamping_mode=inpt.clamping_mode + inpt.as_subclass(torch.Tensor), + format=inpt.format, + top=top, + left=left, + height=height, + width=width, + clamping_mode=inpt.clamping_mode, ) return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size) @@ -2140,7 +2162,9 @@ def perspective_bounding_boxes( out_bbox_mins, out_bbox_maxs = torch.aminmax(transformed_points, dim=1) out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1) - out_bboxes = clamp_bounding_boxes(out_bboxes, format=intermediate_format, canvas_size=canvas_size, clamping_mode=clamping_mode) + out_bboxes = clamp_bounding_boxes( + out_bboxes, format=intermediate_format, canvas_size=canvas_size, clamping_mode=clamping_mode + ) out_bboxes = convert_bounding_box_format( out_bboxes, old_format=intermediate_format, new_format=format, inplace=True @@ -2441,10 +2465,7 @@ def elastic_bounding_boxes( out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1).to(bounding_boxes.dtype) out_bboxes = clamp_bounding_boxes( - out_bboxes, - format=intermediate_format, - canvas_size=canvas_size, - clamping_mode=clamping_mode + out_bboxes, format=intermediate_format, canvas_size=canvas_size, clamping_mode=clamping_mode ) return convert_bounding_box_format( @@ 
-2457,7 +2478,11 @@ def _elastic_bounding_boxes_dispatch( inpt: tv_tensors.BoundingBoxes, displacement: torch.Tensor, **kwargs ) -> tv_tensors.BoundingBoxes: output = elastic_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, displacement=displacement, clamping_mode=inpt.clamping_mode + inpt.as_subclass(torch.Tensor), + format=inpt.format, + canvas_size=inpt.canvas_size, + displacement=displacement, + clamping_mode=inpt.clamping_mode, ) return tv_tensors.wrap(output, like=inpt) diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py index 656ce839250..cf23471f770 100644 --- a/torchvision/transforms/v2/functional/_meta.py +++ b/torchvision/transforms/v2/functional/_meta.py @@ -5,7 +5,7 @@ from torchvision import tv_tensors from torchvision.transforms import _functional_pil as _FP from torchvision.tv_tensors import BoundingBoxFormat -from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE +from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE from torchvision.utils import _log_api_usage_once @@ -371,7 +371,9 @@ def convert_bounding_box_format( def _clamp_bounding_boxes( - bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int], + bounding_boxes: torch.Tensor, + format: BoundingBoxFormat, + canvas_size: tuple[int, int], clamping_mode: Optional[CLAMPING_MODE_TYPE], # TODOBB shouldn't be Optional ) -> torch.Tensor: if clamping_mode is not None and clamping_mode == "none": @@ -481,7 +483,9 @@ def _clamp_along_y_axis( def _clamp_rotated_bounding_boxes( - bounding_boxes: torch.Tensor, format: BoundingBoxFormat, canvas_size: tuple[int, int], + bounding_boxes: torch.Tensor, + format: BoundingBoxFormat, + canvas_size: tuple[int, int], clamping_mode: Optional[CLAMPING_MODE_TYPE], # TODOBB shouldn't be Optional ) -> torch.Tensor: """ @@ -554,7 +558,9 @@ def clamp_bounding_boxes( if format is None or canvas_size is None 
or clamping_mode is None: raise ValueError("For pure tensor inputs, `format`, `canvas_size` and `clamping_mode` have to be passed.") if tv_tensors.is_rotated_bounding_format(format): - return _clamp_rotated_bounding_boxes(inpt, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode) + return _clamp_rotated_bounding_boxes( + inpt, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode + ) else: return _clamp_bounding_boxes(inpt, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode) elif isinstance(inpt, tv_tensors.BoundingBoxes): @@ -564,11 +570,17 @@ def clamp_bounding_boxes( clamping_mode = inpt.clamping_mode if tv_tensors.is_rotated_bounding_format(inpt.format): output = _clamp_rotated_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, clamping_mode=clamping_mode + inpt.as_subclass(torch.Tensor), + format=inpt.format, + canvas_size=inpt.canvas_size, + clamping_mode=clamping_mode, ) else: output = _clamp_bounding_boxes( - inpt.as_subclass(torch.Tensor), format=inpt.format, canvas_size=inpt.canvas_size, clamping_mode=clamping_mode + inpt.as_subclass(torch.Tensor), + format=inpt.format, + canvas_size=inpt.canvas_size, + clamping_mode=clamping_mode, ) return tv_tensors.wrap(output, like=inpt) else: diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py index 7c4d28c7c41..528f81e2fc5 100644 --- a/torchvision/tv_tensors/_bounding_boxes.py +++ b/torchvision/tv_tensors/_bounding_boxes.py @@ -49,7 +49,7 @@ def is_rotated_bounding_format(format: BoundingBoxFormat) -> bool: # TODOBB consider making this a Literal instead. Tried briefly and got # torchscript errors, leaving to str for now. # CLAMPING_MODE_TYPE = Literal["hard", "soft", "none"] -CLAMPING_MODE_TYPE = str +CLAMPING_MODE_TYPE = str # TODOBB All docs. Add any new API to rst files, add tutorial[s]. 
From 0a6ff79f08e6eb6ff16c9321bb5073dafabd6945 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 30 Jun 2025 09:50:34 +0100 Subject: [PATCH 7/7] Fix types --- torchvision/transforms/v2/_meta.py | 2 +- torchvision/tv_tensors/_bounding_boxes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/transforms/v2/_meta.py b/torchvision/transforms/v2/_meta.py index 4ed41bd6edc..34e44045cbc 100644 --- a/torchvision/transforms/v2/_meta.py +++ b/torchvision/transforms/v2/_meta.py @@ -67,6 +67,6 @@ def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None: _transformed_types = (tv_tensors.BoundingBoxes,) def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes: - out = inpt.clone() + out: tv_tensors.BoundingBoxes = inpt.clone() # type: ignore[assignment] out.clamping_mode = self.clamping_mode return out diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py index 528f81e2fc5..22a32b7dfa5 100644 --- a/torchvision/tv_tensors/_bounding_boxes.py +++ b/torchvision/tv_tensors/_bounding_boxes.py @@ -81,7 +81,7 @@ class BoundingBoxes(TVTensor): format: BoundingBoxFormat canvas_size: tuple[int, int] - clamping_mode: CLAMPING_MODE_T + clamping_mode: CLAMPING_MODE_TYPE @classmethod def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat | str, canvas_size: tuple[int, int], clamping_mode: CLAMPING_MODE_TYPE = "soft", check_dims: bool = True) -> BoundingBoxes: # type: ignore[override]