@@ -522,7 +522,7 @@ def resize_bounding_boxes(
522
522
size : Optional [list [int ]],
523
523
max_size : Optional [int ] = None ,
524
524
format : tv_tensors .BoundingBoxFormat = tv_tensors .BoundingBoxFormat .XYXY ,
525
- clamping_mode : CLAMPING_MODE_TYPE = "hard" , # TODOBB soft
525
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
526
526
) -> tuple [torch .Tensor , tuple [int , int ]]:
527
527
# We set the default format as `tv_tensors.BoundingBoxFormat.XYXY`
528
528
# to ensure backward compatibility.
@@ -1108,15 +1108,16 @@ def _affine_bounding_boxes_with_expand(
1108
1108
shear : list [float ],
1109
1109
center : Optional [list [float ]] = None ,
1110
1110
expand : bool = False ,
1111
- clamping_mode : CLAMPING_MODE_TYPE = "hard" , # TODOBB soft
1111
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
1112
1112
) -> tuple [torch .Tensor , tuple [int , int ]]:
1113
1113
if bounding_boxes .numel () == 0 :
1114
1114
return bounding_boxes , canvas_size
1115
1115
1116
1116
original_shape = bounding_boxes .shape
1117
1117
dtype = bounding_boxes .dtype
1118
- need_cast = not bounding_boxes .is_floating_point ()
1119
- bounding_boxes = bounding_boxes .float () if need_cast else bounding_boxes .clone ()
1118
+ acceptable_dtypes = [torch.float64]  # Ensure consistency between CPU and GPU.
1119
+ need_cast = dtype not in acceptable_dtypes
1120
+ bounding_boxes = bounding_boxes.to(torch.float64) if need_cast else bounding_boxes.clone()
1120
1121
device = bounding_boxes .device
1121
1122
is_rotated = tv_tensors .is_rotated_bounding_format (format )
1122
1123
intermediate_format = tv_tensors .BoundingBoxFormat .XYXYXYXY if is_rotated else tv_tensors .BoundingBoxFormat .XYXY
@@ -1210,7 +1211,7 @@ def affine_bounding_boxes(
1210
1211
scale : float ,
1211
1212
shear : list [float ],
1212
1213
center : Optional [list [float ]] = None ,
1213
- clamping_mode : CLAMPING_MODE_TYPE = "hard" , # TODOBB soft
1214
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
1214
1215
) -> torch .Tensor :
1215
1216
out_box , _ = _affine_bounding_boxes_with_expand (
1216
1217
bounding_boxes ,
@@ -1448,6 +1449,7 @@ def rotate_bounding_boxes(
1448
1449
angle : float ,
1449
1450
expand : bool = False ,
1450
1451
center : Optional [list [float ]] = None ,
1452
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
1451
1453
) -> tuple [torch .Tensor , tuple [int , int ]]:
1452
1454
return _affine_bounding_boxes_with_expand (
1453
1455
bounding_boxes ,
@@ -1459,6 +1461,7 @@ def rotate_bounding_boxes(
1459
1461
shear = [0.0 , 0.0 ],
1460
1462
center = center ,
1461
1463
expand = expand ,
1464
+ clamping_mode=clamping_mode,
1462
1465
)
1463
1466
1464
1467
@@ -1473,6 +1476,7 @@ def _rotate_bounding_boxes_dispatch(
1473
1476
angle = angle ,
1474
1477
expand = expand ,
1475
1478
center = center ,
1479
+ clamping_mode=inpt.clamping_mode,
1476
1480
)
1477
1481
return tv_tensors .wrap (output , like = inpt , canvas_size = canvas_size )
1478
1482
@@ -1739,7 +1743,7 @@ def pad_bounding_boxes(
1739
1743
canvas_size : tuple [int , int ],
1740
1744
padding : list [int ],
1741
1745
padding_mode : str = "constant" ,
1742
- clamping_mode : CLAMPING_MODE_TYPE = "hard" , # TODOBB soft
1746
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
1743
1747
) -> tuple [torch .Tensor , tuple [int , int ]]:
1744
1748
if padding_mode not in ["constant" ]:
1745
1749
# TODO: add support of other padding modes
@@ -1857,7 +1861,7 @@ def crop_bounding_boxes(
1857
1861
left : int ,
1858
1862
height : int ,
1859
1863
width : int ,
1860
- clamping_mode : CLAMPING_MODE_TYPE = "hard" , # TODOBB soft
1864
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
1861
1865
) -> tuple [torch .Tensor , tuple [int , int ]]:
1862
1866
1863
1867
# Crop or implicit pad if left and/or top have negative values:
@@ -2097,7 +2101,7 @@ def perspective_bounding_boxes(
2097
2101
startpoints : Optional [list [list [int ]]],
2098
2102
endpoints : Optional [list [list [int ]]],
2099
2103
coefficients : Optional [list [float ]] = None ,
2100
- clamping_mode : CLAMPING_MODE_TYPE = "hard" , # TODOBB soft
2104
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
2101
2105
) -> torch .Tensor :
2102
2106
if bounding_boxes .numel () == 0 :
2103
2107
return bounding_boxes
@@ -2412,7 +2416,7 @@ def elastic_bounding_boxes(
2412
2416
format : tv_tensors .BoundingBoxFormat ,
2413
2417
canvas_size : tuple [int , int ],
2414
2418
displacement : torch .Tensor ,
2415
- clamping_mode : CLAMPING_MODE_TYPE = "hard" , # TODOBB soft
2419
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
2416
2420
) -> torch .Tensor :
2417
2421
expected_shape = (1 , canvas_size [0 ], canvas_size [1 ], 2 )
2418
2422
if not isinstance (displacement , torch .Tensor ):
@@ -2433,19 +2437,19 @@ def elastic_bounding_boxes(
2433
2437
2434
2438
original_shape = bounding_boxes .shape
2435
2439
# TODO: first cast to float if bbox is int64 before convert_bounding_box_format
2436
- intermediate_format = tv_tensors .BoundingBoxFormat .XYXYXYXY if is_rotated else tv_tensors .BoundingBoxFormat .XYXY
2440
+ intermediate_format = tv_tensors.BoundingBoxFormat.CXCYWHR if is_rotated else tv_tensors.BoundingBoxFormat.XYXY
2437
2441
2438
2442
bounding_boxes = (
2439
2443
convert_bounding_box_format (bounding_boxes .clone (), old_format = format , new_format = intermediate_format )
2440
- ).reshape (- 1 , 8 if is_rotated else 4 )
2444
+ ).reshape(-1, 5 if is_rotated else 4)
2441
2445
2442
2446
id_grid = _create_identity_grid (canvas_size , device = device , dtype = dtype )
2443
2447
# We construct an approximation of inverse grid as inv_grid = id_grid - displacement
2444
2448
# This is not an exact inverse of the grid
2445
2449
inv_grid = id_grid .sub_ (displacement )
2446
2450
2447
2451
# Get points from bboxes
2448
- points = bounding_boxes if is_rotated else bounding_boxes [:, [[0 , 1 ], [2 , 1 ], [2 , 3 ], [0 , 3 ]]]
2452
+ points = bounding_boxes[:, :2] if is_rotated else bounding_boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]]
2449
2453
points = points .reshape (- 1 , 2 )
2450
2454
if points .is_floating_point ():
2451
2455
points = points .ceil_ ()
@@ -2457,8 +2461,8 @@ def elastic_bounding_boxes(
2457
2461
transformed_points = inv_grid [0 , index_y , index_x , :].add_ (1 ).mul_ (0.5 * t_size ).sub_ (0.5 )
2458
2462
2459
2463
if is_rotated :
2460
- transformed_points = transformed_points .reshape (- 1 , 8 )
2461
- out_bboxes = _parallelogram_to_bounding_boxes ( transformed_points ).to (bounding_boxes .dtype )
2464
+ transformed_points = transformed_points.reshape(-1, 2)
2465
+ out_bboxes = torch.cat([transformed_points, bounding_boxes[:, 2:]], dim=1).to(bounding_boxes.dtype)
2462
2466
else :
2463
2467
transformed_points = transformed_points .reshape (- 1 , 4 , 2 )
2464
2468
out_bbox_mins , out_bbox_maxs = torch .aminmax (transformed_points , dim = 1 )
@@ -2619,11 +2623,18 @@ def center_crop_bounding_boxes(
2619
2623
format : tv_tensors .BoundingBoxFormat ,
2620
2624
canvas_size : tuple [int , int ],
2621
2625
output_size : list [int ],
2626
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
2622
2627
) -> tuple [torch .Tensor , tuple [int , int ]]:
2623
2628
crop_height , crop_width = _center_crop_parse_output_size (output_size )
2624
2629
crop_top , crop_left = _center_crop_compute_crop_anchor (crop_height , crop_width , * canvas_size )
2625
2630
return crop_bounding_boxes (
2626
- bounding_boxes , format , top = crop_top , left = crop_left , height = crop_height , width = crop_width
2631
+ bounding_boxes,
2632
+ format,
2633
+ top=crop_top,
2634
+ left=crop_left,
2635
+ height=crop_height,
2636
+ width=crop_width,
2637
+ clamping_mode=clamping_mode,
2627
2638
)
2628
2639
2629
2640
@@ -2632,7 +2643,11 @@ def _center_crop_bounding_boxes_dispatch(
2632
2643
inpt : tv_tensors .BoundingBoxes , output_size : list [int ]
2633
2644
) -> tv_tensors .BoundingBoxes :
2634
2645
output , canvas_size = center_crop_bounding_boxes (
2635
- inpt .as_subclass (torch .Tensor ), format = inpt .format , canvas_size = inpt .canvas_size , output_size = output_size
2646
+ inpt.as_subclass(torch.Tensor),
2647
+ format=inpt.format,
2648
+ canvas_size=inpt.canvas_size,
2649
+ output_size=output_size,
2650
+ clamping_mode=inpt.clamping_mode,
2636
2651
)
2637
2652
return tv_tensors .wrap (output , like = inpt , canvas_size = canvas_size )
2638
2653
@@ -2779,17 +2794,29 @@ def resized_crop_bounding_boxes(
2779
2794
height : int ,
2780
2795
width : int ,
2781
2796
size : list [int ],
2797
+ clamping_mode: CLAMPING_MODE_TYPE = "soft",
2782
2798
) -> tuple [torch .Tensor , tuple [int , int ]]:
2783
- bounding_boxes , canvas_size = crop_bounding_boxes (bounding_boxes , format , top , left , height , width )
2784
- return resize_bounding_boxes (bounding_boxes , format = format , canvas_size = canvas_size , size = size )
2799
+ bounding_boxes, canvas_size = crop_bounding_boxes(
2800
+ bounding_boxes, format, top, left, height, width, clamping_mode=clamping_mode
2801
+ )
2802
+ return resize_bounding_boxes(
2803
+ bounding_boxes, format=format, canvas_size=canvas_size, size=size, clamping_mode=clamping_mode
2804
+ )
2785
2805
2786
2806
2787
2807
@_register_kernel_internal (resized_crop , tv_tensors .BoundingBoxes , tv_tensor_wrapper = False )
2788
2808
def _resized_crop_bounding_boxes_dispatch (
2789
2809
inpt : tv_tensors .BoundingBoxes , top : int , left : int , height : int , width : int , size : list [int ], ** kwargs
2790
2810
) -> tv_tensors .BoundingBoxes :
2791
2811
output , canvas_size = resized_crop_bounding_boxes (
2792
- inpt .as_subclass (torch .Tensor ), format = inpt .format , top = top , left = left , height = height , width = width , size = size
2812
+ inpt.as_subclass(torch.Tensor),
2813
+ format=inpt.format,
2814
+ top=top,
2815
+ left=left,
2816
+ height=height,
2817
+ width=width,
2818
+ size=size,
2819
+ clamping_mode=inpt.clamping_mode,
2793
2820
)
2794
2821
return tv_tensors .wrap (output , like = inpt , canvas_size = canvas_size )
2795
2822
0 commit comments