Skip to content

refactor: migrate from setup.py to pyproject.toml and reorganize tools #61

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Build configuration: the package is built with the hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Core package metadata.
[project]
name = "semseg"
version = "0.5.0"
description = "SOTA Semantic Segmentation Models"
readme = "README.md"
license = {text = "MIT License"}
requires-python = ">=3.10.12"
# Runtime dependencies. "~=" is a compatible-release pin (PEP 440);
# ">=" only sets a lower bound.
dependencies = [
"coremltools>=8.1",
"einops~=0.8.0",
"fvcore~=0.1.5.post20221221",
"matplotlib~=3.9.2",
"numpy~=1.26.4",
"onnx>=1.17.0",
"rich~=13.9.4",
"scipy~=1.14.1",
"tabulate~=0.9.0",
"tensorboard~=2.18.0",
"torch>=2.5.1",
"torchvision>=0.20.1",
"tqdm~=4.67.0",
]
# Console command `semseg-infer` invokes `main` from the `semseg.tools.infer` module.
scripts = { semseg-infer = "semseg.tools.infer:main" }

[project.urls]
Repository = "https://github.com/sithu31296/semantic-segmentation"

# Wheel contents: ship the "semseg" package directory.
[tool.hatch.build.targets.wheel]
packages = ["semseg"]
22 changes: 13 additions & 9 deletions semseg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

def show_models():
    """Print a numbered table of the model names listed in ``models.__all__``."""
    model_names = models.__all__
    # 1-based row numbers for display.
    numbers = list(range(1, len(model_names) + 1))
    print(tabulate({"No.": numbers, "Model Names": model_names}, headers="keys"))


def show_backbones():
Expand All @@ -16,18 +16,22 @@ def show_backbones():
for name in backbone_names:
try:
variants.append(list(eval(f"backbones.{name.lower()}_settings").keys()))
except:
variants.append('-')
print(tabulate({'Backbone Names': backbone_names, 'Variants': variants}, headers='keys'))
except Exception:
variants.append("-")
print(
tabulate(
{"Backbone Names": backbone_names, "Variants": variants}, headers="keys"
)
)


def show_heads():
    """Print a numbered table of the head names listed in ``heads.__all__``."""
    head_names = heads.__all__
    # 1-based row numbers for display.
    numbers = list(range(1, len(head_names) + 1))
    print(tabulate({"No.": numbers, "Heads": head_names}, headers="keys"))


def show_datasets():
    """Print a numbered table of the dataset names listed in ``datasets.__all__``."""
    dataset_names = datasets.__all__
    # 1-based row numbers for display.
    numbers = list(range(1, len(dataset_names) + 1))
    print(tabulate({"No.": numbers, "Datasets": dataset_names}, headers="keys"))
167 changes: 105 additions & 62 deletions semseg/augmentations.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import torchvision.transforms.functional as TF
import torchvision.transforms.functional as TF
import random
import math
import torch
from torch import Tensor
from typing import Tuple, List, Union, Tuple, Optional
from typing import Tuple, List, Union


class Compose:
Expand All @@ -23,7 +23,9 @@ def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:


class Normalize:
def __init__(self, mean: list = (0.485, 0.456, 0.406), std: list = (0.229, 0.224, 0.225)):
def __init__(
self, mean: list = (0.485, 0.456, 0.406), std: list = (0.229, 0.224, 0.225)
):
self.mean = mean
self.std = std

Expand Down Expand Up @@ -136,8 +138,8 @@ def __call__(self, image, label):

class Posterize:
    """Apply ``TF.posterize`` to the image; the label is passed through untouched."""

    def __init__(self, bits=2):
        """Store the ``bits`` argument that is forwarded to ``TF.posterize``."""
        self.bits = bits  # 0-8

    def __call__(self, image, label):
        """Return ``(posterized image, label)``; only the image is modified."""
        return TF.posterize(image, self.bits), label

Expand All @@ -149,21 +151,43 @@ def __init__(self, angle=0, translate=[0, 0], scale=1.0, shear=[0, 0], seg_fill=
self.scale = scale
self.shear = shear
self.seg_fill = seg_fill

def __call__(self, img, label):
return TF.affine(img, self.angle, self.translate, self.scale, self.shear, TF.InterpolationMode.BILINEAR, 0), TF.affine(label, self.angle, self.translate, self.scale, self.shear, TF.InterpolationMode.NEAREST, self.seg_fill)
return TF.affine(
img,
self.angle,
self.translate,
self.scale,
self.shear,
TF.InterpolationMode.BILINEAR,
0,
), TF.affine(
label,
self.angle,
self.translate,
self.scale,
self.shear,
TF.InterpolationMode.NEAREST,
self.seg_fill,
)


class RandomRotation:
def __init__(self, degrees: float = 10.0, p: float = 0.2, seg_fill: int = 0, expand: bool = False) -> None:
def __init__(
self,
degrees: float = 10.0,
p: float = 0.2,
seg_fill: int = 0,
expand: bool = False,
) -> None:
"""Rotate the image by a random angle between -angle and angle with probability p

Args:
p: probability
angle: rotation angle value in degrees, counter-clockwise.
expand: Optional expansion flag.
expand: Optional expansion flag.
If true, expands the output image to make it large enough to hold the entire rotated image.
If false or omitted, make the output image the same size as the input image.
If false or omitted, make the output image the same size as the input image.
Note that the expand flag assumes rotation around the center and no translation.
"""
self.p = p
Expand All @@ -174,10 +198,18 @@ def __init__(self, degrees: float = 10.0, p: float = 0.2, seg_fill: int = 0, exp
def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
random_angle = random.random() * 2 * self.angle - self.angle
if random.random() < self.p:
img = TF.rotate(img, random_angle, TF.InterpolationMode.BILINEAR, self.expand, fill=0)
mask = TF.rotate(mask, random_angle, TF.InterpolationMode.NEAREST, self.expand, fill=self.seg_fill)
img = TF.rotate(
img, random_angle, TF.InterpolationMode.BILINEAR, self.expand, fill=0
)
mask = TF.rotate(
mask,
random_angle,
TF.InterpolationMode.NEAREST,
self.expand,
fill=self.seg_fill,
)
return img, mask


class CenterCrop:
def __init__(self, size: Union[int, List[int], Tuple[int]]) -> None:
Expand Down Expand Up @@ -209,8 +241,8 @@ def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
if random.random() < self.p:
margin_h = max(H - tH, 0)
margin_w = max(W - tW, 0)
y1 = random.randint(0, margin_h+1)
x1 = random.randint(0, margin_w+1)
y1 = random.randint(0, margin_h + 1)
x1 = random.randint(0, margin_w + 1)
y2 = y1 + tH
x2 = x1 + tW
img = img[:, y1:y2, x1:x2]
Expand All @@ -219,7 +251,9 @@ def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:


class Pad:
def __init__(self, size: Union[List[int], Tuple[int], int], seg_fill: int = 0) -> None:
def __init__(
self, size: Union[List[int], Tuple[int], int], seg_fill: int = 0
) -> None:
"""Pad the given image on all sides with the given "pad" value.
Args:
size: expected output image size (h, w)
Expand All @@ -229,16 +263,18 @@ def __init__(self, size: Union[List[int], Tuple[int], int], seg_fill: int = 0) -
self.seg_fill = seg_fill

def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
padding = (0, 0, self.size[1]-img.shape[2], self.size[0]-img.shape[1])
padding = (0, 0, self.size[1] - img.shape[2], self.size[0] - img.shape[1])
return TF.pad(img, padding), TF.pad(mask, padding, self.seg_fill)


class ResizePad:
def __init__(self, size: Union[int, Tuple[int], List[int]], seg_fill: int = 0) -> None:
def __init__(
self, size: Union[int, Tuple[int], List[int]], seg_fill: int = 0
) -> None:
"""Resize the input image to the given size.
Args:
size: Desired output size.
If size is a sequence, the output size will be matched to this.
size: Desired output size.
If size is a sequence, the output size will be matched to this.
If size is an int, the smaller edge of the image will be matched to this number maintaining the aspect ratio.
"""
self.size = size
Expand All @@ -248,50 +284,54 @@ def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
H, W = img.shape[1:]
tH, tW = self.size

# scale the image
scale_factor = min(tH/H, tW/W) if W > H else max(tH/H, tW/W)
# scale the image
scale_factor = min(tH / H, tW / W) if W > H else max(tH / H, tW / W)
# nH, nW = int(H * scale_factor + 0.5), int(W * scale_factor + 0.5)
nH, nW = round(H*scale_factor), round(W*scale_factor)
nH, nW = round(H * scale_factor), round(W * scale_factor)
img = TF.resize(img, (nH, nW), TF.InterpolationMode.BILINEAR)
mask = TF.resize(mask, (nH, nW), TF.InterpolationMode.NEAREST)

# pad the image
padding = [0, 0, tW - nW, tH - nH]
img = TF.pad(img, padding, fill=0)
mask = TF.pad(mask, padding, fill=self.seg_fill)
return img, mask
return img, mask


class Resize:
    def __init__(self, size: Union[int, Tuple[int], List[int]]) -> None:
        """Resize so the shorter edge matches ``size``, then snap both edges up to multiples of 32.

        Args:
            size: Target length of the shorter image edge.
                If size is an int, it is used directly.
                If size is a sequence, only its first element is used.
        """
        self.size = size

    def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
        """Resize ``img`` (bilinear) and ``mask`` (nearest) to the same output shape.

        Returns:
            The resized ``(img, mask)`` pair; height and width are multiples of 32.
        """
        # The last two dimensions are treated as (height, width).
        H, W = img.shape[1:]

        # Accept a bare int as well as a sequence; indexing an int would raise TypeError.
        short_side = self.size if isinstance(self.size, int) else self.size[0]

        # scale the image so its shorter edge equals short_side, keeping the aspect ratio
        scale_factor = short_side / min(H, W)
        nH, nW = round(H * scale_factor), round(W * scale_factor)
        img = TF.resize(img, (nH, nW), TF.InterpolationMode.BILINEAR)
        mask = TF.resize(mask, (nH, nW), TF.InterpolationMode.NEAREST)

        # make the image divisible by stride (round each edge up to a multiple of 32)
        alignH, alignW = int(math.ceil(nH / 32)) * 32, int(math.ceil(nW / 32)) * 32
        img = TF.resize(img, (alignH, alignW), TF.InterpolationMode.BILINEAR)
        mask = TF.resize(mask, (alignH, alignW), TF.InterpolationMode.NEAREST)
        return img, mask


class RandomResizedCrop:
def __init__(self, size: Union[int, Tuple[int], List[int]], scale: Tuple[float, float] = (0.5, 2.0), seg_fill: int = 0) -> None:
"""Resize the input image to the given size.
"""
def __init__(
self,
size: Union[int, Tuple[int], List[int]],
scale: Tuple[float, float] = (0.5, 2.0),
seg_fill: int = 0,
) -> None:
"""Resize the input image to the given size."""
self.size = size
self.scale = scale
self.seg_fill = seg_fill
Expand All @@ -303,10 +343,10 @@ def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
# get the scale
ratio = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
# ratio = random.uniform(min(self.scale), max(self.scale))
scale = int(tH*ratio), int(tW*4*ratio)
scale = int(tH * ratio), int(tW * 4 * ratio)

# scale the image
scale_factor = min(max(scale)/max(H, W), min(scale)/min(H, W))
# scale the image
scale_factor = min(max(scale) / max(H, W), min(scale) / min(H, W))
nH, nW = int(H * scale_factor + 0.5), int(W * scale_factor + 0.5)
# nH, nW = int(math.ceil(nH / 32)) * 32, int(math.ceil(nW / 32)) * 32
img = TF.resize(img, (nH, nW), TF.InterpolationMode.BILINEAR)
Expand All @@ -315,8 +355,8 @@ def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
# random crop
margin_h = max(img.shape[1] - tH, 0)
margin_w = max(img.shape[2] - tW, 0)
y1 = random.randint(0, margin_h+1)
x1 = random.randint(0, margin_w+1)
y1 = random.randint(0, margin_h + 1)
x1 = random.randint(0, margin_w + 1)
y2 = y1 + tH
x2 = x1 + tW
img = img[:, y1:y2, x1:x2]
Expand All @@ -327,40 +367,43 @@ def __call__(self, img: Tensor, mask: Tensor) -> Tuple[Tensor, Tensor]:
padding = [0, 0, tW - img.shape[2], tH - img.shape[1]]
img = TF.pad(img, padding, fill=0)
mask = TF.pad(mask, padding, fill=self.seg_fill)
return img, mask

return img, mask


def get_train_augmentation(size: Union[int, Tuple[int], List[int]], seg_fill: int = 0):
    """Build the training augmentation pipeline.

    The active steps are, in order: a random horizontal flip (p=0.5), a random
    resized crop to ``size`` with scale range (0.5, 2.0), and normalization.
    The commented-out entries are alternative augmentations kept for reference.

    Args:
        size: Crop size forwarded to ``RandomResizedCrop``.
        seg_fill: Mask fill value forwarded to ``RandomResizedCrop``.

    Returns:
        A ``Compose`` wrapping the enabled transforms.
    """
    return Compose(
        [
            # ColorJitter(brightness=0.0, contrast=0.5, saturation=0.5, hue=0.5),
            # RandomAdjustSharpness(sharpness_factor=0.1, p=0.5),
            # RandomAutoContrast(p=0.2),
            RandomHorizontalFlip(p=0.5),
            # RandomVerticalFlip(p=0.5),
            # RandomGaussianBlur((3, 3), p=0.5),
            # RandomGrayscale(p=0.5),
            # RandomRotation(degrees=10, p=0.3, seg_fill=seg_fill),
            RandomResizedCrop(size, scale=(0.5, 2.0), seg_fill=seg_fill),
            Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ]
    )


def get_val_augmentation(size: Union[int, Tuple[int], List[int]]):
    """Build the validation pipeline: ``Resize(size)`` followed by normalization.

    Args:
        size: Size forwarded to ``Resize``.

    Returns:
        A ``Compose`` wrapping the two transforms.
    """
    return Compose(
        [Resize(size), Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]
    )


if __name__ == "__main__":
    # Manual smoke test: push a random 3x230x420 image and 1x230x420 mask
    # through RandomResizedCrop and print the resulting shapes.
    h = 230
    w = 420
    img = torch.randn(3, h, w)
    mask = torch.randn(1, h, w)
    aug = Compose(
        [
            RandomResizedCrop((512, 512)),
            # RandomCrop((512, 512), p=1.0),
            # Pad((512, 512))
        ]
    )
    img, mask = aug(img, mask)
    print(img.shape, mask.shape)
Loading