Skip to content

Commit 05a5959

Browse files
committed
Tiled VAE: fix bug with pathologic size (tile size - overlap + 1)
This fixes the error: "The size of tensor a (128) must match the size of tensor b (0) at non-singleton dimension 0"
1 parent e708c31 commit 05a5959

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

src/refiners/foundationals/latent_diffusion/auto_encoder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,8 +415,8 @@ def _generate_latent_tiles(size: _ImageSize, tile_size: _ImageSize, overlap: int
415415
"""
416416
tiles: list[_Tile] = []
417417

418-
for x in range(0, size.width, tile_size.width - overlap):
419-
for y in range(0, size.height, tile_size.height - overlap):
418+
for x in range(0, max(size.width - overlap, 1), tile_size.width - overlap):
419+
for y in range(0, max(size.height - overlap, 1), tile_size.height - overlap):
420420
tile = _Tile(
421421
top=max(0, y),
422422
left=max(0, x),

tests/foundationals/latent_diffusion/test_autoencoders.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,28 @@ def test_tiled_autoencoder_rectangular_image(autoencoder: LatentDiffusionAutoenc
107107
ensure_similar_images(sample_image, result, min_psnr=37, min_ssim=0.985)
108108

109109

110+
@no_grad()
111+
@pytest.mark.parametrize("img_width", [960, 968, 976, 1016, 1024, 1032])
112+
def test_tiled_autoencoder_pathologic_sizes(
113+
refiners_sd15_autoencoder: SD1Autoencoder,
114+
sample_image: Image.Image,
115+
test_device: torch.device,
116+
img_width: int,
117+
):
118+
# 968 px is the pathologic case: its latent-space width is one more than (tile size - overlap), i.e. (128 - 8 + 1) * 8 = 968
119+
120+
autoencoder = refiners_sd15_autoencoder.to(device=test_device, dtype=torch.float32)
121+
122+
sample_image = sample_image.crop((0, 0, img_width // 4, 400))
123+
sample_image = sample_image.resize((sample_image.width * 4, sample_image.height * 4))
124+
125+
with autoencoder.tiled_inference(sample_image, tile_size=(1024, 1024)):
126+
encoded = autoencoder.tiled_image_to_latents(sample_image)
127+
result = autoencoder.tiled_latents_to_image(encoded)
128+
129+
ensure_similar_images(sample_image, result, min_psnr=37, min_ssim=0.985)
130+
131+
110132
def test_value_error_tile_encode_no_context(autoencoder: LatentDiffusionAutoencoder, sample_image: Image.Image) -> None:
111133
with pytest.raises(ValueError):
112134
autoencoder.tiled_image_to_latents(sample_image)

0 commit comments

Comments
 (0)