@@ -832,7 +832,7 @@ def test_diffusion_std_random_init_bfloat16(
832
832
)
833
833
predicted_image = sd15 .lda .latents_to_image (x )
834
834
835
- ensure_similar_images (predicted_image , expected_image_std_random_init_bfloat16 )
835
+ ensure_similar_images (predicted_image , expected_image_std_random_init_bfloat16 , min_psnr = 30 , min_ssim = 0.97 )
836
836
837
837
838
838
@no_grad ()
@@ -1166,7 +1166,7 @@ def test_diffusion_inpainting_float16(
1166
1166
predicted_image = sd15 .lda .latents_to_image (x )
1167
1167
1168
1168
# Note: PSNR and SSIM tolerances are large here (i.e. the minimum thresholds are lowered) because float16 is even less accurate than float32.
1169
- ensure_similar_images (predicted_image , expected_image_std_inpainting , min_psnr = 20 , min_ssim = 0.92 )
1169
+ ensure_similar_images (predicted_image , expected_image_std_inpainting , min_psnr = 25 , min_ssim = 0.95 , min_dinov2 = 0.96 )
1170
1170
1171
1171
1172
1172
@no_grad ()
@@ -1245,7 +1245,7 @@ def test_diffusion_controlnet_tile_upscale(
1245
1245
predicted_image = sd15 .lda .latents_to_image (x )
1246
1246
1247
1247
# Note: rather large tolerances are used on purpose here (loose comparison with diffusers' output)
1248
- ensure_similar_images (predicted_image , expected_image , min_psnr = 24 , min_ssim = 0.75 )
1248
+ ensure_similar_images (predicted_image , expected_image , min_psnr = 24 , min_ssim = 0.75 , min_dinov2 = 0.94 )
1249
1249
1250
1250
1251
1251
@no_grad ()
@@ -1852,7 +1852,7 @@ def test_diffusion_ella_adapter(
1852
1852
condition_scale = 12 ,
1853
1853
)
1854
1854
predicted_image = sd15 .lda .latents_to_image (x )
1855
- ensure_similar_images (predicted_image , expected_image_ella_adapter , min_psnr = 35 , min_ssim = 0.98 )
1855
+ ensure_similar_images (predicted_image , expected_image_ella_adapter , min_psnr = 31 , min_ssim = 0.98 )
1856
1856
1857
1857
1858
1858
@no_grad ()
@@ -1937,7 +1937,7 @@ def test_diffusion_ip_adapter_multi(
1937
1937
)
1938
1938
predicted_image = sd15 .lda .decode_latents (x )
1939
1939
1940
- ensure_similar_images (predicted_image , expected_image_ip_adapter_multi )
1940
+ ensure_similar_images (predicted_image , expected_image_ip_adapter_multi , min_psnr = 43 , min_ssim = 0.98 )
1941
1941
1942
1942
1943
1943
@no_grad ()
@@ -2130,7 +2130,7 @@ def test_diffusion_sdxl_ip_adapter_plus(
2130
2130
sdxl .lda .to (dtype = torch .float32 )
2131
2131
predicted_image = sdxl .lda .latents_to_image (x .to (dtype = torch .float32 ))
2132
2132
2133
- ensure_similar_images (predicted_image , expected_image_sdxl_ip_adapter_plus_woman )
2133
+ ensure_similar_images (predicted_image , expected_image_sdxl_ip_adapter_plus_woman , min_psnr = 43 , min_ssim = 0.98 )
2134
2134
2135
2135
2136
2136
@no_grad ()
@@ -2608,11 +2608,11 @@ def test_style_aligned(
2608
2608
2609
2609
# tile all images horizontally
2610
2610
merged_image = Image .new ("RGB" , (1024 * len (predicted_images ), 1024 ))
2611
- for i in range ( len ( predicted_images ) ):
2612
- merged_image .paste (predicted_images [ i ] , (i * 1024 , 0 )) # type: ignore
2611
+ for i , image in enumerate ( predicted_images ):
2612
+ merged_image .paste (image , (1024 * i , 0 ))
2613
2613
2614
2614
# compare against reference image
2615
- ensure_similar_images (merged_image , expected_style_aligned , min_psnr = 35 , min_ssim = 0.99 )
2615
+ ensure_similar_images (merged_image , expected_style_aligned , min_psnr = 12 , min_ssim = 0.39 , min_dinov2 = 0.95 )
2616
2616
2617
2617
2618
2618
@no_grad ()
@@ -2624,7 +2624,7 @@ def test_multi_upscaler(
2624
2624
generator = torch .Generator (device = multi_upscaler .device )
2625
2625
generator .manual_seed (37 )
2626
2626
predicted_image = multi_upscaler .upscale (clarity_example , generator = generator )
2627
- ensure_similar_images (predicted_image , expected_multi_upscaler , min_psnr = 35 , min_ssim = 0.99 )
2627
+ ensure_similar_images (predicted_image , expected_multi_upscaler , min_psnr = 25 , min_ssim = 0.85 , min_dinov2 = 0.96 )
2628
2628
2629
2629
2630
2630
@no_grad ()
0 commit comments