@@ -171,31 +171,18 @@ def diffuse_upscaler_target(self, x: Tensor, step: int, target: UpscalerTarget)
171
171
condition_scale = target .condition_scale ,
172
172
)
173
173
174
- @staticmethod
175
- def resize_modulo_8 (image : Image .Image , size : int = 768 , on_short : bool = True ) -> Image .Image :
176
- """
177
- Resize an image respecting the aspect ratio and ensuring the size is a multiple of 8.
178
-
179
- The `on_short` parameter determines whether the resizing is based on the shortest side.
180
- """
181
- assert size % 8 == 0 , "Size must be a multiple of 8 because this is the latent compression size."
182
- side_size = min (image .size ) if on_short else max (image .size )
183
- scale = size / (side_size * 8 )
184
- new_size = (int (image .width * scale ) * 8 , int (image .height * scale ) * 8 )
185
- return image .resize (new_size , resample = Image .Resampling .LANCZOS ) # type: ignore
186
-
187
- @no_grad ()
188
- def pre_upscale (self , image : Image .Image , upscale_factor : float , ** _ : Any ) -> Image .Image :
174
+ def pre_upscale (self , image : Image .Image , upscale_factor : float ) -> Image .Image :
189
175
"""
190
176
Pre-upscale an image before the actual upscaling process.
191
177
192
- You can override this method to implement custom pre-upscaling logic like using a ESRGAN model like in the
193
- original implementation.
178
+ You can override this method to implement custom pre-upscaling logic
179
+ like using an ESRGAN model, as in the original implementation.
180
+ The resulting image must have a width and height divisible by 8.
194
181
"""
195
182
196
183
return image .resize (
197
- (int (image .width * upscale_factor ), int (image .height * upscale_factor )),
198
- resample = Image .Resampling .LANCZOS , # type: ignore
184
+ (int (( image .width * upscale_factor ) // 8 * 8 ) , int (( image .height * upscale_factor ) // 8 * 8 )),
185
+ resample = Image .Resampling .LANCZOS ,
199
186
)
200
187
201
188
def compute_upscaler_targets (
@@ -253,7 +240,6 @@ def upscale(
253
240
prompt : str = "masterpiece, best quality, highres" ,
254
241
negative_prompt : str = "worst quality, low quality, normal quality" ,
255
242
upscale_factor : float = 2 ,
256
- downscale_size : int = 768 ,
257
243
tile_size : tuple [int , int ] = (144 , 112 ),
258
244
denoise_strength : float = 0.35 ,
259
245
condition_scale : float = 6 ,
@@ -276,8 +262,6 @@ def upscale(
276
262
negative_prompt: The negative prompt to use for the upscaling. Original default has a weight of 2.0, but
277
263
using prompt weighting is not supported yet in Refiners.
278
264
upscale_factor: The factor to upscale the image by.
279
- downscale_size: The size to downscale the image along is short side to before upscaling. Must be a
280
- multiple of 8 because of latent compression.
281
265
tile_size: The size (H, W) of the tiles to use for latent diffusion. The smaller the tile size, the more "fractal"
282
266
the upscaling will be.
283
267
denoise_strength: The strength of the denoising. A value of 0.0 means no denoising (so nothing happens),
@@ -321,8 +305,8 @@ def upscale(
321
305
clip_text_embedding = self .compute_clip_text_embedding (prompt = prompt , negative_prompt = negative_prompt )
322
306
323
307
# prepare the image for the upscale
324
- image = self .resize_modulo_8 (image , size = downscale_size )
325
308
image = self .pre_upscale (image , upscale_factor = upscale_factor )
309
+ assert image .width % 8 == 0 and image .height % 8 == 0 , "rescaled image dimensions must be divisible by 8"
326
310
327
311
# compute the latent size and tile size
328
312
latent_size = Size (height = image .height // 8 , width = image .width // 8 )
0 commit comments