Condition the unet on log(snr) instead of time in p_mean_variance for continuous-time Gaussian diffusion, matching p_losses, and remove the cond_scale multiplier

lucidrains · lucidrains · commit eaf9d9fdc48c · 2022-06-08T00:41:41.000-07:00
diff --git a/denoising_diffusion_pytorch/continuous_time_gaussian_diffusion.py b/denoising_diffusion_pytorch/continuous_time_gaussian_diffusion.py
@@ -59,7 +59,6 @@ def __init__(
         *,
         image_size,
         channels = 3,
-        cond_scale = 500,
         loss_type = 'l1',
         noise_schedule = 'linear',
         num_sample_steps = 500
@@ -76,7 +75,6 @@ def __init__(
 
         # continuous noise schedule related stuff
 
-        self.cond_scale = cond_scale # the log(snr) will be scaled by this value
         self.loss_type = loss_type
 
         if noise_schedule == 'linear':
@@ -108,18 +106,16 @@ def p_mean_variance(self, x, time, time_next):
         # todo - derive x_start from the posterior mean and do dynamic thresholding
         # assumed that is what is going on in Imagen
 
-        batch = x.shape[0]
-        batch_time = repeat(time, ' -> b', b = batch)
-
-        pred_noise = self.denoise_fn(x, batch_time * self.cond_scale)
-
         log_snr = self.log_snr(time)
         log_snr_next = self.log_snr(time_next)
         c = -expm1(log_snr - log_snr_next)
 
         squared_alpha, squared_alpha_next = log_snr.sigmoid(), log_snr_next.sigmoid()
         squared_sigma, squared_sigma_next = (-log_snr).sigmoid(), (-log_snr_next).sigmoid()
 
+        batch_log_snr = repeat(log_snr, ' -> b', b = x.shape[0])
+        pred_noise = self.denoise_fn(x, batch_log_snr)
+
         model_mean = sqrt(squared_alpha_next / squared_alpha) * (x - c * sqrt(squared_sigma) * pred_noise)
         posterior_variance = squared_sigma_next * c
 
@@ -151,6 +147,7 @@ def p_sample_loop(self, shape):
             times_next = steps[i + 1]
             img = self.p_sample(img, times, times_next)
 
+        img.clamp_(-1., 1.)
         img = unnormalize_to_zero_to_one(img)
         return img
 
@@ -180,7 +177,7 @@ def p_losses(self, x_start, times, noise = None):
 
         x, log_snr = self.q_sample(x_start = x_start, times = times, noise = noise)
 
-        model_out = self.denoise_fn(x, log_snr * self.cond_scale)
+        model_out = self.denoise_fn(x, log_snr)
         return self.loss_fn(model_out, noise)
 
     def forward(self, img, *args, **kwargs):
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'denoising-diffusion-pytorch',
   packages = find_packages(),
-  version = '0.16.5',
+  version = '0.16.7',
   license='MIT',
   description = 'Denoising Diffusion Probabilistic Models - Pytorch',
   author = 'Phil Wang',