add gaussian diffusion where model predicts both noise and x_start, with a learned weighting between the two (experimental)

lucidrains · lucidrains · commit 27853452a2a7 · 2022-05-13T12:52:11.000-07:00
diff --git a/denoising_diffusion_pytorch/__init__.py b/denoising_diffusion_pytorch/__init__.py
@@ -1,2 +1,4 @@
 from denoising_diffusion_pytorch.denoising_diffusion_pytorch import GaussianDiffusion, Unet, Trainer
+
 from denoising_diffusion_pytorch.learned_gaussian_diffusion import LearnedGaussianDiffusion
+from denoising_diffusion_pytorch.weighted_objective_gaussian_diffusion import WeightedObjectiveGaussianDiffusion
diff --git a/denoising_diffusion_pytorch/weighted_objective_gaussian_diffusion.py b/denoising_diffusion_pytorch/weighted_objective_gaussian_diffusion.py
@@ -0,0 +1,80 @@
+import torch
+from inspect import isfunction
+from torch import nn, einsum
+from einops import rearrange
+
+from denoising_diffusion_pytorch.denoising_diffusion_pytorch import GaussianDiffusion, extract, unnormalize_to_zero_to_one
+
+# helper functions
+
+def exists(x):
+    return x is not None  # only None counts as missing; falsy values such as 0 or '' still exist
+
+def default(val, d):
+    """Return `val` unless it is None; otherwise `d`, calling it first when `isfunction(d)` is true."""
+    use_fallback = not exists(val)
+    return (d() if isfunction(d) else d) if use_fallback else val
+
+# some improvisation on my end:
+# the model learns to predict both the noise and x0, plus per-pixel softmax
+# weights (conditioned on the noised input and time step) for blending the two x0 estimates
+
+class WeightedObjectiveGaussianDiffusion(GaussianDiffusion):
+    """Diffusion variant whose network emits a noise estimate, an x_start estimate and 2 blend logits.
+
+    The logits are softmax-normalized over dim 1 and used to mix the two x_start candidates;
+    `pred_noise_loss_weight` / `pred_x_start_loss_weight` scale the two auxiliary losses in `p_losses`.
+    """
+    def __init__(
+        self,
+        denoise_fn,
+        *args,
+        pred_noise_loss_weight = 0.1,
+        pred_x_start_loss_weight = 0.1,
+        **kwargs
+    ):
+        super().__init__(denoise_fn, *args, **kwargs)
+        channels = denoise_fn.channels
+        assert denoise_fn.out_dim == (channels * 2 + 2), 'dimension out (out_dim) of unet must be twice the number of channels + 2 (for the softmax weighted sum) - for channels of 3, this should be (3 * 2) + 2 = 8'
+        # channel layout of the network output: [pred noise | pred x_start | 2 blend logits]
+        self.split_dims = (channels, channels, 2)
+        self.pred_noise_loss_weight = pred_noise_loss_weight
+        self.pred_x_start_loss_weight = pred_x_start_loss_weight
+
+    def p_mean_variance(self, *, x, t, clip_denoised, model_output = None):
+        """Return the q_posterior (mean, variance, log variance) computed from the blended x_start."""
+        # reuse a caller-supplied network output; only run the network when none was given
+        model_output = default(model_output, lambda: self.denoise_fn(x, t))
+        pred_noise, pred_x_start, weights = model_output.split(self.split_dims, dim = 1)
+        normalized_weights = weights.softmax(dim = 1)
+        x_start_from_noise = self.predict_start_from_noise(x, t = t, noise = pred_noise)
+        x_starts = torch.stack((x_start_from_noise, pred_x_start), dim = 1)
+        weighted_x_start = einsum('b j h w, b j c h w -> b c h w', normalized_weights, x_starts)
+
+        if clip_denoised:
+            weighted_x_start.clamp_(-1., 1.)
+
+        model_mean, model_variance, model_log_variance = self.q_posterior(weighted_x_start, x, t)
+        return model_mean, model_variance, model_log_variance
+
+    def p_losses(self, x_start, t, noise = None, clip_denoised = False):
+        """Return the blended x_start loss plus the two weighted auxiliary losses.
+
+        `noise` defaults to `torch.randn_like(x_start)`; `clip_denoised` is accepted but not used here.
+        """
+        noise = default(noise, lambda: torch.randn_like(x_start))
+        x_t = self.q_sample(x_start = x_start, t = t, noise = noise)
+
+        model_output = self.denoise_fn(x_t, t)
+        pred_noise, pred_x_start, weights = model_output.split(self.split_dims, dim = 1)
+
+        # auxiliary losses on the raw predictions, scaled by the weights given at initialization
+        noise_loss = self.loss_fn(noise, pred_noise) * self.pred_noise_loss_weight
+        x_start_loss = self.loss_fn(x_start, pred_x_start) * self.pred_x_start_loss_weight
+
+        # x_start implied by the predicted noise, clamped to [-2, 2] before the softmax-weighted blend
+        x_start_from_pred_noise = self.predict_start_from_noise(x_t, t, pred_noise).clamp(-2., 2.)
+        weighted_x_start = einsum('b j h w, b j c h w -> b c h w', weights.softmax(dim = 1), torch.stack((x_start_from_pred_noise, pred_x_start), dim = 1))
+
+        # main loss: ground-truth x_start against the blended estimate
+        return self.loss_fn(x_start, weighted_x_start) + x_start_loss + noise_loss
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'denoising-diffusion-pytorch',
   packages = find_packages(),
-  version = '0.15.0',
+  version = '0.15.1',
   license='MIT',
   description = 'Denoising Diffusion Probabilistic Models - Pytorch',
   author = 'Phil Wang',