diffusion update

Nupur Kumari · Nupur Kumari · commit be2cd9e7df81 · 2023-04-13T13:22:52.000-04:00
diff --git a/README.md b/README.md
@@ -107,7 +107,9 @@ pip install clip-retrieval tqdm
 
 Our code was developed on the following commit `#21f890f9da3cfbeaba8e2ac3c425ee9e998d5229` of [stable-diffusion](https://github.com/CompVis/stable-diffusion).
 
-For downloading the stable-diffusion model checkpoint, please refer [here](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original).
+Download the stable-diffusion model checkpoint:
+`wget https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt`
+For more details, please refer to [this page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original).
 
 **Dataset:** we release some of the datasets used in paper [here](https://www.cs.cmu.edu/~custom-diffusion/assets/data.zip). 
 Images taken from UnSplash are under [UnSplash LICENSE](https://unsplash.com/license). Moongate dataset can be downloaded from [here](https://github.com/odegeasslbc/FastGAN-pytorch).
@@ -132,7 +134,7 @@ python src/get_deltas.py --path logs/<folder-name> --newtoken 1
 python sample.py --prompt "<new1> cat playing with a ball" --delta_ckpt logs/<folder-name>/checkpoints/delta_epoch\=000004.ckpt --ckpt <pretrained-model-path>
 ```
 
-Our results in the paper are not based on the [clip-retrieval](https://github.com/rom1504/clip-retrieval) for retrieving real images as the regularization samples. But this also leads to similar results.
+The `<pretrained-model-path>` is the path to the pretrained `sd-v1-4.ckpt` model. The results in our paper did not use [clip-retrieval](https://github.com/rom1504/clip-retrieval) to retrieve real images as regularization samples, but using it leads to similar results.
 
 **Generated images as regularization**
 ```
diff --git a/src/diffusers_model_pipeline.py b/src/diffusers_model_pipeline.py
@@ -415,16 +415,6 @@ def __init__(
                          requires_safety_checker)
 
         # change attn class
-        def change_attn(unet):
-            for layer in unet.children():
-                if type(layer) == CrossAttention:
-                    bound_method = set_use_memory_efficient_attention_xformers.__get__(layer, layer.__class__)
-                    setattr(layer, 'set_use_memory_efficient_attention_xformers', bound_method)
-                else:
-                    change_attn(layer)
-
-        change_attn(self.unet)
-        self.unet.set_attn_processor(CustomDiffusionAttnProcessor())
         self.modifier_token = modifier_token
         self.modifier_token_id = modifier_token_id
 
diff --git a/src/diffusers_training.py b/src/diffusers_training.py
@@ -645,8 +645,7 @@ def main(args):
                 class_images_dir.mkdir(parents=True, exist_ok=True)
             if args.real_prior:
                 if accelerator.is_main_process:
-                    name = '_'.join(concept['class_prompt'].split())
-                    if not Path(os.path.join(class_images_dir, name)).exists() or len(list(Path(os.path.join(class_images_dir, name)).iterdir())) < args.num_class_images:
+                    if not Path(os.path.join(class_images_dir, 'images')).exists() or len(list(Path(os.path.join(class_images_dir, 'images')).iterdir())) < args.num_class_images:
                         retrieve.retrieve(concept['class_prompt'], class_images_dir, args.num_class_images)
                 concept['class_prompt'] = os.path.join(class_images_dir, 'caption.txt')
                 concept['class_data_dir'] = os.path.join(class_images_dir, 'images.txt')
@@ -674,7 +673,7 @@ def main(args):
                     num_new_images = args.num_class_images - cur_class_images
                     logger.info(f"Number of class images to sample: {num_new_images}.")
 
-                    sample_dataset = PromptDataset(args.class_prompt, num_new_images)
+                    sample_dataset = PromptDataset(concept['class_prompt'], num_new_images)
                     sample_dataloader = torch.utils.data.DataLoader(sample_dataset, batch_size=args.sample_batch_size)
 
                     sample_dataloader = accelerator.prepare(sample_dataloader)
@@ -741,7 +740,6 @@ def main(args):
         args.pretrained_model_name_or_path, subfolder="unet", revision=args.revision
     )
 
-    # We only train the additional adapter LoRA layers
     vae.requires_grad_(False)
     if not args.train_text_encoder and args.modifier_token is None:
         text_encoder.requires_grad_(False)
@@ -1032,12 +1030,13 @@ def main(args):
                             args.pretrained_model_name_or_path,
                             unet=accelerator.unwrap_model(unet),
                             text_encoder=accelerator.unwrap_model(text_encoder),
+                            tokenizer=tokenizer,
                             revision=args.revision,
                             modifier_token=args.modifier_token,
                             modifier_token_id=modifier_token_id,
                         )
                         save_path = os.path.join(args.output_dir, f"delta-{global_step}.bin")
-                        pipeline.save_pretrained(save_path)
+                        pipeline.save_pretrained(save_path, freeze_model=args.freeze_model)
 
             logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]}
             progress_bar.set_postfix(**logs)
@@ -1061,7 +1060,7 @@ def main(args):
             modifier_token_id=modifier_token_id,
         )
         save_path = os.path.join(args.output_dir, f"delta.bin")
-        pipeline.save_pretrained(save_path)
+        pipeline.save_pretrained(save_path, freeze_model=args.freeze_model)
         if args.validation_prompt is not None:
             logger.info(
                 f"Running validation... \n Generating {args.num_validation_images} images with prompt:"