fixed random cropping

garbade · garbade · commit a31304c0432d · 2017-02-12T20:23:17.000+01:00
diff --git a/deeplab_resnet/image_reader.py b/deeplab_resnet/image_reader.py
@@ -3,7 +3,11 @@
 import numpy as np
 import tensorflow as tf
 
-IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
+
+
+# IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
+# IMG_MEAN = np.array((104.0,116.0,122.0), dtype=np.float32)
+
 
 def read_labeled_image_list(data_dir, data_list):
     """Reads txt file containing paths to images and ground truth masks.
@@ -37,13 +41,87 @@ def read_labeled_image_list(data_dir, data_list):
     
     return images, masks, img_type
 
-def image_distortions(image, distortions):
-    distort_left_right_random = distortions[0]
+def image_mirroring(image, random_number):
+    distort_left_right_random = random_number[0]
     mirror = tf.less(tf.pack([1.0, distort_left_right_random, 1.0]), 0.5)
     image = tf.reverse(image, mirror)
     return image
+
+def random_crop_and_pad_image_and_labels(image, labels, crop_h, crop_w):
+    """Randomly crops `image` together with `labels`.
+        Padding fills with 0, so to have labels padded with "ignore_label" the caller must subtract that value
+        from the label image beforehand and add it back afterwards. Make sure the dtype allows negative values.
+    Args:
+        image: A Tensor with shape [D_1, ..., D_K, N]
+        labels: A Tensor with shape [D_1, ..., D_K, M]
+        crop_h, crop_w: Height and width of the crop.
+    Returns:
+        A tuple of (cropped_image, cropped_label).
+    """
+    combined = tf.concat(2, [image, labels]) 
+    image_shape = tf.shape(image)
+    combined_pad = tf.image.pad_to_bounding_box(
+        combined, 0, 0,
+        tf.maximum(crop_h, image_shape[0]),
+        tf.maximum(crop_w, image_shape[1]))
+    
+    last_image_dim = tf.shape(image)[-1]
+    last_label_dim = tf.shape(labels)[-1]
+    combined_crop = tf.random_crop(combined_pad,[crop_h,crop_w,4]) # TODO: Derive the channel count (hard-coded 4) from the inputs instead
+
+    return (combined_crop[:, :, :last_image_dim],
+            combined_crop[:, :, last_image_dim:])
+
     
-def read_images_from_disk(input_queue, input_size, random_scale, img_type): # optional pre-processing arguments
+def preprocess_input_train(img, label, ignore_label = 255, 
+                                       input_size = (321,321),
+                                       scale = True,
+                                       mirror = True):
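+    """Apply the training-time augmentations to a decoded image and its mask.
+    Args:
+      img, label: the decoded image and its mask. `scale` / `mirror` switch random scaling and
+        mirroring on; `input_size` is the (height, width) crop size; `ignore_label` fills padded labels.
+    Returns:
+      Two tensors: the augmented image and its mask, both cropped/padded to `input_size`.
+    """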
+    
+    if scale:
+        # Scale
+        scale = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None)
+        h_new = tf.to_int32(tf.mul(tf.to_float(tf.shape(img)[0]), scale))
+        w_new = tf.to_int32(tf.mul(tf.to_float(tf.shape(img)[1]), scale))
+        new_shape = tf.squeeze(tf.pack([h_new, w_new]), squeeze_dims=[1])
+        img = tf.image.resize_images(img, new_shape)
+        label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape)
+        label = tf.squeeze(label, squeeze_dims=[0])
+    
+    if mirror:
+        # Mirror
+        random_number = tf.random_uniform([2], 0, 1.0, dtype=tf.float32)
+        img = image_mirroring(img, random_number)
+        label = image_mirroring(label, random_number)
+
+    # Crop and pad image
+    label = tf.cast(label, dtype=tf.float32) # ignore_label is subtracted now and added back after cropping, because padding fills with 0
+    label = label - ignore_label
+    crop_h, crop_w = input_size
+    img, label = random_crop_and_pad_image_and_labels(img, label, crop_h, crop_w)
+    label = label + ignore_label
+    label = tf.cast(label, dtype=tf.uint8)
+    # Set static shape so that tensorflow knows shape at compile time 
+    img.set_shape((crop_h, crop_w, 3))
+    label.set_shape((crop_h,crop_w, 1))
+        
+    return img, label
+    
+
+def read_images_from_disk(input_queue, 
+                          img_type, 
+                          phase, 
+                          input_size = (321,321), 
+                          ignore_label = 255,
+                          scale = True,
+                          mirror = True): 
     """Read one image and its corresponding mask with optional pre-processing.
     
     Args:
@@ -52,11 +130,12 @@ def read_images_from_disk(input_queue, input_size, random_scale, img_type): # op
                   If not given, return images of original size.
       random_scale: whether to randomly scale the images prior
                     to random crop.
+      phase: A string specifying either 'train', 'valid' or 'test'.
       
     Returns:
       Two tensors: the decoded image and its mask.
     """
-    mirror = True # TODO: make this a variable    
+
     img_contents = tf.read_file(input_queue[0])
     label_contents = tf.read_file(input_queue[1])
     
@@ -66,55 +145,69 @@ def read_images_from_disk(input_queue, input_size, random_scale, img_type): # op
         img = tf.image.decode_png(img_contents, channels=3) # CamVid
         
     label = tf.image.decode_png(label_contents, channels=1)
-    if input_size is not None:
-        h, w = input_size
-        if random_scale:
-            scale = tf.random_uniform([1], minval=0.75, maxval=1.25, dtype=tf.float32, seed=None)
-            h_new = tf.to_int32(tf.mul(tf.to_float(tf.shape(img)[0]), scale))
-            w_new = tf.to_int32(tf.mul(tf.to_float(tf.shape(img)[1]), scale))
-            new_shape = tf.squeeze(tf.pack([h_new, w_new]), squeeze_dims=[1])
-
-            img = tf.image.resize_images(img, new_shape)
-            label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape)
-            label = tf.squeeze(label, squeeze_dims=[0])
-        if mirror:
-            distortions = tf.random_uniform([2], 0, 1.0, dtype=tf.float32)
-            img = image_distortions(img, distortions)
-            label = image_distortions(label, distortions)            
-        img = tf.image.resize_image_with_crop_or_pad(img, h, w)
-        label = tf.image.resize_image_with_crop_or_pad(label, h, w)
+    
+
+    # Change RGB to BGR
     img_r, img_g, img_b = tf.split(split_dim=2, num_split=3, value=img)
-    img = tf.cast(tf.concat(2, [img_b, img_g, img_r]), dtype=tf.float32)
-    # extract mean
-    img -= IMG_MEAN 
-    return img, label
+    img = tf.cast(tf.concat(2, [img_b, img_g, img_r]), dtype=tf.float32)    
+    
+    # Mean subtraction 
+    IMG_MEAN = tf.constant([104.00698793,116.66876762,122.67891434],shape=[1,1,3], dtype=tf.float32) # BGR
+    IMG_MEAN = tf.reshape(IMG_MEAN,[1,1,3]) 
+    img = img - IMG_MEAN
+    
+    
+    # Optional preprocessing for training phase    
+    if phase == 'train':
+        img, label = preprocess_input_train(img, label, input_size = (321,321), 
+                                                      ignore_label = ignore_label)
+    elif phase == 'valid':
+        # TODO: Perform only a central crop -> size should be the same as during training
+        pass
+    elif phase == 'test':
+        pass
+
+    return img, label    
+
+
+
+
+def image_mirroring(image, random_number):
+    distort_left_right_random = random_number[0]
+    mirror = tf.less(tf.pack([1.0, distort_left_right_random, 1.0]), 0.5)
+    image = tf.reverse(image, mirror)
+    return image
+    
 
 class ImageReader(object):
     '''Generic ImageReader which reads images and corresponding segmentation
        masks from the disk, and enqueues them into a TensorFlow queue.
     '''
 
-    def __init__(self, data_dir, data_list, input_size, random_scale, coord):
+    def __init__(self, data_dir, data_list, input_size, phase, coord):
         '''Initialise an ImageReader.
         
         Args:
           data_dir: path to the directory with images and masks.
           data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
           input_size: a tuple with (height, width) values, to which all the images will be resized.
-          random_scale: whether to randomly scale the images prior to random crop.
+          phase: 'train', 'valid' or 'test'
           coord: TensorFlow queue coordinator.
         '''
         self.data_dir = data_dir
         self.data_list = data_list
         self.input_size = input_size
         self.coord = coord
+        self.phase = phase
         
         self.image_list, self.label_list , self.img_type = read_labeled_image_list(self.data_dir, self.data_list)
         self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string)
         self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string)
         self.queue = tf.train.slice_input_producer([self.images, self.labels],
                                                    shuffle=input_size is not None) # not shuffling if it is val
-        self.image, self.label = read_images_from_disk(self.queue, self.input_size, random_scale, self.img_type) 
+        # self.image, self.label = read_images_from_disk(self.queue, self.input_size, phase, self.img_type) 
+        # self.image, self.label = read_images_from_disk(self.queue, self.img_type, self.phase, input_size = (321,321), ignore_label = 255)
+        self.image, self.label = read_images_from_disk(self.queue, self.img_type, self.phase)
 
     def dequeue(self, num_elements):
         '''Pack images and labels into a batch.
diff --git a/evaluate.py b/evaluate.py
@@ -20,31 +20,35 @@
 import tensorflow as tf
 import numpy as np
 
+# from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label, decode_labels, decode_labels_old
 from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label, decode_labels, decode_labels_old
 
 
+
 OUTPUT_IMGS = True
 
 ### Voc12
-#n_classes = 21
-#ignore_label = 20
-#DATA_DIRECTORY = '/home/garbade/datasets/VOC2012/'
-#DATA_LIST_PATH = './dataset/voc12/val_Bndry255.txt'
-#DATA_LIST_PATH_ID = '/home/garbade/models/01_voc12/17_DL_v2_ResNet/voc12/list/val_id.txt'
-#RESTORE_FROM = '/home/garbade/models_tf/01_voc12/07_LR_fixed/snapshots_finetune/model.ckpt-17400'
-##RESTORE_FROM = './Vladimir/model.ckpt-20000'
-#SAVE_DIR = '/home/garbade/models_tf/01_voc12/07_LR_fixed/images_val/'
+n_classes = 21
+ignore_label = 20
+DATA_DIRECTORY = '/home/garbade/datasets/VOC2012/'
+DATA_LIST_PATH = './dataset/voc12/val_Bndry255.txt'
+DATA_LIST_PATH_ID = './dataset/voc12/val_id.txt'
+RESTORE_FROM = '/home/garbade/models_tf/01_voc12/14_fixedRandomCropping/snapshots_finetune/model.ckpt-20000'
+#RESTORE_FROM = './Vladimir/model.ckpt-20000'
+SAVE_DIR = '/home/garbade/models_tf/01_voc12/14_fixedRandomCropping/images_val/'
 
 
 ### CamVid
-n_classes = 11
-ignore_label = 10
-DATA_DIRECTORY = '/home/garbade/datasets/CamVid/'
-DATA_LIST_PATH = '/home/garbade/datasets/CamVid/list/test_70.txt'
-DATA_LIST_PATH_ID = '/home/garbade/datasets/CamVid/list/test_id.txt'
-SAVE_DIR = '/home/garbade/models_tf/03_CamVid/04_nc11_ic10/images_val/'
-RESTORE_FROM = '/home/garbade/models_tf/03_CamVid/10_fixedMirrorImgAndScale/snapshots_finetune/model.ckpt-6500'
-SAVE_DIR = '/home/garbade/models_tf/03_CamVid/10_fixedMirrorImgAndScale/images_val/'
+#n_classes = 11
+#ignore_label = 255
+#DATA_DIRECTORY = '/home/garbade/datasets/CamVid/'
+## DATA_LIST_PATH = './dataset/camvid/test_70.txt'
+#DATA_LIST_PATH = './dataset/camvid/test.txt'
+#DATA_LIST_PATH_ID = './dataset/camvid/test_id.txt'
+#SAVE_DIR = '/home/garbade/models_tf/03_CamVid/14_fixedRandomCropping/images_val_full/'
+## RESTORE_FROM = '/home/garbade/models_tf/03_CamVid/12_higherLR/snapshots_finetune/model.ckpt-6600'
+#RESTORE_FROM = '/home/garbade/models_tf/03_CamVid/14_fixedRandomCropping/snapshots_finetune/model.ckpt-20000'
+
 
 
 ### Cityscapes (19 classes + BG)
@@ -54,8 +58,8 @@
 #DATA_LIST_PATH='./dataset/city/small_50/val_splt_offst_65.txt'
 #DATA_LIST_PATH_ID='./dataset/city/small_50/val_split_id.txt'
 #TRAIN_SIZE=1000
-#RESTORE_FROM = '/home/garbade/models_tf/05_Cityscapes/07_fixedLR/snapshots_finetune/model.ckpt-17400'
-#SAVE_DIR = '/home/garbade/models_tf/05_Cityscapes/07_fixedLR/images_val/'
+#RESTORE_FROM = '/home/garbade/models_tf/05_Cityscapes/14_fixedRandomCropping/snapshots_finetune/model.ckpt-20000'
+#SAVE_DIR = '/home/garbade/models_tf/05_Cityscapes/14_fixedRandomCropping/images_val/'
 
 
 
@@ -120,7 +124,7 @@ def main():
             args.data_dir,
             args.data_list,
             None, # No defined input size.
-            False, # No random scale.
+            'test', # phase 'train', 'valid' or 'test'
             coord)
         image, label = reader.image, reader.label
     image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension.
@@ -140,7 +144,7 @@ def main():
     # mIoU
     pred_lin = tf.reshape(pred, [-1,])
     gt = tf.reshape(label_batch, [-1,])
-    weights = tf.cast(tf.less_equal(gt, args.ignore_label), tf.int32) # Ignore void label '255'.
+    weights = tf.cast(tf.less_equal(gt, args.n_classes - 1), tf.int32) # TODO: Include n_classes - 1 -> ignore void label '255'.
     mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred_lin, gt, num_classes = args.n_classes, weights = weights)
     
     # Set up tf session and initialize variables. 
diff --git a/inference.py b/inference.py
@@ -18,6 +18,8 @@
 
 from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, prepare_label
 
+IMG_FILE = '~/lena.jpg'
+MODEL_DIR = './deeplab_tf_model/deeplab_resnet.ckpt'
 SAVE_DIR = './output/'
 IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
 
@@ -28,9 +30,9 @@ def get_arguments():
       A list of parsed arguments.
     """
     parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.")
-    parser.add_argument("img_path", type=str,
+    parser.add_argument("img_path", type=str, default=IMG_FILE,
                         help="Path to the RGB image file.")
-    parser.add_argument("model_weights", type=str,
+    parser.add_argument("model_weights", type=str, default=MODEL_DIR,
                         help="Path to the file with model weights.")
     parser.add_argument("--save-dir", type=str, default=SAVE_DIR,
                         help="Where to save predicted mask.")
diff --git a/npy2ckpt.py b/npy2ckpt.py
@@ -15,6 +15,7 @@
 from deeplab_resnet import DeepLabResNetModel
 
 SAVE_DIR = './'
+n_classes = 11
 
 def get_arguments():
     """Parse all the arguments provided from the CLI.
@@ -27,6 +28,10 @@ def get_arguments():
                         help="Path to the .npy file, which contains the weights.")
     parser.add_argument("--save-dir", type=str, default=SAVE_DIR,
                         help="Where to save the converted .ckpt file.")
+    parser.add_argument("--n_classes", type=int, default=n_classes,
+                        help="Number of classes.")
+    parser.add_argument("--is-training", action="store_true",
+                        help="Whether to updates the running means and variances during the training.")                        
     return parser.parse_args()
 
 def save(saver, sess, logdir):
@@ -47,7 +52,8 @@ def main():
     # Default image.
     image_batch = tf.constant(0, tf.float32, shape=[1, 321, 321, 3]) 
     # Create network.
-    net = DeepLabResNetModel({'data': image_batch})
+    net = DeepLabResNetModel({'data': image_batch},args.n_classes, is_training=args.is_training)
+    
     var_list = tf.global_variables()
           
     # Set up tf session and initialize variables. 
diff --git a/train.py b/train.py