Skip to content
This repository was archived by the owner on Sep 16, 2024. It is now read-only.

Commit a31304c

Browse files
author
garbade
committed
fixed random cropping
1 parent ec87f8d commit a31304c

File tree

5 files changed

+173
-63
lines changed

5 files changed

+173
-63
lines changed

deeplab_resnet/image_reader.py

Lines changed: 122 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
import numpy as np
44
import tensorflow as tf
55

6-
IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
6+
7+
8+
# IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
9+
# IMG_MEAN = np.array((104.0,116.0,122.0), dtype=np.float32)
10+
711

812
def read_labeled_image_list(data_dir, data_list):
913
"""Reads txt file containing paths to images and ground truth masks.
@@ -37,13 +41,87 @@ def read_labeled_image_list(data_dir, data_list):
3741

3842
return images, masks, img_type
3943

40-
def image_distortions(image, distortions):
41-
distort_left_right_random = distortions[0]
44+
def image_mirroring(image, random_number):
    """Conditionally reverse `image` along axis 1, driven by a random draw.

    Args:
      image: A 3-D tensor. Axis 1 is presumably the width of a
        [height, width, channels] image -- confirm against the caller.
      random_number: A tensor of random values; only element 0 is read.

    Returns:
      `image`, reversed along axis 1 when `random_number[0] < 0.5`,
      otherwise unchanged.
    """
    flip_draw = random_number[0]
    # One entry per axis. The constant 1.0 entries can never be below the
    # 0.5 threshold, so axes 0 and 2 are never selected for reversal.
    per_axis_values = tf.pack([1.0, flip_draw, 1.0])
    flip_mask = tf.less(per_axis_values, 0.5)
    # NOTE(review): passing a boolean per-axis mask relies on the pre-1.0
    # tf.reverse signature -- confirm the TensorFlow version in use.
    return tf.reverse(image, flip_mask)
49+
50+
def random_crop_and_pad_image_and_labels(image, labels, crop_h, crop_w):
    """Randomly crops `image` together with `labels`, zero-padding if needed.

    Both tensors are concatenated along axis 2 so that a single random crop
    window is applied to image and labels alike. Before cropping, the
    concatenated tensor is padded with zeros at the bottom/right up to at
    least `crop_h` x `crop_w`.

    To ensure labels are padded with "ignore_label", that value has to be
    subtracted from the label image before calling this function and added
    again afterwards (so that the zero padding maps to "ignore_label").
    Make sure the label dtype allows negative values. `image` and `labels`
    are concatenated, so they must share a dtype.

    Args:
      image: A 3-D Tensor whose first two axes are cropped.
      labels: A 3-D Tensor with the same first two dimensions as `image`.
      crop_h: Height of the crop.
      crop_w: Width of the crop.

    Returns:
      A tuple of (cropped_image, cropped_label).
    """
    combined = tf.concat(2, [image, labels])
    image_shape = tf.shape(image)
    combined_pad = tf.image.pad_to_bounding_box(
        combined, 0, 0,
        tf.maximum(crop_h, image_shape[0]),
        tf.maximum(crop_w, image_shape[1]))

    last_image_dim = tf.shape(image)[-1]
    last_label_dim = tf.shape(labels)[-1]
    # Crop depth is the combined channel count of both inputs rather than a
    # fixed 4, so inputs other than 3-channel image + 1-channel label work.
    crop_size = tf.pack([crop_h, crop_w, last_image_dim + last_label_dim])
    combined_crop = tf.random_crop(combined_pad, crop_size)

    # Split the jointly cropped tensor back into its image and label parts.
    return (combined_crop[:, :, :last_image_dim],
            combined_crop[:, :, last_image_dim:])
74+
4575

46-
def read_images_from_disk(input_queue, input_size, random_scale, img_type): # optional pre-processing arguments
76+
def preprocess_input_train(img, label, ignore_label = 255,
                           input_size = (321,321),
                           scale = True,
                           mirror = True):
    """Apply training-time augmentation to one image and its label mask.

    Optionally rescales both tensors by one shared random factor, optionally
    mirrors both using one shared random draw, then takes a joint random
    crop (zero-padding first when the input is smaller than the crop).

    Args:
      img: Image tensor. Its static shape is set to (crop_h, crop_w, 3) at
        the end, so 3 channels are expected.
      label: Label tensor. Its static shape is set to (crop_h, crop_w, 1) at
        the end, so 1 channel is expected.
      ignore_label: Value that padded label pixels take after cropping.
      input_size: Tuple (crop_h, crop_w) giving the output height and width.
      scale: If truthy, resize by a random factor drawn from [0.5, 1.5).
      mirror: If truthy, pass both tensors through `image_mirroring` with
        the same random numbers.

    Returns:
      Two tensors: the augmented image and its label mask cast to uint8.
    """
    if scale:
        # A single factor is used for both axes so the aspect ratio is kept.
        factor = tf.random_uniform([1], minval=0.5, maxval=1.5, dtype=tf.float32, seed=None)
        scaled_h = tf.to_int32(tf.mul(tf.to_float(tf.shape(img)[0]), factor))
        scaled_w = tf.to_int32(tf.mul(tf.to_float(tf.shape(img)[1]), factor))
        target_hw = tf.squeeze(tf.pack([scaled_h, scaled_w]), squeeze_dims=[1])
        img = tf.image.resize_images(img, target_hw)
        # Labels are resized with nearest-neighbour so no new label values
        # are interpolated; that op takes a batch, hence the temporary
        # leading axis.
        batched_label = tf.expand_dims(label, 0)
        batched_label = tf.image.resize_nearest_neighbor(batched_label, target_hw)
        label = tf.squeeze(batched_label, squeeze_dims=[0])

    if mirror:
        # The same draw is handed to both calls so image and label are
        # flipped (or not) together.
        flip_draws = tf.random_uniform([2], 0, 1.0, dtype=tf.float32)
        img = image_mirroring(img, flip_draws)
        label = image_mirroring(label, flip_draws)

    crop_h, crop_w = input_size
    # Shift labels by -ignore_label so that the zero padding added during
    # cropping becomes ignore_label once the shift is undone. Float is used
    # because the shifted values can be negative.
    shifted_label = tf.cast(label, dtype=tf.float32)
    shifted_label = shifted_label - ignore_label
    img, shifted_label = random_crop_and_pad_image_and_labels(img, shifted_label, crop_h, crop_w)
    label = tf.cast(shifted_label + ignore_label, dtype=tf.uint8)

    # Set static shapes so that TensorFlow knows them at graph-construction
    # time.
    img.set_shape((crop_h, crop_w, 3))
    label.set_shape((crop_h, crop_w, 1))

    return img, label
116+
117+
118+
def read_images_from_disk(input_queue,
119+
img_type,
120+
phase,
121+
input_size = (321,321),
122+
ignore_label = 255,
123+
scale = True,
124+
mirror = True):
47125
"""Read one image and its corresponding mask with optional pre-processing.
48126
49127
Args:
@@ -52,11 +130,12 @@ def read_images_from_disk(input_queue, input_size, random_scale, img_type): # op
52130
If not given, return images of original size.
53131
random_scale: whether to randomly scale the images prior
54132
to random crop.
133+
phase: A string specifying either 'train' , 'valid' or 'test'
55134
56135
Returns:
57136
Two tensors: the decoded image and its mask.
58137
"""
59-
mirror = True # TODO: make this a variable
138+
60139
img_contents = tf.read_file(input_queue[0])
61140
label_contents = tf.read_file(input_queue[1])
62141

@@ -66,55 +145,69 @@ def read_images_from_disk(input_queue, input_size, random_scale, img_type): # op
66145
img = tf.image.decode_png(img_contents, channels=3) # CamVid
67146

68147
label = tf.image.decode_png(label_contents, channels=1)
69-
if input_size is not None:
70-
h, w = input_size
71-
if random_scale:
72-
scale = tf.random_uniform([1], minval=0.75, maxval=1.25, dtype=tf.float32, seed=None)
73-
h_new = tf.to_int32(tf.mul(tf.to_float(tf.shape(img)[0]), scale))
74-
w_new = tf.to_int32(tf.mul(tf.to_float(tf.shape(img)[1]), scale))
75-
new_shape = tf.squeeze(tf.pack([h_new, w_new]), squeeze_dims=[1])
76-
77-
img = tf.image.resize_images(img, new_shape)
78-
label = tf.image.resize_nearest_neighbor(tf.expand_dims(label, 0), new_shape)
79-
label = tf.squeeze(label, squeeze_dims=[0])
80-
if mirror:
81-
distortions = tf.random_uniform([2], 0, 1.0, dtype=tf.float32)
82-
img = image_distortions(img, distortions)
83-
label = image_distortions(label, distortions)
84-
img = tf.image.resize_image_with_crop_or_pad(img, h, w)
85-
label = tf.image.resize_image_with_crop_or_pad(label, h, w)
148+
149+
150+
# Change RGB to BGR
86151
img_r, img_g, img_b = tf.split(split_dim=2, num_split=3, value=img)
87-
img = tf.cast(tf.concat(2, [img_b, img_g, img_r]), dtype=tf.float32)
88-
# extract mean
89-
img -= IMG_MEAN
90-
return img, label
152+
img = tf.cast(tf.concat(2, [img_b, img_g, img_r]), dtype=tf.float32)
153+
154+
# Mean subtraction
155+
IMG_MEAN = tf.constant([104.00698793,116.66876762,122.67891434],shape=[1,1,3], dtype=tf.float32) # BGR
156+
IMG_MEAN = tf.reshape(IMG_MEAN,[1,1,3])
157+
img = img - IMG_MEAN
158+
159+
160+
# Optional preprocessing for training phase
161+
if phase == 'train':
162+
img, label = preprocess_input_train(img, label, input_size = (321,321),
163+
ignore_label = ignore_label)
164+
elif phase == 'valid':
165+
# TODO: Perform only a central crop -> size should be the same as during training
166+
pass
167+
elif phase == 'test':
168+
pass
169+
170+
return img, label
171+
172+
173+
174+
175+
def image_mirroring(image, random_number):
    """Conditionally reverse `image` along axis 1, driven by a random draw.

    NOTE(review): this repeats an earlier definition of the same name in
    this module; being the later one, it is the definition bound after
    import. Consider removing one of the two.

    Args:
      image: A 3-D tensor. Axis 1 is presumably the width of a
        [height, width, channels] image -- confirm against the caller.
      random_number: A tensor of random values; only element 0 is read.

    Returns:
      `image`, reversed along axis 1 when `random_number[0] < 0.5`,
      otherwise unchanged.
    """
    flip_draw = random_number[0]
    # One entry per axis. The constant 1.0 entries can never be below the
    # 0.5 threshold, so axes 0 and 2 are never selected for reversal.
    per_axis_values = tf.pack([1.0, flip_draw, 1.0])
    flip_mask = tf.less(per_axis_values, 0.5)
    return tf.reverse(image, flip_mask)
180+
91181

92182
class ImageReader(object):
93183
'''Generic ImageReader which reads images and corresponding segmentation
94184
masks from the disk, and enqueues them into a TensorFlow queue.
95185
'''
96186

97-
def __init__(self, data_dir, data_list, input_size, random_scale, coord):
187+
def __init__(self, data_dir, data_list, input_size, phase, coord):
98188
'''Initialise an ImageReader.
99189
100190
Args:
101191
data_dir: path to the directory with images and masks.
102192
data_list: path to the file with lines of the form '/path/to/image /path/to/mask'.
103193
input_size: a tuple with (height, width) values, to which all the images will be resized.
104-
random_scale: whether to randomly scale the images prior to random crop.
194+
phase: 'train', 'valid' or 'test'
105195
coord: TensorFlow queue coordinator.
106196
'''
107197
self.data_dir = data_dir
108198
self.data_list = data_list
109199
self.input_size = input_size
110200
self.coord = coord
201+
self.phase = phase
111202

112203
self.image_list, self.label_list , self.img_type = read_labeled_image_list(self.data_dir, self.data_list)
113204
self.images = tf.convert_to_tensor(self.image_list, dtype=tf.string)
114205
self.labels = tf.convert_to_tensor(self.label_list, dtype=tf.string)
115206
self.queue = tf.train.slice_input_producer([self.images, self.labels],
116207
shuffle=input_size is not None) # not shuffling if it is val
117-
self.image, self.label = read_images_from_disk(self.queue, self.input_size, random_scale, self.img_type)
208+
# self.image, self.label = read_images_from_disk(self.queue, self.input_size, phase, self.img_type)
209+
# self.image, self.label = read_images_from_disk(self.queue, self.img_type, self.phase, input_size = (321,321), ignore_label = 255)
210+
self.image, self.label = read_images_from_disk(self.queue, self.img_type, self.phase)
118211

119212
def dequeue(self, num_elements):
120213
'''Pack images and labels into a batch.

evaluate.py

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,31 +20,35 @@
2020
import tensorflow as tf
2121
import numpy as np
2222

23+
# from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label, decode_labels, decode_labels_old
2324
from deeplab_resnet import DeepLabResNetModel, ImageReader, prepare_label, decode_labels, decode_labels_old
2425

2526

27+
2628
OUTPUT_IMGS = True
2729

2830
### Voc12
29-
#n_classes = 21
30-
#ignore_label = 20
31-
#DATA_DIRECTORY = '/home/garbade/datasets/VOC2012/'
32-
#DATA_LIST_PATH = './dataset/voc12/val_Bndry255.txt'
33-
#DATA_LIST_PATH_ID = '/home/garbade/models/01_voc12/17_DL_v2_ResNet/voc12/list/val_id.txt'
34-
#RESTORE_FROM = '/home/garbade/models_tf/01_voc12/07_LR_fixed/snapshots_finetune/model.ckpt-17400'
35-
##RESTORE_FROM = './Vladimir/model.ckpt-20000'
36-
#SAVE_DIR = '/home/garbade/models_tf/01_voc12/07_LR_fixed/images_val/'
31+
n_classes = 21
32+
ignore_label = 20
33+
DATA_DIRECTORY = '/home/garbade/datasets/VOC2012/'
34+
DATA_LIST_PATH = './dataset/voc12/val_Bndry255.txt'
35+
DATA_LIST_PATH_ID = './dataset/voc12/val_id.txt'
36+
RESTORE_FROM = '/home/garbade/models_tf/01_voc12/14_fixedRandomCropping/snapshots_finetune/model.ckpt-20000'
37+
#RESTORE_FROM = './Vladimir/model.ckpt-20000'
38+
SAVE_DIR = '/home/garbade/models_tf/01_voc12/14_fixedRandomCropping/images_val/'
3739

3840

3941
### CamVid
40-
n_classes = 11
41-
ignore_label = 10
42-
DATA_DIRECTORY = '/home/garbade/datasets/CamVid/'
43-
DATA_LIST_PATH = '/home/garbade/datasets/CamVid/list/test_70.txt'
44-
DATA_LIST_PATH_ID = '/home/garbade/datasets/CamVid/list/test_id.txt'
45-
SAVE_DIR = '/home/garbade/models_tf/03_CamVid/04_nc11_ic10/images_val/'
46-
RESTORE_FROM = '/home/garbade/models_tf/03_CamVid/10_fixedMirrorImgAndScale/snapshots_finetune/model.ckpt-6500'
47-
SAVE_DIR = '/home/garbade/models_tf/03_CamVid/10_fixedMirrorImgAndScale/images_val/'
42+
#n_classes = 11
43+
#ignore_label = 255
44+
#DATA_DIRECTORY = '/home/garbade/datasets/CamVid/'
45+
## DATA_LIST_PATH = './dataset/camvid/test_70.txt'
46+
#DATA_LIST_PATH = './dataset/camvid/test.txt'
47+
#DATA_LIST_PATH_ID = './dataset/camvid/test_id.txt'
48+
#SAVE_DIR = '/home/garbade/models_tf/03_CamVid/14_fixedRandomCropping/images_val_full/'
49+
## RESTORE_FROM = '/home/garbade/models_tf/03_CamVid/12_higherLR/snapshots_finetune/model.ckpt-6600'
50+
#RESTORE_FROM = '/home/garbade/models_tf/03_CamVid/14_fixedRandomCropping/snapshots_finetune/model.ckpt-20000'
51+
4852

4953

5054
### Cityscapes (19 classes + BG)
@@ -54,8 +58,8 @@
5458
#DATA_LIST_PATH='./dataset/city/small_50/val_splt_offst_65.txt'
5559
#DATA_LIST_PATH_ID='./dataset/city/small_50/val_split_id.txt'
5660
#TRAIN_SIZE=1000
57-
#RESTORE_FROM = '/home/garbade/models_tf/05_Cityscapes/07_fixedLR/snapshots_finetune/model.ckpt-17400'
58-
#SAVE_DIR = '/home/garbade/models_tf/05_Cityscapes/07_fixedLR/images_val/'
61+
#RESTORE_FROM = '/home/garbade/models_tf/05_Cityscapes/14_fixedRandomCropping/snapshots_finetune/model.ckpt-20000'
62+
#SAVE_DIR = '/home/garbade/models_tf/05_Cityscapes/14_fixedRandomCropping/images_val/'
5963

6064

6165

@@ -120,7 +124,7 @@ def main():
120124
args.data_dir,
121125
args.data_list,
122126
None, # No defined input size.
123-
False, # No random scale.
127+
'test', # phase 'train', 'valid' or 'test'
124128
coord)
125129
image, label = reader.image, reader.label
126130
image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0) # Add one batch dimension.
@@ -140,7 +144,7 @@ def main():
140144
# mIoU
141145
pred_lin = tf.reshape(pred, [-1,])
142146
gt = tf.reshape(label_batch, [-1,])
143-
weights = tf.cast(tf.less_equal(gt, args.ignore_label), tf.int32) # Ignore void label '255'.
147+
weights = tf.cast(tf.less_equal(gt, args.n_classes - 1), tf.int32) # TODO: Includ n_classes -1 ->Ignore void label '255'.
144148
mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(pred_lin, gt, num_classes = args.n_classes, weights = weights)
145149

146150
# Set up tf session and initialize variables.

inference.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
from deeplab_resnet import DeepLabResNetModel, ImageReader, decode_labels, prepare_label
2020

21+
IMG_FILE = '~/lena.jpg'
22+
MODEL_DIR = './deeplab_tf_model/deeplab_resnet.ckpt'
2123
SAVE_DIR = './output/'
2224
IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32)
2325

@@ -28,9 +30,9 @@ def get_arguments():
2830
A list of parsed arguments.
2931
"""
3032
parser = argparse.ArgumentParser(description="DeepLabLFOV Network Inference.")
31-
parser.add_argument("img_path", type=str,
33+
parser.add_argument("img_path", type=str, default=IMG_FILE,
3234
help="Path to the RGB image file.")
33-
parser.add_argument("model_weights", type=str,
35+
parser.add_argument("model_weights", type=str, default=MODEL_DIR,
3436
help="Path to the file with model weights.")
3537
parser.add_argument("--save-dir", type=str, default=SAVE_DIR,
3638
help="Where to save predicted mask.")

npy2ckpt.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from deeplab_resnet import DeepLabResNetModel
1616

1717
SAVE_DIR = './'
18+
n_classes = 11
1819

1920
def get_arguments():
2021
"""Parse all the arguments provided from the CLI.
@@ -27,6 +28,10 @@ def get_arguments():
2728
help="Path to the .npy file, which contains the weights.")
2829
parser.add_argument("--save-dir", type=str, default=SAVE_DIR,
2930
help="Where to save the converted .ckpt file.")
31+
parser.add_argument("--n_classes", type=int, default=n_classes,
32+
help="Number of classes.")
33+
parser.add_argument("--is-training", action="store_true",
34+
help="Whether to updates the running means and variances during the training.")
3035
return parser.parse_args()
3136

3237
def save(saver, sess, logdir):
@@ -47,7 +52,8 @@ def main():
4752
# Default image.
4853
image_batch = tf.constant(0, tf.float32, shape=[1, 321, 321, 3])
4954
# Create network.
50-
net = DeepLabResNetModel({'data': image_batch})
55+
net = DeepLabResNetModel({'data': image_batch},args.n_classes, is_training=args.is_training)
56+
5157
var_list = tf.global_variables()
5258

5359
# Set up tf session and initialize variables.

0 commit comments

Comments
 (0)