import numpy as np
import glob
import skimage.io as io
import os.path
import tensorflow as tf
def fimg_to_fmask(img_path):
    """Return the mask file path that corresponds to image file ``img_path``.

    The mask path keeps the image's directory; the file name is the image's
    file name with every ".tif" replaced by "_mask.tif".
    """
    folder = os.path.dirname(img_path)
    image_name = os.path.basename(img_path)
    return os.path.join(folder, image_name.replace(".tif", "_mask.tif"))
origin_images_subset = [img for img in glob.glob("train_subset/*.tif") if 'mask' not in img]
paired_images_subset = [(img, fimg_to_fmask(img)) for img in origin_images_subset]
print("number of image segmentation pairs: ", len(paired_images_subset))
number of image segmentation pairs: 0
# Same pairing as for the subset, but over the complete train/ folder.
origin_images_full = [
    tif_path
    for tif_path in glob.glob("train/*.tif")
    if not ('mask' in tif_path)
]
paired_images_full = [
    (tif_path, fimg_to_fmask(tif_path))
    for tif_path in origin_images_full
]
print("number of image segmentation pairs: ", len(paired_images_full))
number of image segmentation pairs: 0
# IPython magic (only valid inside a notebook/IPython session): draw
# matplotlib figures inline in the notebook output.
%matplotlib inline
# check an image instance
# Load one sample image and its mask from the subset folder.
img = io.imread('train_subset/1_1.tif')
mask =io.imread('train_subset/1_1_mask.tif')
# Report the Python type and the array shape of the loaded image.
print(type(img))
print(img.shape)
# Show the image first, then the mask.
io.imshow(img)
io.show()
io.imshow(mask)
io.show()
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) <ipython-input-5-65e34ab6990c> in <module>() 1 get_ipython().run_line_magic('matplotlib', 'inline') 2 # check an image instance ----> 3 img = io.imread('train_subset/1_1.tif') 4 mask =io.imread('train_subset/1_1_mask.tif') 5 print(type(img)) ~/anaconda/lib/python3.5/site-packages/skimage/io/_io.py in imread(fname, as_grey, plugin, flatten, **plugin_args) 59 60 with file_or_url_context(fname) as fname: ---> 61 img = call_plugin('imread', fname, plugin=plugin, **plugin_args) 62 63 if not hasattr(img, 'ndim'): ~/anaconda/lib/python3.5/site-packages/skimage/io/manage_plugins.py in call_plugin(kind, *args, **kwargs) 209 (plugin, kind)) 210 --> 211 return func(*args, **kwargs) 212 213 ~/anaconda/lib/python3.5/site-packages/skimage/io/_plugins/tifffile_plugin.py in imread(fname, dtype, **kwargs) 27 if 'img_num' in kwargs: 28 kwargs['key'] = kwargs.pop('img_num') ---> 29 with open(fname, 'rb') as f: 30 tif = TiffFile(f) 31 return tif.asarray(**kwargs) FileNotFoundError: [Errno 2] No such file or directory: 'train_subset/1_1.tif'
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
def image_augmentation(img, save_dir):
    """Generate randomly rotated / flipped variants of ``img`` into ``save_dir``.

    The generator is configured with rotation_range=180, horizontal flips and
    'nearest' fill.  It is asked to save what it produces to ``save_dir`` with
    the prefix "mask" in 'jpeg' format.  Eleven batches are pulled from it.
    """
    augmenter = ImageDataGenerator(
        rotation_range=180,
        horizontal_flip=True,
        fill_mode='nearest')
    # Add a leading and a trailing axis of length one
    # (assumes img is 2-D, giving (1, H, W, 1) -- TODO confirm).
    single_batch = np.expand_dims(np.expand_dims(img, 0), -1)
    stream = augmenter.flow(single_batch, batch_size=1,
                            save_to_dir=save_dir, save_prefix="mask", save_format='jpeg')
    # The stream is left after the 11th batch has been produced.
    for produced, _ in enumerate(stream, start=1):
        if produced > 10:
            break
Using TensorFlow backend.
def images_split(paired_images, full=False, train=True):
    """Cut every image/mask pair into a 6 x 5 grid of 70 x 116 tiles and save them.

    Each tile is written to two directory trees:

    * the FCN tree ("data_fcn[_full]/<split>/images/images/" and
      ".../masks/masks/"), image tile and mask tile sharing one running number;
    * the simple-CNN tree ("data_simple_cnn[_full]/<split>/mask/" or
      ".../no_mask/"), where the image tile goes to "mask" when its mask tile
      sums to at least 400 and to "no_mask" otherwise.

    Args:
        paired_images: iterable of (image_path, mask_path) tuples.
        full: when True the "_full" variants of the dataset roots are used.
        train: when True tiles go to the "train" split, else to "validation".

    The destination folders are created if they do not exist yet; previously a
    missing folder made the first save fail.
    """
    suffix = "_full" if full else ""
    split = "train" if train else "validation"
    fcn_img = "data_fcn" + suffix + "/" + split + "/images/images/"
    fcn_mask = "data_fcn" + suffix + "/" + split + "/masks/masks/"
    simple_cnn_img = "data_simple_cnn" + suffix + "/" + split + "/no_mask/"
    simple_cnn_mask = "data_simple_cnn" + suffix + "/" + split + "/mask/"

    # Make sure every destination exists before anything is written to it.
    for folder in (fcn_img, fcn_mask, simple_cnn_img, simple_cnn_mask):
        os.makedirs(folder, exist_ok=True)

    # Running numbers used in the output file names (all start at 1).
    count_no_mask = 1
    count_mask = 1
    count_fcn = 1
    for raw_img, raw_mask in paired_images:
        img = io.imread(raw_img)
        # Mask values are divided by 255 (presumably 0/255 -> 0/1 -- confirm).
        mask = io.imread(raw_mask) / 255
        for i in range(6):
            for j in range(5):
                rows = slice(i * 70, (i + 1) * 70)
                cols = slice(j * 116, (j + 1) * 116)
                small_img = img[rows, cols]
                small_mask = mask[rows, cols]
                io.imsave(fcn_img + str(count_fcn) + ".jpg", small_img / 255)
                io.imsave(fcn_mask + str(count_fcn) + "_mask.jpg", small_mask)
                count_fcn += 1
                # Reuse the tile cut above instead of slicing the mask again.
                if np.sum(small_mask) >= 400:
                    io.imsave(simple_cnn_mask + str(count_mask) + "_mask.jpg", small_img)
                    count_mask += 1
                else:
                    io.imsave(simple_cnn_img + str(count_no_mask) + ".jpg", small_img)
                    count_no_mask += 1
    print("Finished splitting and saving images and segmentations")
# Tile the subset: the first 480 pairs go to the training tree, the remaining
# pairs to the validation tree.
images_split(paired_images_subset[:480], full=False, train=True)
images_split(paired_images_subset[480:], full=False, train=False)
# Count the tiles that landed in each class folder of the training tree.
imgs_no_mask = glob.glob("data_simple_cnn/train/no_mask/*")
imgs_mask = glob.glob("data_simple_cnn/train/mask/*")
print("images with no mask: ", len(imgs_no_mask))
print("images with mask: ", len(imgs_mask))
# An empty no_mask folder used to raise ZeroDivisionError; report nan instead.
print("mask to no mask ratio",
      float(len(imgs_mask))/len(imgs_no_mask) if imgs_no_mask else float("nan"))
images with no mask: 13982 images with mask: 10275 mask to no mask ratio 0.7348734086682878
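# The full set holds 5635 image/mask pairs: the first 5000 are used for training, the remaining 635 for validation.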
# Tile the full set: the first 5000 pairs go to the training tree, the
# remaining pairs to the validation tree.
images_split(paired_images_full[:5000], full=True, train=True)
images_split(paired_images_full[5000:], full=True, train=False)
# Count the tiles that landed in each class folder of the training tree.
imgs_no_mask = glob.glob("data_simple_cnn_full/train/no_mask/*")
imgs_mask = glob.glob("data_simple_cnn_full/train/mask/*")
print("images with no mask: ", len(imgs_no_mask))
print("images with mask: ", len(imgs_mask))
# An empty no_mask folder used to raise ZeroDivisionError; report nan instead.
print("mask to no mask ratio",
      float(len(imgs_mask))/len(imgs_no_mask) if imgs_no_mask else float("nan"))
images with no mask: 323227 images with mask: 350497 mask to no mask ratio 1.0843679519347085
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
def image_augmentation(img, save_dir, save_prefix):
    """Generate randomly rotated / flipped variants of ``img`` into ``save_dir``.

    Args:
        img: image array; a leading and a trailing axis of length one are added
            before it is passed to the generator.
        save_dir: directory handed to the generator as ``save_to_dir``.
        save_prefix: file-name prefix handed to the generator.

    Six batches are pulled from the generator, saved in 'jpg' format.
    """
    datagen = ImageDataGenerator(
        rotation_range=180,
        horizontal_flip=True,
        fill_mode='nearest')
    # assumes img is 2-D so the result is (1, H, W, 1) -- TODO confirm
    expanded = np.expand_dims(img, 0)
    expanded = np.expand_dims(expanded, -1)
    batches = datagen.flow(expanded, batch_size=1,
                           save_to_dir=save_dir, save_prefix=save_prefix, save_format='jpg')
    # zip stops as soon as range(6) is exhausted, so exactly six batches are drawn.
    for _ in zip(range(6), batches):
        pass
def fimg_to_fmask(img_path):
    # Map an image file path to its mask file path: same directory, with each
    # ".tif" in the file name turned into "_mask.tif".
    head, tail = os.path.split(img_path)
    mask_tail = "_mask.tif".join(tail.split(".tif"))
    return os.path.join(head, mask_tail)
# Pair every image in train/ (paths without "mask") with its mask path.
origin_images_full = [img for img in glob.glob("train/*.tif") if 'mask' not in img]
paired_images_full = [(img, fimg_to_fmask(img)) for img in origin_images_full]
print("number of image segmentation pairs: ", len(paired_images_full))
no_masked = "data_simple_cnn/test/no_mask/"
masked = "data_simple_cnn/test/mask/"
# Create the two class folders up front; saving into a missing folder fails.
os.makedirs(no_masked, exist_ok=True)
os.makedirs(masked, exist_ok=True)
count = 1
for raw_img, raw_mask in paired_images_full:
    img = io.imread(raw_img)
    mask = io.imread(raw_mask)
    # Whole images are sorted by whether their mask has any positive sum.
    if np.sum(mask[:,:]) > 0:
        io.imsave(masked + str(count) + "_mask.jpg", img)
    else:
        io.imsave(no_masked + str(count) + ".jpg", img)
    # NOTE(review): the source lost its indentation; count is assumed to
    # advance once per pair so that file names stay unique -- confirm.
    count += 1
number of image segmentation pairs: 5635
/home/charlioxumykj/miniconda3/envs/dlnd/lib/python3.6/site-packages/skimage/external/tifffile/tifffile.py:2611: RuntimeWarning: py_decodelzw encountered unexpected end of stream strip = decompress(strip)
import random
# One augmentation call per five tiles by which the no_mask class outnumbers
# the mask class (the loop does not run when the difference is below five).
for count in range((len(imgs_no_mask) - len(imgs_mask))//5):
    n = len(imgs_mask)
    # randrange(n) can pick any tile; randint(1, n-1) never picked index 0 and
    # raised ValueError for a one-element list.
    i = random.randrange(n)
    small_img = io.imread(imgs_mask[i])
    image_augmentation(small_img, "data_simple_cnn_full/train/mask/", "aug_"+str(count))
# Recount both class folders after augmentation.
imgs_no_mask = glob.glob("data_simple_cnn_full/train/no_mask/*")
imgs_mask = glob.glob("data_simple_cnn_full/train/mask/*")
print("images with no mask: ", len(imgs_no_mask))
print("images with mask: ", len(imgs_mask))
# An empty no_mask folder used to raise ZeroDivisionError; report nan instead.
print("mask to no mask ratio",
      float(len(imgs_mask))/len(imgs_no_mask) if imgs_no_mask else float("nan"))
images with no mask: 323227 images with mask: 350497 mask to no mask ratio 1.0843679519347085
import random
# 60000 rounds; each round augments one random tile of each class.
for count in range(300000//5):
    # randrange covers index 0 as well; randint(1, len-1) skipped the first
    # tile and raised ValueError for a one-element list.
    i = random.randrange(len(imgs_mask))
    small_img = io.imread(imgs_mask[i])
    image_augmentation(small_img, "data_simple_cnn_full/train/mask/", "even_more_aug_"+str(count))
    j = random.randrange(len(imgs_no_mask))
    small_img = io.imread(imgs_no_mask[j])
    image_augmentation(small_img, "data_simple_cnn_full/train/no_mask/", "even_more_aug_"+str(count))
# Recount both class folders after augmentation.
imgs_no_mask = glob.glob("data_simple_cnn_full/train/no_mask/*")
imgs_mask = glob.glob("data_simple_cnn_full/train/mask/*")
print("images with no mask: ", len(imgs_no_mask))
print("images with mask: ", len(imgs_mask))
# An empty no_mask folder used to raise ZeroDivisionError; report nan instead.
print("mask to no mask ratio",
      float(len(imgs_mask))/len(imgs_no_mask) if imgs_no_mask else float("nan"))
images with no mask: 683136 images with mask: 710408 mask to no mask ratio 1.0399217725313847
# Count the validation tiles in each class folder of the full simple-CNN tree.
imgs_no_mask_val = glob.glob("data_simple_cnn_full/validation/no_mask/*")
imgs_mask_val = glob.glob("data_simple_cnn_full/validation/mask/*")
print("images with no mask: ", len(imgs_no_mask_val))
print("images with mask: ", len(imgs_mask_val))
# An empty no_mask folder used to raise ZeroDivisionError; report nan instead.
print("mask to no mask ratio",
      float(len(imgs_mask_val))/len(imgs_no_mask_val) if imgs_no_mask_val else float("nan"))
images with no mask: 43955 images with mask: 47440 mask to no mask ratio 1.0792856330337846
# One augmentation call per five tiles by which the validation no_mask class
# outnumbers the mask class (no iterations when the difference is below five).
for count in range((len(imgs_no_mask_val) - len(imgs_mask_val))//5):
    n = len(imgs_mask_val)
    # randrange(n) can pick any tile; randint(1, n-1) never picked index 0 and
    # raised ValueError for a one-element list.
    i = random.randrange(n)
    small_img = io.imread(imgs_mask_val[i])
    image_augmentation(small_img, "data_simple_cnn_full/validation/mask/", "aug_"+str(count))
# Recount both class folders after augmentation.
imgs_no_mask_val = glob.glob("data_simple_cnn_full/validation/no_mask/*")
imgs_mask_val = glob.glob("data_simple_cnn_full/validation/mask/*")
print("images with no mask: ", len(imgs_no_mask_val))
print("images with mask: ", len(imgs_mask_val))
# An empty no_mask folder used to raise ZeroDivisionError; report nan instead.
print("mask to no mask ratio",
      float(len(imgs_mask_val))/len(imgs_no_mask_val) if imgs_no_mask_val else float("nan"))
images with no mask: 43955 images with mask: 47440 mask to no mask ratio 1.0792856330337846
# 12000 rounds; each round augments one random validation tile of each class.
for count in range(60000//5):
    # randrange covers index 0 as well; randint(1, len-1) skipped the first
    # tile and raised ValueError for a one-element list.
    i = random.randrange(len(imgs_mask_val))
    small_img = io.imread(imgs_mask_val[i])
    image_augmentation(small_img, "data_simple_cnn_full/validation/mask/", "more_aug_"+str(count))
    j = random.randrange(len(imgs_no_mask_val))
    small_img = io.imread(imgs_no_mask_val[j])
    image_augmentation(small_img, "data_simple_cnn_full/validation/no_mask/", "more_aug_"+str(count))
# Recount both class folders after augmentation.
imgs_no_mask_val = glob.glob("data_simple_cnn_full/validation/no_mask/*")
imgs_mask_val = glob.glob("data_simple_cnn_full/validation/mask/*")
print("images with no mask: ", len(imgs_no_mask_val))
print("images with mask: ", len(imgs_mask_val))
# An empty no_mask folder used to raise ZeroDivisionError; report nan instead.
print("mask to no mask ratio",
      float(len(imgs_mask_val))/len(imgs_no_mask_val) if imgs_no_mask_val else float("nan"))
images with no mask: 115932 images with mask: 119411 mask to no mask ratio 1.030008970775972
def defected(mask):
    """Tell whether the values of ``mask`` add up to at least 400.

    ``mask`` is indexed with ``[:, :]``, so it needs at least two dimensions.
    """
    total = np.sum(mask[:, :])
    return total >= 400
# Share of FCN training masks that `defected` flags.
train_masks = glob.glob("data_fcn_full/train/masks/masks/*")
defected_masks = [defected(io.imread(ma)) for ma in train_masks]
print("No. of train masks: ", len(train_masks))
print("No. of defected masks: ", sum(defected_masks))
# An empty mask folder used to raise ZeroDivisionError; report nan instead.
print("defected percentage: ",
      float(sum(defected_masks))/len(train_masks) if train_masks else float("nan"))
def image_augmentation(img, save_dir, save_prefix, seed):
    """Generate randomly rotated / flipped variants of ``img`` into ``save_dir``.

    Args:
        img: image array; a leading and a trailing axis of length one are added.
        save_dir: directory handed to the generator as ``save_to_dir``.
        save_prefix: file-name prefix handed to the generator.
        seed: random seed handed to the generator's ``flow`` call.

    Six batches are pulled from the generator, saved in 'jpg' format.
    """
    augmenter = ImageDataGenerator(rotation_range=180, horizontal_flip=True, fill_mode='nearest')
    # assumes img is 2-D so the result is (1, H, W, 1) -- TODO confirm
    sample = np.expand_dims(np.expand_dims(img, 0), -1)
    stream = augmenter.flow(sample, batch_size=1, save_to_dir=save_dir,
                            save_prefix=save_prefix, save_format='jpg', seed=seed)
    produced = 0
    for _ in stream:
        produced += 1
        if produced == 6:
            break
def fimg_to_fmask(img_path, mask_folder):
    """Return the mask path in ``mask_folder`` for the tile at ``img_path``.

    Only the file name of ``img_path`` is used; every ".jpg" in it becomes
    "_mask.jpg".
    """
    tile_name = os.path.basename(img_path)
    return os.path.join(mask_folder, tile_name.replace(".jpg", "_mask.jpg"))
# FCN training folders: image tiles and their mask tiles.
img_folder, mask_folder = (
    "data_fcn_full/train/images/images/",
    "data_fcn_full/train/masks/masks/",
)
# List every image tile and pair it with the mask path derived from its name.
fcn_train_imgs = glob.glob(img_folder + "*")
fcn_train_pairs = list(
    zip(fcn_train_imgs,
        (fimg_to_fmask(tile, mask_folder) for tile in fcn_train_imgs))
)
n = len(fcn_train_pairs)
print(n)
307371
import random
# Augment randomly drawn training pairs whose mask is flagged by `defected`.
count = 1
for _ in range(n):
    # randrange(n) can pick any pair; randint(1, n-1) never picked index 0 and
    # raised ValueError when only one pair exists.
    i = random.randrange(n)
    seed = random.randint(1, 10000001)
    small_mask = io.imread(fcn_train_pairs[i][1])
    if defected(small_mask):
        # The same seed goes to both calls (presumably so mask and image get
        # the same random transform -- confirm).
        image_augmentation(small_mask, mask_folder, "mask_aug_defect_"+str(count), seed)
        small_img = io.imread(fcn_train_pairs[i][0])
        image_augmentation(small_img, img_folder, "aug_defect_"+str(count), seed)
        # NOTE(review): the source lost its indentation; count is assumed to
        # advance only when a pair was augmented -- confirm.
        count += 1
    if count > 1000:
        break
# Recompute the share of masks flagged by `defected`.
train_masks = glob.glob("data_fcn_full/train/masks/masks/*")
defected_masks = [defected(io.imread(ma)) for ma in train_masks]
print("No. of train masks: ", len(train_masks))
print("No. of defected masks: ", sum(defected_masks))
# An empty mask folder used to raise ZeroDivisionError; report nan instead.
print("defected percentage: ",
      float(sum(defected_masks))/len(train_masks) if train_masks else float("nan"))
# FCN validation folders: image tiles and their mask tiles.
img_folder, mask_folder = (
    "data_fcn_full/validation/images/images/",
    "data_fcn_full/validation/masks/masks/",
)


def fimg_to_fmask(img_path, mask_folder):
    # Mask path = mask_folder + the tile's file name with ".jpg" -> "_mask.jpg".
    _, tile_name = os.path.split(img_path)
    mask_name = "_mask.jpg".join(tile_name.split(".jpg"))
    return os.path.join(mask_folder, mask_name)
# List every validation image tile and pair it with its derived mask path.
fcn_val_imgs = glob.glob(img_folder + "*")
fcn_val_pairs = list(
    zip(fcn_val_imgs,
        (fimg_to_fmask(tile, mask_folder) for tile in fcn_val_imgs))
)
n = len(fcn_val_pairs)
print(n)
19050
import random
# Running number that the augmentation loop below puts into file-name prefixes.
count = 1


def defected(mask):
    """Return whether the sum over ``mask[:, :]`` reaches 400."""
    return 400 <= np.sum(mask[:, :])
def image_augmentation(img, save_dir, save_prefix, seed):
    """Save randomly rotated / flipped variants of ``img`` under ``save_dir``.

    ``img`` receives a leading and a trailing axis of length one, then six
    batches are drawn from a generator configured with ``save_to_dir``,
    ``save_prefix``, 'jpg' format and the given ``seed``.
    """
    # assumes img is 2-D so the result is (1, H, W, 1) -- TODO confirm
    img = np.expand_dims(img, 0)
    img = np.expand_dims(img, -1)
    flow_kwargs = dict(batch_size=1, save_to_dir=save_dir,
                       save_prefix=save_prefix, save_format='jpg', seed=seed)
    datagen = ImageDataGenerator(rotation_range=180, horizontal_flip=True, fill_mode='nearest')
    remaining = 6
    for _ in datagen.flow(img, **flow_kwargs):
        remaining -= 1
        if not remaining:
            break
# Augment randomly drawn validation pairs whose mask is flagged by `defected`.
for _ in range(n):
    # randrange(n) can pick any pair; randint(1, n-1) never picked index 0 and
    # raised ValueError when only one pair exists.
    i = random.randrange(n)
    seed = random.randint(1, 10000001)
    small_mask = io.imread(fcn_val_pairs[i][1])
    if defected(small_mask):
        # The same seed goes to both calls (presumably so mask and image get
        # the same random transform -- confirm).
        image_augmentation(small_mask, mask_folder, "mask_aug_defect_"+str(count), seed)
        small_img = io.imread(fcn_val_pairs[i][0])
        image_augmentation(small_img, img_folder, "aug_defect_"+str(count), seed)
        # NOTE(review): the source lost its indentation; count is assumed to
        # advance only when a pair was augmented -- confirm.
        count += 1
    if count > 2000:
        break
# Recompute the share of validation masks flagged by `defected`.
val_masks = glob.glob("data_fcn_full/validation/masks/masks/*")
defected_masks = [defected(io.imread(ma)) for ma in val_masks]
print("No. of validation masks: ", len(val_masks))
print("No. of defected masks: ", sum(defected_masks))
# An empty mask folder used to raise ZeroDivisionError; report nan instead.
print("defected percentage: ",
      float(sum(defected_masks))/len(val_masks) if val_masks else float("nan"))
No. of validation masks: 32264 No. of defected masks: 13025 defected percentage: 0.40370071906769156
# FCN training folders: image tiles and their mask tiles.
img_folder, mask_folder = (
    "data_fcn_full/train/images/images/",
    "data_fcn_full/train/masks/masks/",
)


def fimg_to_fmask(img_path, mask_folder):
    """Return the mask path in ``mask_folder`` for the tile at ``img_path``.

    File names that start with a letter get the prefix "mask_"; all other
    names have every ".jpg" replaced by "_mask.jpg".
    """
    name = os.path.basename(img_path)
    mask_name = "mask_" + name if name[0].isalpha() else name.replace(".jpg", "_mask.jpg")
    return os.path.join(mask_folder, mask_name)
# Re-list the training image tiles and mask tiles and rebuild the pairs.
fcn_train_imgs = glob.glob(img_folder + "*")
fcn_train_masks = glob.glob(mask_folder + "*")
fcn_train_pairs = list(
    zip(fcn_train_imgs,
        (fimg_to_fmask(tile, mask_folder) for tile in fcn_train_imgs))
)
n = len(fcn_train_pairs)
# Sanity output: pair count, the pair at index 3458, and both folder sizes.
print(n)
print(fcn_train_pairs[3458])
print(len(fcn_train_imgs))
print(len(fcn_train_masks))
307377 ('data_fcn_full/train/images/images/aug_defect_2026_0_2482.jpg', 'data_fcn_full/train/masks/masks/mask_aug_defect_2026_0_2482.jpg') 307377 307377
import random
# Augment randomly drawn training pairs (no defect filter) until count passes 100000.
count = 1
for _ in range(n):
    # randrange(n) can pick any pair; randint(1, n-1) never picked index 0 and
    # raised ValueError when only one pair exists.
    i = random.randrange(n)
    seed = random.randint(1, 10000001)
    # The same seed goes to both calls (presumably so mask and image get the
    # same random transform -- confirm).
    small_mask = io.imread(fcn_train_pairs[i][1])
    image_augmentation(small_mask, mask_folder, "mask_aug_defect_"+str(count), seed)
    small_img = io.imread(fcn_train_pairs[i][0])
    image_augmentation(small_img, img_folder, "aug_defect_"+str(count), seed)
    count += 1
    if count > 100000:
        break
# Recompute the share of training masks flagged by `defected`.
train_masks = glob.glob("data_fcn_full/train/masks/masks/*")
defected_masks = [defected(io.imread(ma)) for ma in train_masks]
print("No. of train masks: ", len(train_masks))
print("No. of defected masks: ", sum(defected_masks))
# An empty mask folder used to raise ZeroDivisionError; report nan instead.
print("defected percentage: ",
      float(sum(defected_masks))/len(train_masks) if train_masks else float("nan"))
No. of train masks: 1506512 No. of defected masks: 709056 defected percentage: 0.47066070499272494
# FCN validation folders: image tiles and their mask tiles.
img_folder, mask_folder = (
    "data_fcn_full/validation/images/images/",
    "data_fcn_full/validation/masks/masks/",
)
# List every validation image tile and pair it with its derived mask path.
fcn_val_imgs = glob.glob(img_folder + "*")
fcn_val_pairs = list(
    zip(fcn_val_imgs,
        (fimg_to_fmask(tile, mask_folder) for tile in fcn_val_imgs))
)
n = len(fcn_val_pairs)
print(n)
32264
import random
# Augment randomly drawn validation pairs (no defect filter) until count passes 10000.
count = 1
for _ in range(n):
    # randrange(n) can pick any pair; randint(1, n-1) never picked index 0 and
    # raised ValueError when only one pair exists.
    i = random.randrange(n)
    seed = random.randint(1, 10000001)
    # The same seed goes to both calls (presumably so mask and image get the
    # same random transform -- confirm).
    small_mask = io.imread(fcn_val_pairs[i][1])
    image_augmentation(small_mask, mask_folder, "mask_aug_defect_"+str(count), seed)
    small_img = io.imread(fcn_val_pairs[i][0])
    image_augmentation(small_img, img_folder, "aug_defect_"+str(count), seed)
    count += 1
    if count > 10000:
        break
# Recompute the share of validation masks flagged by `defected`.
val_masks = glob.glob("data_fcn_full/validation/masks/masks/*")
defected_masks = [defected(io.imread(ma)) for ma in val_masks]
print("No. of validation masks: ", len(val_masks))
print("No. of defected masks: ", sum(defected_masks))
# An empty mask folder used to raise ZeroDivisionError; report nan instead.
print("defected percentage: ",
      float(sum(defected_masks))/len(val_masks) if val_masks else float("nan"))
No. of validation masks: 152184 No. of defected masks: 58921 defected percentage: 0.3871694790516743