from skimage.color import rgb2gray
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.filters import threshold_sauvola
import numpy as np
from skimage.exposure import is_low_contrast
from skimage.exposure import adjust_gamma
from skimage import exposure
from skimage.color import rgb2hsv
from tensorflow import keras
from skimage import exposure
from skimage.restoration import (denoise_tv_chambolle, denoise_bilateral,
denoise_wavelet, estimate_sigma)
# Read the sample image from disk.
cimage = imread('../sample_images/image2.jpg')
# Gamma correction with gamma=1.
# NOTE(review): a gamma of 1 presumably leaves intensities unchanged — confirm
# whether this is a deliberate tuning hook or can be removed.
cimage = exposure.adjust_gamma(cimage, 1)
# Load the saved Keras model used by predict_alphanumer for character prediction.
reconstructed_model = keras.models.load_model('keras_alphanumeric_weights.h5')
# Grayscale version of the image; this is what gets thresholded further down.
img = rgb2gray(cimage)
Estimated Gaussian noise standard deviation = 0.0007267657933850974
# Display the grayscale image on a 20x20 inch figure.
plt.figure(figsize=(20,20))
plt.imshow(img, cmap="gray")
<matplotlib.image.AxesImage at 0x7f56e0755cf8>
from skimage.measure import label, regionprops
from scipy.ndimage.morphology import distance_transform_edt
import time
import pytesseract
from skimage.transform import rescale, resize
from skimage.util import invert , pad
from skimage.io import imsave
from skimage.morphology import binary_closing
# Accumulator mask (same shape as img, float zeros); remove_non_text_like_regions
# pastes accepted character masks into it.
final_thresholded_image = np.zeros(img.shape)
# Start timestamp in milliseconds; compared against ed_time after the
# thresholding loop to report elapsed time.
st_time = int(round(time.time() * 1000))
# Maps a rounded-centroid id ("row_col") to the list of characters predicted
# for that location across threshold levels.
multilayer_prediction_map = {}
# Centroid ids whose character has already been accepted; regions at these
# locations are skipped by is_not_recorded.
multilayer_prediction_map_recorded = []
def predict_alphanumer(img):
    """Predict the alphanumeric character shown in a binary character mask.

    The mask is normalised with ``rescale_for_tess`` and passed to
    ``reconstructed_model`` as a single ``(1, 32, 32, 1)`` batch.

    Returns:
        The predicted character, or ``''`` when the highest score is below
        0.2 or the predicted character is not alphanumeric.
    """
    model_input = rescale_for_tess(img).reshape(1, 32, 32, 1)
    scores = reconstructed_model.predict(model_input)

    # Reject low-confidence predictions outright.
    if np.max(scores) < 0.2:
        return ''

    # ``all_characters`` is defined outside this block; presumably it maps the
    # model's class index to its character — verify against its definition.
    best_char = all_characters[np.argmax(scores)]
    return best_char if best_char.isalnum() else ''
def rescale_for_tess(character_img):
    """Normalise a character mask into a centred 32x32 ``int8`` array.

    The mask is morphologically closed, upscaled by 2, rescaled so that its
    larger side is about 20 pixels, thresholded at 0.1, inverted, and pasted
    into the middle of a canvas filled with ones.

    Returns:
        A ``(32, 32)`` ``np.int8`` array.
    """
    # NOTE(review): ``multichannel`` was replaced by ``channel_axis`` in newer
    # scikit-image releases — confirm against the installed version.
    closed = binary_closing(character_img)
    upscaled = rescale(closed, 2, multichannel=False)

    rows, cols = upscaled.shape
    fit_scale = np.minimum(20 / rows, 20 / cols)
    glyph = rescale(upscaled, fit_scale, multichannel=False) > 0.1

    # Centre the glyph inside the 32 x 32 canvas.
    rows, cols = glyph.shape
    top = int((32 - rows) / 2)
    left = int((32 - cols) / 2)
    canvas = np.ones((32, 32), dtype=np.int8)
    canvas[top:top + rows, left:left + cols] = invert(glyph)
    return canvas
def pad_with(vector, pad_width, iaxis, kwargs):
    """Padding callback for ``np.pad`` that fills the border with a constant.

    Args:
        vector: 1-D slice of the already-enlarged array; modified in place.
        pad_width: pair ``(before, after)`` giving the number of padded
            entries at the start and end of ``vector``.
        iaxis: axis currently being padded (unused).
        kwargs: extra keyword arguments given to ``np.pad``; the optional
            ``'padder'`` entry is the fill value (default 0).

    Returns:
        None. ``vector`` is updated in place.
    """
    pad_value = kwargs.get('padder', 0)
    before, after = pad_width[0], pad_width[1]
    vector[:before] = pad_value
    # Use an explicit start index: with ``after == 0`` the slice
    # ``vector[-0:]`` would select (and overwrite) the entire vector.
    vector[len(vector) - after:] = pad_value
def round5(x):
    """Round ``x`` to the nearest multiple of 10, with ties rounding up.

    Despite the name, the step is 10; the "5" is the tie threshold. The
    result is used to build coarse centroid ids, so it is always an ``int``.

    Args:
        x: a real number (int or float).

    Returns:
        The nearest multiple of 10 as an ``int``.
    """
    # divmod floors consistently for quotient and remainder, so negative
    # values are handled correctly (int(x / 10) truncates toward zero while
    # x % 10 floors, which disagree for x < 0).
    tens, remainder = divmod(x, 10)
    return int(tens) * 10 + (10 if remainder >= 5 else 0)
def is_not_recorded(region):
    """Tell whether a region's location has not been accepted yet.

    The region's centroid is snapped with ``round5`` and turned into a
    ``"row_col"`` id, which is looked up in
    ``multilayer_prediction_map_recorded``.

    Returns:
        True when the id is absent from the recorded list, False otherwise.
    """
    centre = region.centroid
    centroid_id = f"{round5(centre[0])}_{round5(centre[1])}"
    return centroid_id not in multilayer_prediction_map_recorded
def _stroke_width_variation(region_mask):
    """Return std/mean of the positive distance-transform values of a mask."""
    distances = distance_transform_edt(region_mask)
    positive = distances[distances > 0]
    return np.std(positive) / np.mean(positive)


def remove_non_text_like_regions(image, original_gray_img):
    """Erase connected regions that do not look like text and vote on the rest.

    Every connected region of ``image`` whose rounded centroid has not been
    recorded yet is checked against a set of geometric heuristics (area,
    aspect ratio, eccentricity, solidity, extent, stroke-width variation).
    Regions failing any heuristic are zeroed in the returned copy. Surviving
    regions are classified with ``predict_alphanumer``; non-empty predictions
    are appended to ``multilayer_prediction_map`` under the centroid id. When
    the same character has been predicted exactly twice for a location, the
    region mask is written into ``final_thresholded_image`` and the id is
    added to ``multilayer_prediction_map_recorded``.

    Args:
        image: 2-D binary image to analyse.
        original_gray_img: grayscale source image; currently unused (it was
            only referenced by a disabled debugging save).

    Returns:
        A copy of ``image`` with the rejected regions set to 0.
    """
    ret_img = np.copy(image)
    label_image = label(image)
    img_height, img_width = image.shape

    # Area limits depend only on the image size, so compute them once.
    min_area = 15 * (img_height * img_width / (600**2))
    max_area = img_height * img_width / 5

    for region in regionprops(label_image):
        if not is_not_recorded(region):
            continue

        minr, minc, maxr, maxc = region.bbox
        aspect_ratio = (maxc - minc) / (maxr - minr)

        # Same tests in the same order as before; the distance transform is
        # last so it is only computed when every cheaper test has passed.
        should_clean = (
            region.area < min_area
            or region.area > max_area
            or aspect_ratio < 0.06
            or aspect_ratio > 3
            or region.eccentricity > 0.995
            or region.solidity < 0.3
            or region.extent < 0.2
            or region.extent > 0.9
            or _stroke_width_variation(region.image) > 0.6
        )

        if should_clean:
            # Zero all pixels of the region in one vectorised assignment.
            coords = region.coords
            ret_img[coords[:, 0], coords[:, 1]] = 0
            continue

        predicted = predict_alphanumer(region.image)
        if not predicted:
            continue

        centroid = region.centroid
        centroid_id = str(round5(centroid[0])) + "_" + str(round5(centroid[1]))
        votes = multilayer_prediction_map.setdefault(centroid_id, [])
        votes.append(predicted)

        # Accept the character the moment it has been seen twice.
        if votes.count(predicted) == 2:
            # NOTE(review): this overwrites the whole bounding box, including
            # background pixels — confirm that clobbering overlapping,
            # previously accepted characters is acceptable.
            final_thresholded_image[minr:maxr, minc:maxc] = region.image
            multilayer_prediction_map_recorded.append(centroid_id)

    return ret_img
# Threshold the grayscale image at several levels and clean each binary mask.
threshold_values = np.arange(0.1, 0.7, 0.1)
binary_images = []
for threshold in threshold_values:
    dark_pixels = img < threshold
    binary_images.append(remove_non_text_like_regions(dark_pixels, img))

# Report the elapsed time in milliseconds since st_time was taken.
ed_time = int(round(time.time() * 1000))
print("total time taken:", ed_time - st_time)
total time taken: 4704
# Bare expression: shows the per-location prediction lists as notebook cell output.
multilayer_prediction_map
{'110_100': ['S', 's', 'S'], '110_110': ['S', 's', 's'], '110_130': ['l', 'l'], '110_140': ['V', 'v', 'v'], '110_160': ['e', 'e'], '110_190': ['D', 'D'], '110_210': ['a', 'a'], '110_220': ['a', 'a'], '110_230': ['t', 't'], '110_240': ['a', 'w'], '110_250': ['a', 'a'], '110_260': ['s', 's'], '110_280': ['a', 'e', 'e'], '110_290': ['t'], '110_300': ['t', 't'], '110_310': ['S', 'S'], '110_50': ['M', 'M'], '110_60': ['M', 'M'], '110_80': ['C', 'a', 'a'], '140_220': ['R', 'R'], '140_240': ['u'], '150_150': ['h', 'F', 'a', 'a'], '150_160': ['n', 'n'], '150_170': ['c'], '150_180': ['7'], '150_190': ['n', 'n'], '150_200': ['d', 'd'], '150_220': ['H'], '150_230': ['a'], '150_240': ['j', '1', '1'], '150_250': ['a', 'T'], '150_260': ['l', 'T', 'm'], '150_270': ['R', 'z'], '150_280': ['m', 'w'], '150_290': ['7'], '150_300': ['a'], '150_310': ['j', '1', 'n', 'n'], '170_120': ['l', 'l'], '170_130': ['e', 'q', 'Q'], '170_140': ['E', 'f', 'g', 'H'], '170_150': ['T', 'r'], '170_160': ['n', 'e', 'Q', 'q'], '170_170': ['y', 'y'], '170_190': ['1', 'D', 'D'], '170_200': ['j', 'T'], '170_210': ['9', 'a'], '170_220': ['J', 'v', 'v'], '170_230': ['l', 'd', 'l'], '170_240': ['m', 'd', 'a'], '170_250': ['U', 'U'], '170_260': ['1', 't'], '170_270': ['1', 't', 'f'], '170_280': ['m', 'm'], '170_290': ['X'], '170_300': ['4', 'a', 'a'], '170_310': ['l', 'n', 'n'], '180_170': ['y'], '210_260': ['9', 'x'], '230_250': ['X'], '230_90': ['P'], '260_320': ['x'], '270_160': ['l'], '270_170': ['l'], '270_260': ['a'], '270_310': ['s'], '270_80': ['f'], '280_100': ['E'], '280_110': ['t', 'e'], '280_320': ['K'], '290_130': ['h'], '300_300': ['x', 'x'], '300_90': ['p'], '320_130': ['m', 'm'], '320_250': ['f'], '330_130': ['h'], '350_260': ['d'], '380_100': ['h'], '390_80': ['l'], '400_130': ['P'], '410_150': ['l'], '430_190': ['s'], '450_170': ['g'], '450_190': ['s'], '470_200': ['e'], '480_0': ['l'], '480_20': ['C'], '480_200': ['H'], '480_30': ['d'], '500_10': ['s'], '60_200': ['a', 'u', 'h'], '60_230': 
['u', 'u'], '70_180': ['M', 'M'], '70_200': ['L', 'l', 'z', 't', 'l'], '70_210': ['1'], '70_220': ['1', 'n', 'n'], '70_230': ['L', 'l', 'z', 'z'], '70_240': ['1'], '70_250': ['1', 'n', 'n'], '70_270': ['q', 'g', 'g'], '70_300': ['O', 'O'], '70_310': ['f', 'f'], '80_270': ['H', 'g', 'g'], '90_130': ['a']}
# Plot the accumulated mask of accepted characters.
fig, ax = plt.subplots(figsize=(20,20))
ax.imshow(final_thresholded_image)
# NOTE(review): the OCR result is neither assigned nor printed, so it is
# discarded — confirm whether it was meant to be displayed.
pytesseract.image_to_string(final_thresholded_image)
plt.show()
# One subplot row per threshold level, each titled with its threshold value.
fig, ax = plt.subplots(nrows=len(threshold_values), figsize=(20, 120))
for index, bin_img in enumerate(binary_images):
    panel = ax[index]
    panel.imshow(bin_img)
    panel.set_title(threshold_values[index])
plt.show()
# Plot a single cleaned binary image on its own figure.
fig, ax = plt.subplots(figsize=(20,120))
# Position in binary_images / threshold_values of the level to inspect.
index = 4
ax.imshow(binary_images[index])
ax.set_title(threshold_values[index])
plt.show()
from skimage.measure import label, regionprops
from skimage.color import label2rgb
# Which cleaned binary image serves as the backdrop of the overlay.
image_index = 4

# Label the connected regions of the accepted-character mask.
label_image = label(final_thresholded_image)

# Label 0 is declared as background via `bg_label`; `bg_color` and `kind`
# are left at their defaults.
image_label_overlay = label2rgb(
    label_image,
    image=binary_images[image_index],
    bg_label=0,
)

fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(image_label_overlay)

# Collect every region mask and print its prediction next to its bounding box.
region_images = []
for region in regionprops(label_image):
    region_images.append(region.image)
    print(predict_alphanumer(region.image), region.bbox)

ax.set_axis_off()
plt.tight_layout()
plt.show()
T (7, 577, 159, 772) v (7, 786, 26, 814) o (44, 55, 79, 82) R (44, 127, 78, 152) e (44, 156, 79, 182) P (44, 205, 78, 229) o (44, 230, 79, 257) R (44, 290, 78, 315) 0 (44, 383, 79, 408) T (44, 445, 79, 469) T (44, 474, 79, 498) Y (45, 34, 79, 55) U (45, 86, 79, 111) U (45, 261, 79, 286) l (45, 319, 78, 331) N (45, 333, 78, 358) U (45, 413, 79, 438) A (45, 498, 78, 521) B (47, 624, 81, 650) R (47, 655, 81, 681) o (47, 789, 82, 816) 5 (47, 929, 82, 953) P (47, 958, 81, 982) U (48, 688, 82, 712) H (48, 716, 81, 742) Y (48, 768, 81, 789) U (48, 820, 82, 845) L (48, 860, 82, 880) L (48, 884, 82, 904) l (48, 983, 81, 995) L (48, 996, 82, 1016) L (48, 1021, 81, 1041) l (48, 1066, 71, 1078) T (97, 43, 131, 67) e (97, 98, 131, 124) 6 (97, 147, 131, 172) L (97, 176, 131, 196) A (97, 199, 130, 222) 5 (97, 230, 131, 255) 5 (97, 259, 131, 284) H (98, 68, 130, 94) l (98, 309, 121, 321) o (114, 899, 141, 924) o (198, 695, 225, 721) e (220, 287, 352, 471) o (335, 418, 345, 428) o (368, 329, 378, 339) H (482, 49, 561, 180) v (619, 10, 634, 30) v (619, 41, 637, 81) l (619, 444, 638, 451) v (619, 574, 646, 607) v (619, 634, 645, 676) o (662, 78, 697, 104) B (662, 156, 696, 183) l (662, 212, 696, 224) o (662, 255, 696, 281) A (662, 337, 696, 360) l (662, 416, 686, 428) Y (663, 57, 696, 78) U (663, 109, 696, 134) L (663, 188, 696, 207) N (663, 225, 696, 251) M (663, 303, 696, 334) N (663, 367, 696, 392) 2 (715, 798, 732, 818) d (755, 649, 797, 702) o (770, 66, 805, 101) X (781, 9, 799, 25) o (1151, 651, 1167, 667) o (1152, 675, 1167, 690)
# Compare the Keras model with Tesseract on every extracted character mask.
# ``index`` is not used in the loop body; it is kept so the loop still leaves
# the same module-level name behind as before.
for index, bin_char in enumerate(region_images):
    # Normalise once and feed the same 32x32 array to both recognisers
    # (previously rescale_for_tess ran twice per character).
    char_input = rescale_for_tess(bin_char)
    predicted1 = reconstructed_model.predict(char_input.reshape(1, 32, 32, 1))
    predicted2 = pytesseract.image_to_string(char_input, config='--psm 10')
    # Turn the model scores into a character via the highest-scoring index.
    predicted1 = all_characters[np.argmax(predicted1)]
    print(predicted1, predicted2)
T r v a o 0 R z e c P p o 0 R ? 0 0 T 7 T 7 Y ¥ U U U U l 7 N x U U A A B 7 R Rg o G 5 5 P P U U H H Y ¥ U U L c L L l 7 L L L c l Z T 7 e c 6 6 L c A A 5 S 5 5 H H l Z o o oe e ~ o = o = H my v r v > l , v yr v vy o 0 B B l 7 o D A A l , Y ¥ U U L c N NW M N Nn 2 a d = o Ss X \ o eo o
# Alternative preprocessing for one character (region 31): pad by 5 pixels on
# every side using the pad_with callback, resize to 32x32, threshold at 0.1,
# then invert.
bin_char = invert(resize(np.pad(region_images[31], 5, pad_with), (32,32)) > 0.1)
# Run Tesseract with page segmentation mode 10 on the result.
predicted = pytesseract.image_to_string(bin_char, config='--psm 10')
print("predicted", predicted)
plt.imshow(bin_char)
predicted H
<matplotlib.image.AxesImage at 0x7f5788aedbe0>
# Inspect a single character (region 34) with both recognisers.
character = rescale_for_tess(region_images[34])
print(pytesseract.image_to_string(character, config='--psm 10'))
# Print floats with three decimals from here on.
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
predicted = reconstructed_model.predict(character.reshape(1,32,32,1))
# Show the highest-scoring character together with its score.
print(all_characters[np.argmax(predicted)], np.max(predicted))
plt.imshow(character)
5 5 0.4660305
<matplotlib.image.AxesImage at 0x7f57885d8a58>