For the following exercises, you need Python 3 with some basic libraries (see below). All images necessary for the session are available here.
If you use your own Python 3 install, you should download the images, put them in a convenient directory and update the path in the next cell.
For some parts of the session (cells with commands written as to do
), you are supposed to code by yourself.
# Root directory containing the session images; update it if you downloaded
# the images somewhere else.
path = './'
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# Jupyter magic: render matplotlib figures inline in the notebook.
%matplotlib inline
import scipy.signal as scs
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
Load and display a color image. A color image is made of three channels : red, green and blue. A color image in $\mathbb{R}^{N\times M}$ is stored as a $N\times M\times 3$ matrix.
Be careful with the functions plt.imread()
and plt.imshow()
of matplotlib
.
plt.imread()
reads png images as numpy arrays of floating points between 0 and 1, but it reads jpg or bmp images as numpy arrays of 8 bit integers.
In this practical session, we assume floating point images between 0 and 1, so if you use jpg or bmp images, you should normalize them to $[0,1]$.
If 'im' is an image encoded as a double numpy array, plt.imshow(im)
will display all values above 1 in white and all values below 0 in black. If the image 'im' is encoded on 8 bits though, plt.imshow(im)
will display 0 in black and 255 in white.
# Load the parrot image; plt.imread returns floats in [0,1] for PNG files.
imrgb = plt.imread(path+"parrot.png")
# extract the three (red,green,blue) channels from imrgb
def RGB_channels(imrgb):
    """Split an RGB image of shape (N, M, 3) into its three colour planes.

    Returns the tuple (red, green, blue), each a (N, M) view of the input.
    """
    red, green, blue = (imrgb[:, :, c] for c in range(3))
    return red, green, blue
# Split the image into its three colour planes.
imred, imgreen, imblue = RGB_channels(imrgb)
#image size
[nrow,ncol,nch]=imrgb.shape
#we display the images: the original and each channel as a gray-level map
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
axes[0, 0].imshow(imrgb)
axes[0,0].set_title('Original image')
# vmin/vmax pin the display range to [0,1] so the channels are comparable
axes[0, 1].imshow(imred, cmap="gray", vmin=0, vmax=1)
axes[0,1].set_title('red channel')
axes[1, 0].imshow(imgreen, cmap="gray", vmin=0, vmax=1)
axes[1,0].set_title('green channel')
axes[1, 1].imshow(imblue, cmap="gray", vmin=0, vmax=1)
axes[1,1].set_title('blue channel')
fig.tight_layout()
It might be useful to convert the color image to gray level. This can be done by averaging the three channels, or by computing another well chosen linear combination of the coordinates R, G and B.
# Gray level obtained by averaging the three colour channels.
imgray = np.sum(imrgb, axis=2) / 3
plt.figure(figsize=(5, 5))
plt.imshow(imgray, cmap='gray', vmin=0, vmax=1)
plt.show()
Color opponent spaces are characterized by a channel representing an achromatic signal, as well as two channels encoding color opponency. The two chromatic channels generally represent an approximate red-green opponency and yellow- blue opponency. $$ O_1 = \frac 1 {\sqrt{2}} (R-G),\; O_2 = \frac 1 {\sqrt{6}} (R+G-2B),\; O_3 = \frac 1 {\sqrt{3}} (R+G+B)$$
def Opponent_spaces(imrgb):
    """Map an RGB image to the opponent colour space (O1, O2, O3).

    O1 encodes red-green opponency, O2 an approximate yellow-blue
    opponency, and O3 the achromatic (intensity) component.
    """
    R, G, B = RGB_channels(imrgb)
    rg_opponent = (R - G) / np.sqrt(2)
    yb_opponent = (R + G - 2 * B) / np.sqrt(6)
    achromatic = (R + G + B) / np.sqrt(3)
    return rg_opponent, yb_opponent, achromatic
O1, O2, O3 = Opponent_spaces(imrgb)
# show the three opponent channels side by side (imshow autoscales each one)
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10))
for axis, (channel, label) in zip(axes, ((O1, 'O1'), (O2, 'O2'), (O3, 'O3'))):
    axis.imshow(channel, cmap='gray')
    axis.set_title(label)
fig.tight_layout()
These colorspaces are obtained by a non-linear transformation of the RGB coordinates into polar coordinates. The luminance (or value V) corresponds to the vertical axis of the cylinder; the hue corresponds to the angular coordinate and the saturation to the distance from the axis. See https://en.wikipedia.org/wiki/HSL_and_HSV for more details.
The conversion from RGB to HSI boils down to $$ H=atan\left(\frac{O_1}{O_2}\right),\;S=\sqrt{O_1^2+O_2^2},\; I=O_3$$
def HSI_space(imrgb):
    """Convert an RGB image into (H, S, I) coordinates via the opponent space.

    H = arctan(O1/O2), S = sqrt(O1^2 + O2^2), I = O3.
    """
    O1, O2, O3 = Opponent_spaces(imrgb)
    # NOTE(review): the 0.001 offset avoids a division by zero when O2 == 0,
    # at the price of a small bias in the hue (and it would still fail for
    # O2 == -0.001 exactly); np.arctan2 would be the robust alternative.
    hue = np.arctan(O1 / (O2 + 0.001))
    saturation = np.sqrt(O1 ** 2 + O2 ** 2)
    return hue, saturation, O3
# Display the three HSI channels of the parrot image.
H, S, I = HSI_space(imrgb)
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10))
axes[0].imshow(H, cmap='gray')
axes[0].set_title('H')
axes[1].imshow(S, cmap='gray')
axes[1].set_title('S')
axes[2].imshow(I, cmap='gray')
axes[2].set_title('I')
fig.tight_layout()
parrot
and parrotcompressed.
imrgb_compressed = plt.imread(path+"parrotcompressed.png")
H, S, I = HSI_space(imrgb_compressed)
# show the HSI channels of the compressed image side by side
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10))
for axis, (channel, label) in zip(axes, ((H, 'H'), (S, 'S'), (I, 'I'))):
    axis.imshow(channel, cmap='gray')
    axis.set_title(label)
fig.tight_layout()
The conversion from RGB to HSV is a little bit more complex, you can use the function colors.rgb_to_hsv
of matplotlib
.
# HSV conversion through matplotlib's built-in helper.
imhsv = matplotlib.colors.rgb_to_hsv(imrgb)
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 10))
for idx, label in enumerate(('H', 'S', 'V')):
    axes[idx].imshow(imhsv[:,:,idx], cmap='gray')
    axes[idx].set_title(label)
fig.tight_layout()
def saturation_reduction(imrgb, alpha):
    """Scale the saturation channel of an RGB image by `alpha`.

    The image is converted to HSV, S is multiplied by alpha, and the
    result converted back to RGB.
    """
    hsv = matplotlib.colors.rgb_to_hsv(imrgb)
    hsv[:, :, 1] = hsv[:, :, 1] * alpha
    return matplotlib.colors.hsv_to_rgb(hsv)
alpha = 0.1
# strongly desaturated version of the image (saturation scaled by 0.1)
imrgb_redsat = saturation_reduction(imrgb, alpha)
plt.figure(figsize=(8,8))
plt.imshow(imrgb_redsat)
plt.show()
def rotation_hue(imrgb, delta_h):
    """Rotate the hue of an RGB image by `delta_h` (hue is periodic in [0, 1)).

    The image is converted to HSV, the H channel is shifted modulo 1, and the
    result converted back to RGB. Unlike a single `-1` correction, the modulo
    handles negative shifts and shifts larger than 1 as well.
    """
    imhsv = matplotlib.colors.rgb_to_hsv(imrgb)
    imhsv[:, :, 0] = (imhsv[:, :, 0] + delta_h) % 1.0
    return matplotlib.colors.hsv_to_rgb(imhsv)
delta_h = 0.05
# shift every hue by delta_h (hue wraps around at 1)
imrgb_rothue = rotation_hue(imrgb, delta_h)
plt.figure(figsize=(8,8))
plt.imshow(imrgb_rothue)
plt.show()
def gamma_correction(imrgb, gamma):
    """Apply gamma correction to the value (V) channel in HSV space.

    For values in [0,1], gamma < 1 brightens the image and gamma > 1
    darkens it; hue and saturation are left untouched.
    """
    hsv = matplotlib.colors.rgb_to_hsv(imrgb)
    hsv[:, :, 2] = np.power(hsv[:, :, 2], gamma)
    return matplotlib.colors.hsv_to_rgb(hsv)
gamma = 0.25
# gamma < 1 brightens the dark regions of the image
imrgb_gcor = gamma_correction(imrgb, gamma)
plt.figure(figsize=(8,8))
plt.imshow(imrgb_gcor)
plt.show()
Simulation of the set of monochromatic colors in RGB. The cone responses are approximated by Gaussian distributions.
# Gaussian approximations of the three cone responses (wavelength in nm).
# The original code had the red and blue peaks swapped: L cones (perceived as
# red) peak near 570 nm, M (green) near 540 nm and S (blue) near 440 nm.
x = np.linspace(0, 1000, 1000)
r = np.exp(-(x-570)**2/(2*40**2))
g = np.exp(-(x-540)**2/(2*40**2))
b = np.exp(-(x-440)**2/(2*40**2))
fig = plt.figure(figsize=(15, 5))
axis = fig.add_subplot(1, 2, 1)
# plot each response curve in the colour it is meant to model
axis.plot(r,'r')
axis.plot(g,'g')
axis.plot(b,'b')
# the set of monochromatic colours traces a curve inside the RGB cube
axis = fig.add_subplot(1, 2, 2, projection="3d")
axis.scatter(r,g,b)
plt.show()
# to do...
We will now display the color distribution of an RGB image as a 3D point cloud. If the image is large, the point cloud will be too dense for vizualization. A solution is to subsample randomly this point cloud for vizualization.
# Display an image next to a random subsample of its RGB point cloud.
imrgb = plt.imread(path+"simpson512.png")
fig = plt.figure(figsize=(15, 7))
axis = fig.add_subplot(1, 2, 1)
axis.imshow(imrgb)
[nrow,ncol,nch] = imrgb.shape
# flatten the image to a (npixels, 3) list of colours
X = imrgb.reshape((nrow*ncol,3))
nb = 3000
# random subsample (possibly with repetitions) to keep the 3D plot readable
idx = np.random.randint(X.shape[0], size=(nb,))
Xs = X[idx, :]
axis = fig.add_subplot(1, 2, 2, projection="3d")
# each point is drawn with its own colour (c=Xs)
axis.scatter(Xs[:, 0], Xs[:,1],Xs[:, 2], c=Xs,s=40)
axis.set_xlabel("Red")
axis.set_ylabel("Green")
axis.set_zlabel("Blue")
plt.show()
Another solution is to perform clustering on the point cloud before displaying it, for instance with the Kmeans algorithm (which boils down to Lloyd-Max quantization for gray-level images). The 'scatter' command of matplotlib permits to display the point cloud with colors based on the weights of the points. We will need the scikitlearn package for 'kmeans'.
def color_quantization_with_kmeans(imrgb,k):
    """Quantize the colours of an RGB image into k classes with K-means.

    Shows the quantized image next to the cluster centres displayed as a
    3D point cloud, with marker sizes proportional to the cluster weights.
    """
    nrow, ncol, nch = imrgb.shape
    pixels = imrgb.reshape((nrow * ncol, 3))
    labels = KMeans(n_clusters=k).fit_predict(pixels)
    centres = np.zeros((k, 3))
    weights = np.zeros((k))
    for c in range(k):
        cluster = pixels[labels == c]
        centres[c, :] = np.mean(cluster, 0)
        # fraction of the pixels falling in this class
        weights[c] = cluster.shape[0] / pixels.shape[0]
    fig = plt.figure(figsize=(15, 7))
    axis = fig.add_subplot(1, 2, 1)
    # replace every pixel by the centre of its class
    axis.imshow(centres[labels].reshape((nrow, ncol, 3)), cmap='gray')
    axis = fig.add_subplot(1, 2, 2, projection="3d")
    axis.scatter(centres[:, 0], centres[:, 1], centres[:, 2], c=centres, s=5e4 * weights)
    axis.set_xlabel("Red")
    axis.set_ylabel("Green")
    axis.set_zlabel("Blue")
    plt.show()
k = 30 # number of classes
color_quantization_with_kmeans(imrgb,k)
k = 30
# add i.i.d. Gaussian noise of std 5/255; values may leave [0,1] slightly
imnoise = imrgb + 5/255*np.random.randn(nrow,ncol,3)
color_quantization_with_kmeans(imnoise,k)
Assume that a light source has a constant spectral density on the range of wavelengths $[a,b]$, i.e $$ f(\lambda) = \frac{1}{b-a}\mathbf{1}_{[a,b]}(\lambda).$$ What is the spectral density of this light source expressed as a function of frequency instead of wavelength ? Recall that frequency and wavelength of a light source are related by $\nu = \frac c \lambda$, with $c$ the light speed. What can be deduced on the possible definition of a white light ?
*Answer:* there is no good definition of a white light. If a random variable $X$ follows a uniform distribution on $[a,b]$, then the random variable $\frac c{X}$ is such that
$\mathbb{P}[\frac c{X}< \nu] = \mathbb{P}[X > c/\nu] = \frac{b-c/\nu}{b-a}$ for $\nu\in[c/b,c/a]$, so the probability density function of $\frac c{X}$ is the function $\nu \mapsto \frac{c}{(b-a)\nu^2}\mathbf{1}_{[c/b,c/a]}(\nu)$, which is not constant in frequency.
The chromaticities of objects in a scene
are highly dependent on the light sources. The goal of the white
balance algorithms is to estimate the color $e = (e_R,e_G,e_B)$ of the
illuminant and to globally correct the image using this estimate so
that it appears as if taken under a canonical illuminant, by
computing
$$R \leftarrow \frac{R}{e_R},\; G \leftarrow \frac{G}{e_G},\; B \leftarrow \frac{B}{e_B}.$$
In this exercice, we propose to code and test some famous white balance algorithms on the image 'moscou.bmp' below. The white balance of the image is clearly not correct (the walls should be white).
im_moscow = plt.imread(path+'moscou.bmp')/255
plt.figure(figsize=(7, 7))
plt.imshow(im_moscow)
plt.show()
The White-Patch assumption supposes that a surface with perfect reflectance is present in the scene and that this surface corresponds to the brightest points in the image. This results in the well-known Max-RGB algorithm, which infers the illuminant by computing separately the maxima $(R_{max},G_{max},B_{max})$ of the three RGB channels : $$e_R= R_{max},\; e_G= G_{max},\; e_B=B_{max}.$$
In practice, relying only on the brightest pixel in the image might not be a good idea. For instance on the image above, this brightest pixel has the value (1,1,1) although the white balance of the image is clearly not correct.
A more robust solution consists in choosing the mean value of a small percentage of the brightest pixels for $e_R$, $e_G$ and $e_B$.
# White-Patch estimation: the illuminant colour is taken, channel by channel,
# as the mean gray level of the brightest fraction p of the pixels.
p = 0.1 # percentage of brightest pixels used for the white patch
e = np.zeros((3,))
for k in range(3):
# normalized 256-bin histogram of channel k over [0,1]
imhisto, bins = np.histogram(im_moscow[:,:,k], range=(0,1), bins = 256)
imhisto = imhisto/(im_moscow.shape[0]*im_moscow.shape[1])
imhistocum = np.cumsum(imhisto)
i = np.linspace(0,255,256)
# mean bin value among the bins above the (1-p) quantile, rescaled to [0,1]
e[k] = np.sum(imhisto[imhistocum>1-p]*i[imhistocum>1-p])/np.sum(imhisto[imhistocum>1-p])/255
#e = e/np.mean(e) # ensure that the image mean gray level won't change after multiplication by e
print(e)
# divide each channel by its illuminant estimate
new = np.zeros(im_moscow.shape)
new[:,:,0] = im_moscow[:,:,0]/e[0]
new[:,:,1] = im_moscow[:,:,1]/e[1]
new[:,:,2] = im_moscow[:,:,2]/e[2]
# values above 1 are clipped by imshow (hence the warning printed below)
fig, axe = plt.subplots(1,2,figsize=(15,15))
axe[0].imshow(im_moscow)
axe[0].set_title('Original')
axe[1].imshow(new)
axe[1].set_title('White-Patch correction')
fig.tight_layout()
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
[0.91496901 0.79255944 0.53712305]
A very popular way to estimate illuminant chromaticities in images is to assume that under a canonic light, the average RGB value observed in a scene is grey. This assumption gives rise to the Grey-World algorithm, which consists in computing the average color in the image and compensating for the deviation due to the illuminant, i.e. $$e_R = \frac2{|\Omega|}\int R(x)dx, \qquad e_G = \frac2{|\Omega|}\int G(x) dx, \qquad e_B = \frac2{|\Omega|}\int B(x) dx.$$
def gray_world_white_balance(im):
    """Grey-World white balance: assumes the average scene colour is mid-grey.

    The illuminant is estimated as twice the per-channel mean (so a channel
    of mean 0.5 gets e = 1, following the formula above), and each channel
    is divided by its estimate.

    Parameters:
        im: float image of shape (N, M, 3) with values in [0, 1].
    Returns:
        the corrected image, same shape as `im` (values may exceed 1;
        imshow clips them for display).
    """
    # per-channel means over all pixels
    e = 2 * im.reshape(-1, 3).mean(axis=0)
    return im / e
# Grey-World correction of the Moscow image
gray_world_white_balance(im_moscow)
The Gray-World algorithm was generalized into the Shades-of-Gray algorithm by adding the Minkowski norm p:
$$e_R = 2\left(\frac1{|\Omega|}\int R(x)^pdx\right)^{1/p},\qquad e_G = 2\left(\frac1{|\Omega|}\int G(x)^pdx\right)^{1/p}, \qquad e_B = 2\left(\frac1{|\Omega|}\int B(x)^pdx\right)^{1/p}.$$def shades_of_gray_white_balance(im,p):
# to do...
p=2
shades_of_gray_white_balance(im_moscow,p)
The previous algorithm is extended by smoothing locally the three channels by a Gaussian filter of standard deviation $\sigma$.
The following exercice can be done with the images parrot
or lighthouse
.
We will first create a RAW image from the RGB one: at each pixel, only one channel is kept
(red, green or blue), following the Bayer CFA. Remind that the bayer
CFA has the following structure :
| G | R | G | R |
|---+---+---+---|
| B | G | B | G |
|---+---+---+---|
| G | R | G | R |
|---+---+---+---|
| B | G | B | G |
Load the parrot
or lighthouse
image and create a RAW image using the previous Bayer filter. Display the resulting image.
imrgb = plt.imread(path+"parrot.png")
# extract the three (red,green,blue) channels from imrgb — reuse the helper
# defined earlier instead of duplicating the slicing
imred, imgreen, imblue = RGB_channels(imrgb)
#image size
[nrow,ncol,nch] = imrgb.shape
# Build the RAW (mosaicked) image: each pixel keeps exactly one channel,
# with green on the (even,even) and (odd,odd) sites, red on (even,odd)
# and blue on (odd,even).
raw = np.zeros(imrgb.shape, imrgb.dtype)
raw[0:nrow:2, 0:ncol:2, 1] = imgreen[0:nrow:2, 0:ncol:2]
raw[1:nrow:2, 1:ncol:2, 1] = imgreen[1:nrow:2, 1:ncol:2]
raw[0:nrow:2, 1:ncol:2, 0] = imred[0:nrow:2, 1:ncol:2]
raw[1:nrow:2, 0:ncol:2, 2] = imblue[1:nrow:2, 0:ncol:2]
plt.figure(figsize=(15, 15))
plt.imshow(raw)
plt.show()
Now, in order to retrieve the original image, we can try to interpolate each channel, for instance by bilinear interpolation.
def interpolate_3_channels(raw):
    """Bilinear demosaicing: fill the missing samples of each channel of a
    Bayer RAW image by averaging the available neighbours.

    Green pixels sit on a quincunx (see the RAW construction above), so one
    4-neighbour pass completes them; red and blue sit on a quarter grid and
    need a diagonal pass followed by a 4-neighbour pass.
    """
    # 4-connected average (north/south/east/west)
    cross = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]) / 4
    # average of the four diagonal neighbours
    corners = np.array([[1, 0, 1], [0, 0, 0], [1, 0, 1]]) / 4
    red = raw[:, :, 0]
    green = raw[:, :, 1]
    blue = raw[:, :, 2]
    green = green + scs.convolve2d(green, cross, mode='same')
    filled = []
    for channel in (red, blue):
        channel = channel + scs.convolve2d(channel, corners, mode='same')
        channel = channel + scs.convolve2d(channel, cross, mode='same')
        filled.append(channel)
    red, blue = filled
    demosaiced = np.zeros(raw.shape, raw.dtype)
    demosaiced[:, :, 0] = red
    demosaiced[:, :, 1] = green
    demosaiced[:, :, 2] = blue
    return demosaiced
output = interpolate_3_channels(raw)
# compare the demosaiced result with the original, with a zoom on a detail
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 15))
axes[0].set_title('After linear demosaicing')
axes[0].imshow(output)
axes[1].set_title('Zoom After linear demosaicing')
axes[1].imshow(output[300:350,250:300])
axes[2].set_title('Zoom original')
axes[2].imshow(imrgb[300:350,250:300])
plt.show()
Zoom on the metal grid and observe the mosaic artifacts. How can you explain them ?
In order to understand these artifacts, create a white image with a black square in the first top quarter and apply the same process (RAW image creation and demosaicing by bilinear interpolation of each channel). Comment the result.
nrow = 50
ncol = 50
# white image (value 255) with a black square in the top-left quarter
square = np.ones((nrow,ncol,3))*255
square[0:round(nrow/2), 0:round(ncol/2), :] = 0
# same CFA sampling as for the parrot image above
raw = np.zeros(square.shape, square.dtype)
raw[0:nrow:2, 0:ncol:2, 1] = square[0:nrow:2, 0:ncol:2, 1]
raw[1:nrow:2, 1:ncol:2, 1] = square[1:nrow:2, 1:ncol:2, 1]
raw[0:nrow:2, 1:ncol:2, 0] = square[0:nrow:2, 1:ncol:2, 0]
raw[1:nrow:2, 0:ncol:2, 2] = square[1:nrow:2, 0:ncol:2, 2]
# (a stray plt.figure(figsize=(6, 6)) was removed here: it only opened an
# empty figure, as shown by the "<Figure size 432x432 with 0 Axes>" output)
fig, axe = plt.subplots(1,2,figsize=(15,15))
axe[0].imshow(square/255)
axe[0].set_title('Square')
axe[1].imshow(raw/255)
axe[1].set_title('Raw')
plt.show()
<Figure size 432x432 with 0 Axes>
output = interpolate_3_channels(raw)
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 15))
axes[0].set_title('After linear demosaicing')
axes[0].imshow(output/255)
# zoom on the centre: colour fringes appear along the black/white edges
axes[1].set_title('Zoom centre')
axes[1].imshow(output[19:29,19:29,:]/255)
axes[2].set_title('Zoom bord')
axes[2].imshow(output[40:,20:30,:]/255)
plt.show()