# Copyright 2019 - 2022 United Kingdom Research and Innovation
# Copyright 2019 - 2022 The University of Manchester
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Authored by: Evangelos Papoutsellis (UKRI-STFC)
# Gemma Fardell (UKRI-STFC)
# Laura Murgatroyd (UKRI-STFC)
# Letizia Protopapa (UKRI-STFC)
# Casper da Costa-Luis (UKRI-STFC)
# Margaret Duff (UKRI-STFC)
In this notebook, we present how to denoise and inpaint our first multichannel data using CIL, i.e., a data with only 3 channels that contains information from the Red, Green and Blue bands. We start by loading a colour image from CIL.
# import dataexample that contains different colour images
from cil.utilities import dataexample, noise
# import other libraries
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
from cil.utilities.display import show2D
from cil.framework import BlockDataContainer
# Load Rainbow image
data = dataexample.RAINBOW.get(size=(500, 500), scale=(0, 1))
data.reorder(['horizontal_y', 'horizontal_x','channel'])
plt.figure(figsize=(10,10))
plt.imshow(data.array)
plt.title("Colour Image")
plt.show()
show2D(data, slice_list=[('channel',0),('channel',1),('channel',2)],
title=["Red","Green","Blue"], origin="upper", num_cols=3);
Let $u:\Omega\subset\mathbb{R}^{N\times M}\rightarrow\mathbb{R}^{N\times M\times3}$ a colour image that depicts a real perfect scene (the unknown). Typically, we assume that $u$ has been transformed through a continuous and linear operation $\mathcal{L}$ (forward operator). Additionally, we have a noisy component $\eta$ that usually follows a certain distribution, e.g., Gaussian , Salt and Pepper (Impluse). The Imaging model is defined as
$$ u_{0} = \mathcal{L}u + \eta\,. $$![]() | ![]() |
where $\mathcal{D}$ is a subdomain of $\Omega$ (inpainting domain). In the inpainting domain there is no data information available and we are trying to reconstruct $u$ based on the information provided from the known region of $u_{0}$.
![]() | ![]() |
In this notebook, we will consider the cases of
We solve the following minimisation problem to denoise our coloured image:
$$ u^{*} = \underset{u}{\operatorname{argmin}} \frac{1}{2}\| b - u \|^{2}_{2} + \alpha\,\mathrm{VTV}(u) $$where the data $b$ is corrupted with Gaussian noise and $\mathrm{\textbf{VTV}}$ is the Vectorial extension of the classical Total variation regularisation for coloured images. We recall that the definition of the (isotropic) Total Variation, used for gray-valued images, is
$$ \mathrm{TV}(u) = \|Du\|_{2,1} = \sum_{i,j}^{M,N}\big(|(D_{y}u, D_{x}u)|_{2}\big)_{i,j} = \sum_{i,j}^{M,N} \big(\sqrt{ (D_{y}u_{k})^{2} + (D_{x}u_{k})^{2}}\big)_{i,j}. $$Now, for vector-valued images the gradient is $Du=(Du_{1}, Du_{2}, Du_{3})$, where for each RGB channels $k=1,2,3$, $Du_{k}:=(D_{y}u_{k}, D_{x}u_{k})$.
For this type of multichannel data, we can create different configurations on how the colour channels, the derivatives and the image pixels are correlated and under which norm. One generic approach for this regulariser is presented in Duran et al, where the Collaborative Total variation is introduced, i.e.,
$$ \|A\|_{p,q,r} := \bigg(\sum_{i=1}^{N}\quad\bigg(\sum_{j=1}^{M}\quad\bigg(\sum_{k=1}^{C} |A_{i,j,k}|^{p}\bigg)^{\frac{q}{p}}\quad\bigg)^{\frac{r}{q}}\quad\bigg)^{\frac{1}{r}}\quad . $$For simplicity, in this notebook, we will use the Channelwise TV definition, namely,
$$ \mathrm{VTV}(u) := \|D u\|_{2,1} = \sum_{k=1}^{3}\sum_{i,j=1}^{M,N} (|Du_{k}|_{2})_{i,j} = \sum_{k=1}^{3}\sum_{i,j=1}^{M,N} \big( \sqrt{ (D_{y}u_{k})^{2} + (D_{x}u_{k})^{2}}\big) = \sum_{k=1}^{3} \mathrm{TV}(u_{k}). $$The above definition corresponds to the $\ell_{2,1,1}$ (derivative, pixels, colour) Collaborative TV. This means that, an $\ell_{2}$ norm is applied for the derivatives, followed by an $\ell_{1}$ norm for the pixels of the image and a final $\ell_{1}$ norm for the three channels.
# Import Total variation
from cil.optimisation.functions import TotalVariation
# Load Rainbow data
data = dataexample.RAINBOW.get(size=(500,500), scale=(0,1))
data.reorder(['horizontal_y', 'horizontal_x','channel'])
noisy_data = noise.gaussian(data, seed=10, var=0.005)
images = [data.as_array(), noisy_data.as_array(),
data.as_array()[:,:,0], noisy_data.as_array()[:,:,0],
data.as_array()[:,:,1], noisy_data.as_array()[:,:,1],
data.as_array()[:,:,2], noisy_data.as_array()[:,:,2]]
#create our custom colour maps for RGB images
from matplotlib.colors import LinearSegmentedColormap
colors = [(0, 0, 0), (1, 0, 0)] # first color is black, last is red
cm_r = LinearSegmentedColormap.from_list("Custom", colors, N=20)
colors = [(0, 0, 0), (0, 1, 0)] # first color is black, last is green
cm_g = LinearSegmentedColormap.from_list("Custom", colors, N=20)
colors = [(0, 0, 0), (0, 0, 1)] # first color is black, last is blue
cm_b = LinearSegmentedColormap.from_list("Custom", colors, N=20)
labels_y = ["Red", "Green","Blue"]
labels_x = ["Ground truth","Noisy data"]
# set fontszie xticks/yticks
plt.rcParams['xtick.labelsize']=15
plt.rcParams['ytick.labelsize']=15
fig = plt.figure(figsize=(20, 20))
grid = AxesGrid(fig, 111,
nrows_ncols=(4, 2),
axes_pad=0.1)
for k, ax in enumerate(grid):
img = ax.imshow(images[k])
if k < 2:
ax.set_title(labels_x[k],fontsize=25)
elif k==2:
ax.set_ylabel(labels_y[0],fontsize=25)
img.set_cmap(cm_r)
elif k==3:
img.set_cmap(cm_r)
elif k==4:
ax.set_ylabel(labels_y[1],fontsize=25)
img.set_cmap(cm_g)
elif k== 5:
img.set_cmap(cm_g)
elif k==6:
ax.set_ylabel(labels_y[2],fontsize=25)
img.set_cmap(cm_b)
elif k==7:
img.set_cmap(cm_b)
plt.show()
We solve the above minimisation problem using the proximal
method of the TotalVariation
class that was used in previous notebooks. Recall, that given a function $f$, the proximal operator of $f$ is
This definition is exactly the same as the above minimisation problem, if we replace $f$ by $\alpha\mathrm{VTV}$, $x$ with $b$ and $\tau=1.0$. Therefore, the proximal operator of VTV at $b$ is
$$\mathrm{prox}_{\tau (\alpha \mathrm{VTV})}(b)\, .$$alpha = 0.05
TV = alpha * TotalVariation(max_iteration=500, backend='numpy')
proxTV = TV.proximal(noisy_data, tau=1.0)
images = [data.as_array(), noisy_data.as_array(), proxTV.as_array(),
data.as_array()[:,:,0], noisy_data.as_array()[:,:,0], proxTV.as_array()[:,:,0],
data.as_array()[:,:,1], noisy_data.as_array()[:,:,1], proxTV.as_array()[:,:,1],
data.as_array()[:,:,2], noisy_data.as_array()[:,:,2], proxTV.as_array()[:,:,2]],
labels_x = ["Ground Truth", "Noisy Data", "TV denoising",
"(Red) Ground Truth", " (Red) Noisy Data", "(Red) TV denoising",
"(Green) Ground Truth", "(Green) Noisy Data", " (Green) TV denoising",
"(Blue) Ground Truth", "(Blue) Noisy Data", "(Blue) TV denoising"]
# set fontsize xticks/yticks
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
fig = plt.figure(figsize=(25, 25))
grid = AxesGrid(fig, 111,
nrows_ncols=(4, 3),
axes_pad=0.5,
cbar_mode=None)
for k, ax in enumerate(grid):
img = ax.imshow(images[0][k])
ax.set_title(labels_x[k],fontsize=25)
if k >= 9:
img.set_cmap(cm_b)
elif k >= 6:
img.set_cmap(cm_g)
elif k >= 3:
img.set_cmap(cm_r)
cbar = ax.cax.colorbar(img)
plt.show()
The triplet $(K, \mathcal{F}, \mathcal{G})$ is defined as:
$K = D \Longleftrightarrow$ K = GradientOperator(noisy_data.geometry)
.
$\mathcal{F}(z) = \alpha\,\|z\|_{2,1}\Longleftrightarrow$ F = alpha * MixedL21Norm()
.
$\mathcal{G}(u) = \frac{1}{2}\|b - u \|^{2}_{2}\, \Longleftrightarrow$ G = 0.5 * L2NormSquared(b=noisy_data)
.
Given an image where a specific region is unknown, the task of image inpainting is to recover the missing region $\mathcal{D}$ from the known part of the image $\Omega$. For this example, we will use the rainbow image, where we are trying to remove a repeated text (+ salt and pepper noise) from the image that represents the unknown domain $\mathcal{D}$.
We use the Pillow library to add text in our image.
Note: If you are on a windows machine it is possible that you do not have access to the font file "DejaVuSerif.tff". If you get an error on line font = ImageFont.truetype('DejaVuSerif.ttf', 50)
then we recommend that you download the files from https://www.fontsquirrel.com/fonts/dejavu-serif, place the "DejaVuSerif.tff" file in the same folder as this notebook and change the line to be font = ImageFont.truetype('./DejaVuSerif.ttf', 50)
.
# Import libraries
import numpy as np
from PIL import Image, ImageFont, ImageDraw
# Numpy array
img_np = data.array
# Add text to the image
img_pil = Image.fromarray(np.uint8(img_np*255)).convert('RGB')
text = "\n This is a double \
\n rainbow \n"*3
draw = ImageDraw.Draw(img_pil)
font = ImageFont.truetype('DejaVuSerif.ttf', 50)
draw.text((0, 0), text, (0, 0, 0), font=font)
# Pillow image to numpy
im1 = np.array(img_pil)
# Rescale numpy array
img_np_rescale = im1/im1.max()
# Get image geometry
ig = data.geometry
# Create ImageData
data_with_text = ig.allocate()
data_with_text.fill(img_np_rescale)
# Show rainbow with text
plt.figure(figsize=(10,10))
plt.imshow(data_with_text.array)
plt.title("Rainbow with text")
plt.show()
# Mask that contains only text information
mask_boolean = (data_with_text-data).abs().as_array()==0
# Show rainbow with text
plt.figure(figsize=(10,10))
plt.imshow(mask_boolean[:,:,0])
plt.title("Mask: (Yellow=True, Blue=False)")
plt.show()
from cil.optimisation.operators import MaskOperator, ChannelwiseOperator
mask = ig.get_slice(channel=0).allocate()
mask.fill(mask_boolean[:,:,0])
MO = ChannelwiseOperator(MaskOperator(mask), 3, dimension='append')
noisy_data = noise.saltnpepper(data_with_text, amount=0.01, seed=10)
noisy_data = MO.direct(noisy_data)
# noisy_data = MO.direct(data)
plt.figure(figsize=(10, 10))
plt.imshow(noisy_data.as_array())
plt.title("Corrupted image: Missing information + Salt and pepper noise")
plt.show()
We will use two different regularisation in order to restore the above corrupted image. We start with the TV regularisation described above and its generalisation, namely the Total Generalised Variation (TGV) introduced in Bredies et al.
TGV is a higher-order regulariser, that is able to obtain piecewise smooth solutions and restore staircasing artifacts that TV promotes. We let $\alpha, \beta>0$ be two regularisation parameters and define
$$ \mathrm{TGV}_{\alpha, \beta}(u) = \min_{w} \alpha \|D u - w \|_{2,1} + \beta\|\mathcal{E}w\|_{2,1}, $$where $\mathcal{E}$ denotes the Symmetrised Gradient operator defined as
$$ \mathcal{E}w = \frac{1}{2}(D w + D w^{T}). $$The minimisation problems, using the $L^{1}$ norm as a data fidelity term which is suitable for salt & pepper noise, are:
$$ u^{*} =\underset{u}{\operatorname{argmin}} \|\mathcal{M}u-b\|_{1} + \alpha\mathrm{VTV}(u) $$and
$$ \begin{aligned} u^{*} =\underset{u}{\operatorname{argmin}} & \|\mathcal{M}u-b\|_{1} + \mathrm{TGV}_{\alpha, \beta}(u) \Leftrightarrow \\ (u^{*},w^{*}) =\underset{u, w}{\operatorname{argmin}} & \|\mathcal{M}u -b\|_{1} + \alpha \|D u - w \|_{2,1} + \beta\|\mathcal{E}w\|_{2,1}, \end{aligned} $$where the $\mathcal{M}$ is a diagonal operator with 1 in the diagonal elements corresponding to pixels in $\Omega\setminus\mathcal{D}$ and 0 in $\mathcal{D}$.
We solve the above problems using the PDHG algorithm described in previous notebooks.
# Import libraries
from cil.optimisation.operators import BlockOperator, SymmetrisedGradientOperator, GradientOperator, ZeroOperator, IdentityOperator
from cil.optimisation.functions import ZeroFunction, L1Norm, MixedL21Norm, BlockFunction
from cil.optimisation.algorithms import PDHG
Grad = GradientOperator(ig, backend='numpy')
K = BlockOperator(Grad, MO)
alpha_tv = 0.5
f1 = alpha_tv * MixedL21Norm()
f2 = L1Norm(b=noisy_data)
F = BlockFunction(f1, f2)
G = ZeroFunction()
pdhg_tv = PDHG(f=F, g=G, operator=K, update_objective_interval=1000, initial=noisy_data)
pdhg_tv.run(500, verbose=1)
Recall, that we need to define the triplet ($K$, $\mathcal{F}$, $\mathcal{G}$) and write the above problem into the following form:
$$ u^{*} =\underset{u}{\operatorname{argmin}} \mathcal{F}(Ku) + \mathcal{G}(u). $$Let $\textbf{u} = (u, w)\in \mathbb{X}$ and define an operator $K:\mathbb{X}\rightarrow\mathbb{Y}$ as
$$ K = \begin{bmatrix} \mathcal{M} & \mathcal{O}\\ D & -\mathcal{I}\\ \mathcal{O} & \mathcal{E} \end{bmatrix} \quad\Rightarrow\quad K\textbf{u} = K \begin{bmatrix} u\\ w \end{bmatrix}= \begin{bmatrix} \mathcal{M}u\\ Du - w\\ \mathcal{E}w \end{bmatrix} = \begin{bmatrix} y_{1}\\ y_{2}\\ y_{3} \end{bmatrix} = \textbf{y}\in \mathbb{Y}, $$where $\mathcal{O}$, $\mathcal{I}$ denote the zero and identity operators respectively.
For the function $\mathcal{F}$, we have that
$$ \begin{aligned} & \mathcal{F}(\textbf{y}) := \mathcal{F}(y_{1}, y_{2}, y_{3}) = f_{1}(y_{1}) + f_{2}(y_{2}) + f_{3}(y_{3}), \text{ where},\\ & f_{1}(y_{1}) := \| y_{1} - b\|_1,\, f_{2}(y_{2}) := \alpha \|y_{2}\|_{2,1},\, f_{3}(y_{3}) := \beta\|y_{3}\|_{2,1}, \end{aligned} $$and for the function $\mathcal{G}$, $\mathcal{G}(\textbf{u}) = \mathcal{G}(u,w) = O(u)\equiv 0 $ is the zero function.
We conclude that
$$ \begin{aligned} f(K\textbf{u}) + g(\textbf{u}) & = f\bigg(\begin{bmatrix} \mathcal{M}u\\ Du - w\\ \mathcal{E}w \end{bmatrix}\bigg) = f_{1}(\mathcal{M}u) + f_{2}(Du-w) + f_{3}(\mathcal{E}w) \\ & = \|\mathcal{M}u -b\|_{1} + \alpha \|D u - w \|_{2,1} + \beta\|\mathcal{E}w\|_{2,1}, \end{aligned} $$which is exactly the objective function as we had in Total Generalised Variation with the $L^{1}$ norm.
Note that running the next cell can take about 8mins.
# Regularisation parameters
alpha_tgv = 0.4
beta_tgv = 0.2
# Define BlockOperator K
K11 = MO
K21 = Grad
K32 = SymmetrisedGradientOperator(K21.range)
K12 = ZeroOperator(K32.domain, ig)
K22 = IdentityOperator(K21.range)
K31 = ZeroOperator(ig, K32.range)
K = BlockOperator(K11, K12, K21, -K22, K31, K32, shape=(3, 2))
# Define BlockFunction f
f2 = alpha_tgv * MixedL21Norm()
f3 = beta_tgv * MixedL21Norm()
f1 = L1Norm(b=noisy_data)
F = BlockFunction(f1, f2, f3)
# Setup and run the PDHG algorithm
pdhg_tgv = PDHG(f=F, g=G, operator=K, update_objective_interval=10, initial=BlockDataContainer(noisy_data, Grad.direct(noisy_data), shape=(2,1)), sigma=1.4/K.norm(), tau=1.4/K.norm())
pdhg_tgv.run(1000, verbose=1)
images = [data, pdhg_tv.solution, (pdhg_tv.solution-data).abs()*3,
noisy_data, pdhg_tgv.solution[0], (pdhg_tgv.solution[0]-data).abs()*3]
labels_x = ["Ground Truth", "TV inpainting/denoising", "|Ground Truth - TV|",
"Corrupted Image", "TGV inpainting/denoising", "|Ground Truth - TGV|"]
# set fontsize xticks/yticks
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.labelsize'] = 15
fig = plt.figure(figsize=(20, 20))
grid = AxesGrid(fig, 111,
nrows_ncols=(2, 3),
axes_pad=0.8,
cbar_mode='single',
cbar_location='bottom',
cbar_size=0.5,
cbar_pad=0.3)
for ax, im, lab in zip(grid, images, labels_x):
img = ax.imshow(im.as_array().clip(0, 1))
ax.set_title(lab, fontsize=25)
cbar = ax.cax.colorbar(img)
plt.show()
In this notebook, we presented how to reconstruct multichannel data with 3 channels, using two different regularisers and data fitting terms. The following notebooks will demonstrate how to reconstruct multichannel data for CT applications: