%reload_ext autoreload
%autoreload 2
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
import numpy as np
import skimage.io as io
from scipy import linalg
import matplotlib as mpl
from sklearn import mixture
plt.rcParams["figure.figsize"] = (8, 8)
plt.rcParams["figure.dpi"] = 100
plt.rcParams["font.size"] = 14
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
plt.style.use('ggplot')
sns.set_style("whitegrid", {'axes.grid': False})
Mitchell, H.B., "Data Fusion: Concepts and Ideas", Springer Verlag, 2012.
Mitchel, H.B., "Image Fusion - Theories, Techniques and Applications", Springer Verlag, 2010.
T. Stathaki, "Image fusion", Academic Press, 2008
Goshtasby, A. Ardeshir, "Image Registration Principles, Tools and Methods", Springer Verlag, 2012
Xiao, G., Bavirisetti, D.P., Liu, G., Zhang, X., "Image Fusion", Springer Verlag, to be published July, 2020
Hydrology in soil and geology | Cultural heritage |
---|---|
![]()
|
![]()
|
Building materials | Materials science |
![]()
|
![]()
|
Reasons to select or reject a specific imaging method
|
|
---|---|
|
|
Until now, we only collected image features from a single modality.
Match the advantages of each method against the disadvantages of the other methods to obtain more information than using each method individually.
Neutrons | X-rays |
---|---|
![]() ![]() |
![]() ![]() |
Transmission | Differential phase | Dark field |
---|---|---|
![]() |
![]() |
![]() |
S. Peetermans
The theory, techniques and tools which are used for
To improve the quality of the information, so that it is, in some sense, better than would be possible if the data sources were used individually.
The fusion strategy determined by:
Input | Output | Description |
---|---|---|
Data | Data | Input data is smoothed/filtered/segmented |
Data | Feature | The pixels are reduced to features using multiple sources. |
Feature | Feature | Input features are reduced in number, or new features are generated by fusing input features. |
Feature | Decision | Input features are fused together to give output decision. |
Decision | Decision | Multiple input decisions are fused together to give a final output decision. e.g. Random forest |
Registration is an optimization problem with many local minima.
Use e.g. VG Studio or 3DSlicer to
Neutrons | X-rays |
---|---|
![]() |
![]() |
mannes2015_NXCultHer
# Get images from volume, works only locally. Data too large for repos.
N=400
imgAFull = io.imread('/data/P20151041/04_evaluation/shell_N_final_transform.tiff')
imgBFull = io.imread('/data/P20151041/04_evaluation/shell_X.tiff')
imgA=imgAFull[N]
imgB=imgBFull[N]
np.save('../common/data/shellN.npy',imgA)
np.save('../common/data/shellX.npy',imgB)
imgA=np.load('../common/data/shellN.npy')
imgB=np.load('../common/data/shellX.npy')
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(12,5))
ax1.imshow(imgA,cmap='viridis'), ax1.set_title('Neutrons')
ax2.imshow(imgB,cmap='viridis'), ax2.set_title('X-rays');
ax3.imshow(plt.imread('../common/figures/snailshellNeutron.png')); ax3.axis('off');
def checkerBoard(imgA,imgB,tiles=10) :
if imgA.shape != imgB.shape :
raise Exception('Image have different sizes')
dims = imgA.shape
tileSize = (dims[0]//tiles,dims[1]//tiles)
mix = np.zeros(dims)
for r in np.arange(0,tiles) :
for c in np.arange(0,tiles) :
if (c+r) % 2 :
mix[(r*tileSize[0]):((r+1)*tileSize[0]),(c*tileSize[1]):((c+1)*tileSize[1])]= imgB[(r*tileSize[0]):((r+1)*tileSize[0]),(c*tileSize[1]):((c+1)*tileSize[1])]
else :
mix[(r*tileSize[0]):((r+1)*tileSize[0]),(c*tileSize[1]):((c+1)*tileSize[1])]= imgA[(r*tileSize[0]):((r+1)*tileSize[0]),(c*tileSize[1]):((c+1)*tileSize[1])]
return mix
plt.figure(figsize=(3,3))
plt.imshow(checkerBoard(np.ones((100,100)),np.zeros((100,100)),tiles=5));
fig,(ax1,ax2,ax3)=plt.subplots(1,3,figsize=(15,5))
ax1.imshow(imgA,cmap='viridis',vmin=10000,vmax=60000), ax1.set_title('Neutrons')
ax2.imshow(checkerBoard(imgA,imgB,tiles=5),cmap='viridis',vmin=10000,vmax=60000);
ax2.annotate('Neutrons',
xy=(60, 60), xycoords='data',
xytext=(0.1, 1.1), textcoords='axes fraction',
arrowprops=dict(facecolor='red', shrink=0.05),
horizontalalignment='center', verticalalignment='top')
ax2.annotate('X-rays',
xy=(190, 60), xycoords='data',
xytext=(0.3, 1.1), textcoords='axes fraction',
arrowprops=dict(facecolor='red', shrink=0.05),
horizontalalignment='center', verticalalignment='top')
ax3.imshow(imgB,cmap='viridis'), ax3.set_title('X-rays');
With two or more modalities, we can visualize the mix using the RGB color channels:
$$ \begin{eqnarray} R &=& modality_A\\ G &=& modality_B\\ B &=& \frac{modality_A+modality_B}{2} \end{eqnarray} $$some intensity scaling may be needed for best result.
def channelMix(imgA,imgB, order=(0,1,2)) :
imgAN=(imgA-imgA.min())/(imgA.max()-imgA.min())
imgBN=(imgB-imgB.min())/(imgB.max()-imgB.min())
rgb=np.zeros((imgA.shape[0],imgA.shape[1],3));
rgb[:,:,order[0]]=imgAN
rgb[:,:,order[1]]=imgBN
rgb[:,:,order[2]]=0.5*(imgAN+imgBN)
return rgb
fig,(ax1,ax2,ax3)=plt.subplots(1,3,figsize=(15,6))
ax1.imshow(channelMix(imgA,imgB,order=(0,1,2))), ax1.set_title(r'ImgA$\rightarrow$R, ImgB$\rightarrow$G');
ax2.imshow(channelMix(imgA,imgB,order=(2,1,0))), ax2.set_title(r'ImgA$\rightarrow$B, ImgB$\rightarrow$G');
ax3.imshow(channelMix(imgA,imgB,order=(0,2,1))), ax3.set_title(r'ImgA$\rightarrow$R, ImgB$\rightarrow$B');
Modality A | Modality B |
---|---|
![]() |
![]() |
The $N$ classes are described by: $$\begin{eqnarray} \mathcal{H}_1 : p(\mathbf{\mu}_1,\Sigma_1)\nonumber\\ \mathcal{H}_2 : p(\mathbf{\mu}_2,\Sigma_2)\nonumber\\ \vdots\nonumber\\ \mathcal{H}_N : p(\mathbf{\mu}_N,\Sigma_N)\nonumber \end{eqnarray} $$
With Gaussian distribution we can describe the bivariate histogram using: $$p(\theta)=\sum_{1}^{N} \phi\,\mathcal{N}(\mathbf{\mu}_i,\Sigma_i) $$
# Number of samples per component
n_samples = 500
# Generate random sample, two components
np.random.seed(0)
C1 = np.array([[1, -0.5], [-0.5, 1]])
C2 = np.array([[1, 0.25], [0.25, 1]])
X = np.r_[np.dot(np.random.randn(n_samples, 2), C1), np.dot(np.random.randn(n_samples, 2),C2) + np.array([-3, 3])]
plt.figure(figsize=[4,4])
plt.scatter(X[:,0],X[:,1],0.8)
plt.xlim(-7., 5.),plt.ylim(-4., 6.)
plt.title('Bi-variate data');
def plot_results(X, Y_, means, covariances, title, ax, showShape=True, showCenter=False):
color_iter = itertools.cycle(['navy', 'c', 'cornflowerblue', 'gold',
'darkorange'])
for i, (mean, covar, color) in enumerate(zip(
means, covariances, color_iter)):
v, w = linalg.eigh(covar)
v = 2. * np.sqrt(2.) * np.sqrt(v)
u = w[0] / linalg.norm(w[0])
# as the DP will not use every component it has access to
# unless it needs it, we shouldn't plot the redundant
# components.
if not np.any(Y_ == i):
continue
ax.scatter(X[Y_ == i, 0], X[Y_ == i, 1], .8, color=color)
# Plot an ellipse to show the Gaussian component
if showShape :
angle = np.arctan(u[1] / u[0])
angle = 180. * angle / np.pi # convert to degrees
ell = mpl.patches.Ellipse(mean, v[0], v[1], 180. + angle, color=color)
ell.set_clip_box(ax.bbox)
ell.set_alpha(0.5)
ax.add_artist(ell)
if showCenter :
ax.plot(mean[0],mean[1],'ro')
ax.set_xlim(-7., 5.)
ax.set_ylim(-4., 6.)
ax.set_title(title)
fig, axes = plt.subplots(2,2,figsize=(8,8))
# Fit a Gaussian mixture with EM using five components
for i,ax in zip(np.arange(0,len(axes.ravel())),axes.ravel()) :
gmm = mixture.GaussianMixture(n_components=i+1, covariance_type='full').fit(X)
plot_results(X, gmm.predict(X), gmm.means_, gmm.covariances_,
title='Gaussian Mixture ({} classes)'.format(i+1), ax=ax)
For a set of multivariate normal distributions $p_i=\mathcal{N}(\mu_i,\Sigma_i)$
We can find the nearest neighbor class using the following distances
Distance between two points $$D_{E}=\sqrt{(x-\mu_1)^T \cdot (x-\mu_1)} $$
Distance from class $i$ to point $x$ $$D_M=\sqrt{\left(x-\mu_i\right)^T \Sigma_i \left(x-\mu_i\right)}$$
Distance between two classes $$D_B=\frac{1}{8}\left(\mu_1-\mu_2\right)^T \Sigma \left(\mu_1-\mu_2\right) + \frac{1}{2}\ln\left(\frac{|\Sigma|}{\sqrt{|\Sigma_1|\cdot|\Sigma_2|}}\right)\qquad \Sigma=\frac{\Sigma_1+\Sigma_2}{2}$$
Assign the point to the class with shortest distance.
gmm = mixture.GaussianMixture(n_components=2, covariance_type='full').fit(X)
m=[-1.6,1.3]
fig,(ax1,ax2,ax3)=plt.subplots(1,3,figsize=(15,4))
plot_results(X, gmm.predict(X), gmm.means_, gmm.covariances_,
'Euclidean distance',ax1, showShape=False,showCenter=True)
ax1.plot(-1.6,1.3,'rx')
plot_results(X, gmm.predict(X), gmm.means_, gmm.covariances_,
'Mahanalubis distance'.format(2),ax2, showCenter=True)
ax2.plot(-1.6,1.3,'rx')
plot_results(X, gmm.predict(X), gmm.means_, gmm.covariances_,
'Bhattacharia distance'.format(2),ax3, showCenter=True)
v=1
ell = mpl.patches.Ellipse(m, v, v, 0, color='orange')
ell.set_clip_box(ax3.bbox)
ell.set_alpha(0.5)
ax3.add_artist(ell)
ax3.plot(m[0],m[1],'rx');
$\rho$ Material denstity
$A$ Atomic weight
$\sigma$ microscopic cross section
$x$ propagation length
With custom storage we have to implement functions for each operation:
We have already seen data frames in action but never formally introduced them...
A data frame is
You can
There are different ways to create a data frame:
First we have import pandas:
import pandas as pd
This is the very basic use case. We create the data frame as the features are produced.
dl = []
for i in np.arange(0,10) :
dl.append({'position' : i, 'sine' : np.sin(i)})
df = pd.DataFrame(dl)
df.head()
position | sine | |
---|---|---|
0 | 0 | 0.000000 |
1 | 1 | 0.841471 |
2 | 2 | 0.909297 |
3 | 3 | 0.141120 |
4 | 4 | -0.756802 |
Sometimes the features have been extracted elsewhere and stored in a file, e.g. CSV
pheno = pd.read_csv('../Exercises/10-Statistics_DataFrames/phenoTable.csv')
pheno.sample(5)
BMD | MECHANICS_STIFFNESS | CORT_DTO__C_TH | CORT_DTO__C_TH_SD | CORT_MOM__J | CT_TH_RAD | CT_TH_RAD_STD | CANAL_VOLUME | CANAL_COUNT | CANAL_DENSITY | ... | CANAL_THETA | CANAL_THETA_CV | CANAL_PCA1 | CANAL_PCA1_CV | CANAL_PCA2 | CANAL_PCA2_CV | CANAL_PCA3 | CANAL_PCA3_CV | FEMALE | ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
927 | 0.0331 | 54.710162 | 0.184314 | 0.016091 | 0.133865 | 80.738456 | 9.721094 | 30056.096390 | 219.0 | 256.122405 | ... | 50.506419 | 0.457766 | 282.380257 | 0.921480 | 74.700692 | 0.857098 | 31.999642 | 0.695682 | 1 | 2432 |
296 | 0.0362 | 73.530058 | 0.194861 | 0.023396 | 0.181654 | 100.159668 | 23.097699 | 91052.403555 | 293.0 | 270.159690 | ... | 45.065782 | 0.522117 | 239.630656 | 1.036752 | 83.551103 | 0.888058 | 37.632115 | 0.895730 | 1 | 1037 |
866 | 0.0367 | 76.442966 | 0.181637 | 0.014776 | 0.187737 | 83.383601 | 15.701657 | 16494.686437 | 108.0 | 136.734689 | ... | 59.137077 | 0.378295 | 307.349505 | 1.046169 | 59.023636 | 1.135279 | 26.410985 | 0.409488 | 1 | 2357 |
852 | 0.0378 | 58.308297 | 0.180191 | 0.018040 | 0.156586 | 77.207112 | 8.950630 | 22571.037406 | 236.0 | 275.537749 | ... | 56.423650 | 0.401693 | 280.723358 | 1.256418 | 74.577068 | 1.022312 | 35.912156 | 0.875530 | 1 | 2343 |
306 | 0.0351 | 64.398481 | 0.197205 | 0.021881 | 0.158393 | 90.299021 | 26.618817 | 63291.537997 | 183.0 | 209.393695 | ... | 45.807676 | 0.542518 | 229.465137 | 0.737940 | 69.664409 | 0.824979 | 29.518576 | 0.723313 | 1 | 1073 |
5 rows × 35 columns
Saving works similarly:
pheno.to_csv('pheno2.csv')
df['cosine']=np.cos(df['position'])
df['sum']=df['sine']+df['cosine']
df.head()
position | sine | cosine | sum | |
---|---|---|---|---|
0 | 0 | 0.000000 | 1.000000 | 1.000000 |
1 | 1 | 0.841471 | 0.540302 | 1.381773 |
2 | 2 | 0.909297 | -0.416147 | 0.493151 |
3 | 3 | 0.141120 | -0.989992 | -0.848872 |
4 | 4 | -0.756802 | -0.653644 | -1.410446 |
df2=df[0<df['sine']]
df2
position | sine | cosine | sum | |
---|---|---|---|---|
1 | 1 | 0.841471 | 0.540302 | 1.381773 |
2 | 2 | 0.909297 | -0.416147 | 0.493151 |
3 | 3 | 0.141120 | -0.989992 | -0.848872 |
7 | 7 | 0.656987 | 0.753902 | 1.410889 |
8 | 8 | 0.989358 | -0.145500 | 0.843858 |
9 | 9 | 0.412118 | -0.911130 | -0.499012 |
Note:Here is also a different way to create a data frame with dicts and lists.
df3 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df3.head()
A | B | |
---|---|---|
0 | 1 | 4 |
1 | 2 | 5 |
2 | 3 | 6 |
df3.rename(columns={"A": "hej", "B": "hopp"})
hej | hopp | |
---|---|---|
0 | 1 | 4 |
1 | 2 | 5 |
2 | 3 | 6 |
You can easily compute statistics on a data frame:
df.mean()
position 4.500000 sine 0.195521 cosine 0.042162 sum 0.237683 dtype: float64
df.describe()
position | sine | cosine | sum | |
---|---|---|---|---|
count | 10.00000 | 10.000000 | 10.000000 | 10.000000 |
mean | 4.50000 | 0.195521 | 0.042162 | 0.237683 |
std | 3.02765 | 0.693076 | 0.765706 | 1.009325 |
min | 0.00000 | -0.958924 | -0.989992 | -1.410446 |
25% | 2.25000 | -0.209562 | -0.594269 | -0.631200 |
50% | 4.50000 | 0.276619 | 0.069081 | 0.586953 |
75% | 6.75000 | 0.795350 | 0.700502 | 0.960965 |
max | 9.00000 | 0.989358 | 1.000000 | 1.410889 |
Compute the standard deviation for all columns with the rows have 0<sum
df.head()
df[0<df["sum"]]['sine'].std()
0.5297967209946162
In addition to all other plotting options, Pandas supports some basic plotting functionality
fig,ax=plt.subplots(1,1,figsize=(4,4))
df.plot(ax=ax);
We mostly don't want to plot all columns at once
fig,(ax1,ax2,ax3)=plt.subplots(1,3,figsize=(15,5))
df['sine'].plot(ax=ax1); ax1.set_title("Plot column sine"); ax2.set_title("Plot column cosine as function of sine")
df[0<df['sine']].plot(x='position',y='cosine',ax=ax3); ax3.set_title("Plot column cos as function of position");
fig,(ax1,ax2)=plt.subplots(1,2,figsize=(12,5))
df.plot(kind='bar',ax=ax1); ax1.set_title('Bar plot');
df.plot(kind='scatter',x='sine',y='cosine',ax=ax2); ax2.set_title('Scatter plot');
Further plotting options can be found on pandas visualization documentation
In our work we may produce several data frames that needs to be merged:
dfA = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']})
dfB = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
'B': ['B4', 'B5', 'B6', 'B7'],
'C': ['C4', 'C5', 'C6', 'C7'],
'D': ['D4', 'D5', 'D6', 'D7']})
frames = [dfA, dfB]
result = pd.concat(frames)
result
A | B | C | D | |
---|---|---|---|---|
0 | A0 | B0 | C0 | D0 |
1 | A1 | B1 | C1 | D1 |
2 | A2 | B2 | C2 | D2 |
3 | A3 | B3 | C3 | D3 |
0 | A4 | B4 | C4 | D4 |
1 | A5 | B5 | C5 | D5 |
2 | A6 | B6 | C6 | D6 |
3 | A7 | B7 | C7 | D7 |
dfA = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']},
index=[0, 1, 2, 3])
dfB = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
'B': ['B4', 'B5', 'B6', 'B7'],
'C': ['C4', 'C5', 'C6', 'C7'],
'D': ['D4', 'D5', 'D6', 'D7']},
index=[4, 5, 6, 7])
frames = [dfA, dfB]
result = pd.concat(frames)
result
A | B | C | D | |
---|---|---|---|---|
0 | A0 | B0 | C0 | D0 |
1 | A1 | B1 | C1 | D1 |
2 | A2 | B2 | C2 | D2 |
3 | A3 | B3 | C3 | D3 |
4 | A4 | B4 | C4 | D4 |
5 | A5 | B5 | C5 | D5 |
6 | A6 | B6 | C6 | D6 |
7 | A7 | B7 | C7 | D7 |
dfA = pd.DataFrame({'id' : [1,2,3,4],
'A': ['A0', 'A1', 'A2', 'A3'],
'B': ['B0', 'B1', 'B2', 'B3'],
'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']},
index=[0, 1, 2, 3])
dfB = pd.DataFrame({'id' : [1,2,3,4],
'X': ['B4', 'B5', 'B6', 'B7'],
'Y': ['C4', 'C5', 'C6', 'C7'],
'Z': ['D4', 'D5', 'D6', 'D7']},
index=[1,2,3,4])
result=pd.merge(dfA,dfB)
result
id | A | B | C | D | X | Y | Z | |
---|---|---|---|---|---|---|---|---|
0 | 1 | A0 | B0 | C0 | D0 | B4 | C4 | D4 |
1 | 2 | A1 | B1 | C1 | D1 | B5 | C5 | D5 |
2 | 3 | A2 | B2 | C2 | D2 | B6 | C6 | D6 |
3 | 4 | A3 | B3 | C3 | D3 | B7 | C7 | D7 |
new = result[['A','C','D']]
new.head()
A | C | D | |
---|---|---|---|
0 | A0 | C0 | D0 |
1 | A1 | C1 | D1 |
2 | A2 | C2 | D2 |
3 | A3 | C3 | D3 |