``` {.python}
import numpy as np
np.random.seed(4949)
import teaching_plots as plot
import pods
```

``` {.python}
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('two_point_sample{sample:0>3}.svg', 
                            '../slides/diagrams/gp', 
                            sample=IntSlider(9, 9, 12, 1))
```

``` {.python}
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('two_point_sample{sample:0>3}.svg', 
                            '../slides/diagrams/gp', 
                            sample=IntSlider(13, 13, 17, 1))
```

``` {.python}
# set prior variance on w
alpha = 4.
# set the order of the polynomial basis set
degree = 5
# set the noise variance
sigma2 = 0.01
```

``` {.python}
import numpy as np
def polynomial(x, degree, loc, scale):
    degrees = np.arange(degree+1)
    return ((x-loc)/scale)**degrees
```

``` {.python}
import pods
data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

loc = 1950.
scale = 100.
num_data = x.shape[0]
num_pred_data = 100 # how many points to use for plotting predictions
x_pred = np.linspace(1880, 2030, num_pred_data)[:, None] # input locations for predictions

Phi_pred = polynomial(x_pred, degree=degree, loc=loc, scale=scale)
Phi = polynomial(x, degree=degree, loc=loc, scale=scale)
```

``` {.python}
import matplotlib.pyplot as plt
%matplotlib inline

num_samples = 10
K = degree+1
for i in range(num_samples):
    z_vec = np.random.normal(size=(K, 1))
    w_sample = z_vec*np.sqrt(alpha)
    f_sample = np.dot(Phi_pred, w_sample)
    plt.plot(x_pred, f_sample)
```

``` {.python}
K = alpha*np.dot(Phi_pred, Phi_pred.T)
for i in np.arange(10):
    f_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), f_sample.flatten())
```

``` {.python}
fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(K, interpolation='none')
fig.colorbar(im)
```

``` {.python}
K = alpha*np.dot(Phi_pred, Phi_pred.T) + sigma2*np.eye(x_pred.size)
for i in range(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), y_sample.flatten())
```

``` {.python}
sigma2 = 1.
K = alpha*np.dot(Phi_pred, Phi_pred.T) + sigma2*np.eye(x_pred.size)
for i in range(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), y_sample.flatten())
```

``` {.python}
# Use this box for any code you need for the exercise
```

``` {.python}
%load -s eq_cov mlai.py
```

``` {.python}
%load -s Kernel mlai.py
```

``` {.python}
kernel = Kernel(function=eq_cov, variance=1., lengthscale=10.)
```
``` {.python}
K = kernel.K(x_pred, x_pred)
```

``` {.python}
fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(K, interpolation='none')
fig.colorbar(im)
```

``` {.python}
fig, ax = plt.subplots(figsize=(8, 5))
for i in range(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    ax.plot(x_pred.flatten(), y_sample.flatten())
```

``` {.python}
# Use this box for any code you need for the exercise
```

``` {.python}
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('gp_rejection_sample{sample:0>3}.png', 
                            directory='../slides/diagrams/gp', 
                            sample=IntSlider(1, 1, 5, 1))
```

``` {.python}
%load -s GP mlai.py
```

``` {.python}
# set covariance function parameters
variance = 16.0
lengthscale = 8
# set noise variance
sigma2 = 0.05

kernel = Kernel(eq_cov, variance=variance, lengthscale=lengthscale)
K = kernel.K(x, x)
K_star = kernel.K(x, x_pred)
K_starstar = kernel.K(x_pred, x_pred)
```

``` {.python}
fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(np.vstack([np.hstack([K, K_star]), 
                          np.hstack([K_star.T, K_starstar])]), interpolation='none')
# Add lines for separating training and test data
ax.axvline(x.shape[0]-1, color='w')
ax.axhline(x.shape[0]-1, color='w')
fig.colorbar(im)
```

``` {.python}
%load -s posterior_f mlai.py
```

``` {.python}
# attach the new method to class GP()
GP.posterior_f = posterior_f
```

``` {.python}
# use the eq_cov covariance function loaded above
model = GP(x, y, sigma2, eq_cov, variance=variance, lengthscale=lengthscale)
mu_f, C_f = model.posterior_f(x_pred)
```

``` {.python}
fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(C_f, interpolation='none')
fig.colorbar(im)
```

``` {.python}
plt.plot(x, y, 'rx')
plt.plot(x_pred, mu_f, 'b-')
```

``` {.python}
var_f = np.diag(C_f)[:, None]
std_f = np.sqrt(var_f)
```

``` {.python}
plt.plot(x, y, 'rx')
plt.plot(x_pred, mu_f, 'b-')
plt.plot(x_pred, mu_f+2*std_f, 'b--')
plt.plot(x_pred, mu_f-2*std_f, 'b--')
```

``` {.python}
# Use this box for any code you need for the exercise
```

``` {.python}
from IPython.lib.display import YouTubeVideo
YouTubeVideo('ewJ3AxKclOg')
```

``` {.python}
%load -s update_inverse mlai.py
```

``` {.python}
GP.update_inverse = update_inverse
```
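The figure below refers to the decomposition of the Gaussian process negative log likelihood into a data fit term and a capacity term. As a reminder, written here in generic notation rather than the notation used in `mlai.py`, for a zero mean Gaussian process with training covariance matrix $\mathbf{K}$ (including the noise variance), targets $\mathbf{y}$ and $n$ data points, the negative log likelihood is
$$
-\log p(\mathbf{y}) = \underbrace{\frac{1}{2}\mathbf{y}^\top \mathbf{K}^{-1}\mathbf{y}}_{\text{data fit}} + \underbrace{\frac{1}{2}\log\det\mathbf{K}}_{\text{capacity}} + \frac{n}{2}\log 2\pi.
$$
Shortening the lengthscale typically improves the data fit term but increases the capacity penalty; the maximum likelihood lengthscale trades the two off.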
Figure: Variation in the data fit term, the capacity term and the negative log likelihood for different lengthscales.

## Exponentiated Quadratic Covariance \[edit\]

The exponentiated quadratic covariance is also known as the Gaussian covariance, the RBF covariance or the squared exponential. The covariance between two points decays with the negative exponential of the squared distance between them. This covariance function can be derived in a few different ways: as the infinite limit of a radial basis function neural network, as diffusion in the heat equation, as a Gaussian filter in *Fourier space* or as the composition of a series of linear filters applied to a base function.

The covariance takes the following form,
$$
\kernelScalar(\inputVector, \inputVector^\prime) = \alpha \exp\left(-\frac{\ltwoNorm{\inputVector-\inputVector^\prime}^2}{2\lengthScale^2}\right)
$$
where $\lengthScale$ is the *length scale* or *time scale* of the process and $\alpha$ represents the overall process variance.
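As a small illustration, the covariance can be computed directly from this definition. The sketch below is not the `eq_cov` function loaded from `mlai.py`; it is a minimal stand-alone example, and the names `eq_cov_demo`, `alpha` and `lengthscale` are chosen here purely for illustration.

``` {.python}
import numpy as np

def eq_cov_demo(x, x_prime, alpha=1.0, lengthscale=1.0):
    """Exponentiated quadratic covariance between two input vectors."""
    # squared Euclidean distance between the two inputs
    sq_dist = np.sum((np.asarray(x, dtype=float) - np.asarray(x_prime, dtype=float))**2)
    return alpha*np.exp(-0.5*sq_dist/lengthscale**2)

# covariance matrix for a set of one-dimensional inputs
X = np.linspace(-3., 3., 5)[:, None]
K = np.array([[eq_cov_demo(xi, xj, alpha=1.0, lengthscale=2.0) for xj in X]
              for xi in X])
```

Larger lengthscales give covariances that decay more slowly with distance, so samples from the corresponding process vary more slowly across the input space.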
Figure: The exponentiated quadratic covariance function.

## Olympic Marathon Data \[edit\]
- Gold medal times for the Olympic Marathon since 1896.
- Marathons before 1924 didn't have a standardised distance.
- Present results using pace per km.
- In 1904 the Marathon was badly organised, leading to very slow times.

Image from Wikimedia Commons
The first thing we will do is load a standard data set for regression modelling. The data consists of the pace of Olympic Gold Medal Marathon winners for the Olympics from 1896 to present. First we load in the data and plot it.

``` {.python}
import numpy as np
import pods

data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

offset = y.mean()
scale = np.sqrt(y.var())
```

``` {.python}
import matplotlib.pyplot as plt
import teaching_plots as plot
import mlai

xlim = (1875, 2030)
ylim = (2.5, 6.5)
yhat = (y-offset)/scale

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
ax.set_xlabel('year', fontsize=20)
ax.set_ylabel('pace min/km', fontsize=20)
ax.set_xlim(xlim)
ax.set_ylim(ylim)

mlai.write_figure(figure=fig, 
                  filename='../slides/diagrams/datasets/olympic-marathon.svg', 
                  transparent=True, 
                  frameon=True)
```

``` {.python}
import GPy
m_full = GPy.models.GPRegression(x, yhat)
_ = m_full.optimize() # Optimize parameters of covariance function
```

``` {.python}
xt = np.linspace(1870, 2030, 200)[:, np.newaxis]
yt_mean, yt_var = m_full.predict(xt)
yt_sd = np.sqrt(yt_var)
```

``` {.python}
import teaching_plots as plot
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full, scale=scale, offset=offset, ax=ax, 
                  xlabel='year', ylabel='pace min/km', fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/olympic-marathon-gp.svg', 
                  transparent=True, frameon=True)
```

``` {.python}
x_clean = np.vstack((x[0:2, :], x[3:, :]))
y_clean = np.vstack((y[0:2, :], y[3:, :]))

m_clean = GPy.models.GPRegression(x_clean, y_clean)
_ = m_clean.optimize()
```

We now apply the same approach to the Della Gatta gene expression data, fitting Gaussian process models initialised with different lengthscales.

``` {.python}
import numpy as np
import pods

data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta', gene_number=937)
x = data['X']
y = data['Y']

offset = y.mean()
scale = np.sqrt(y.var())
```

``` {.python}
import matplotlib.pyplot as plt
import teaching_plots as plot
import mlai

xlim = (-20, 260)
ylim = (5, 7.5)
yhat = (y-offset)/scale

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
ax.set_xlabel('time/min', fontsize=20)
ax.set_ylabel('expression', fontsize=20)
ax.set_xlim(xlim)
ax.set_ylim(ylim)

mlai.write_figure(figure=fig, 
                  filename='../slides/diagrams/datasets/della-gatta-gene.svg', 
                  transparent=True, 
                  frameon=True)
```

``` {.python}
import GPy
m_full = GPy.models.GPRegression(x, yhat)
m_full.kern.lengthscale = 50
_ = m_full.optimize() # Optimize parameters of covariance function
```

``` {.python}
xt = np.linspace(-20, 260, 200)[:, np.newaxis]
yt_mean, yt_var = m_full.predict(xt)
yt_sd = np.sqrt(yt_var)
```

``` {.python}
import teaching_plots as plot
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full, scale=scale, offset=offset, ax=ax, 
                  xlabel='time/min', ylabel='expression', fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full.log_likelihood()), fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp.svg', 
                  transparent=True, frameon=True)
```

``` {.python}
m_full2 = GPy.models.GPRegression(x, yhat)
m_full2.kern.lengthscale = 2000
_ = m_full2.optimize() # Optimize parameters of covariance function
```

``` {.python}
import teaching_plots as plot
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full2, scale=scale, offset=offset, ax=ax, 
                  xlabel='time/min', ylabel='expression', fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full2.log_likelihood()), fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp2.svg', 
                  transparent=True, frameon=True)
```

``` {.python}
m_full3 = GPy.models.GPRegression(x, yhat)
m_full3.kern.lengthscale = 20
m_full3.likelihood.variance = 0.001
_ = m_full3.optimize() # Optimize parameters of covariance function
```

``` {.python}
import teaching_plots as plot
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full3, scale=scale, offset=offset, ax=ax, 
                  xlabel='time/min', ylabel='expression', fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full3.log_likelihood()), fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp3.svg', 
                  transparent=True, frameon=True)
```

``` {.python}
%load -s basis_cov mlai.py
```

``` {.python}
%load -s radial mlai.py
```

``` {.python}
%load -s brownian_cov mlai.py
```

``` {.python}
%load -s mlp_cov mlai.py
```