``` {.python}
import numpy as np
np.random.seed(4949)
import teaching_plots as plot
import pods
```

``` {.python}
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('two_point_sample{sample:0>3}.svg', 
                            '../slides/diagrams/gp', 
                            sample=IntSlider(9, 9, 12, 1))
```

``` {.python}
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('two_point_sample{sample:0>3}.svg', 
                            '../slides/diagrams/gp', 
                            sample=IntSlider(13, 13, 17, 1))
```

``` {.python}
# set prior variance on w
alpha = 4.
# set the order of the polynomial basis set
degree = 5
# set the noise variance
sigma2 = 0.01
```

``` {.python}
import numpy as np
def polynomial(x, degree, loc, scale):
    degrees = np.arange(degree+1)
    return ((x-loc)/scale)**degrees
```

``` {.python}
import pods
data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

loc = 1950.
scale = 100.
num_data = x.shape[0]
num_pred_data = 100 # how many points to use for plotting predictions
x_pred = np.linspace(1880, 2030, num_pred_data)[:, None] # input locations for predictions

Phi_pred = polynomial(x_pred, degree=degree, loc=loc, scale=scale)
Phi = polynomial(x, degree=degree, loc=loc, scale=scale)
```

``` {.python}
import matplotlib.pyplot as plt
%matplotlib inline

num_samples = 10
K = degree+1
for i in range(num_samples):
    z_vec = np.random.normal(size=(K, 1))
    w_sample = z_vec*np.sqrt(alpha)
    f_sample = np.dot(Phi_pred, w_sample)
    plt.plot(x_pred, f_sample)
```

``` {.python}
K = alpha*np.dot(Phi_pred, Phi_pred.T)
for i in np.arange(10):
    f_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), f_sample.flatten())
```

``` {.python}
fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(K, interpolation='none')
fig.colorbar(im)
```

``` {.python}
K = alpha*np.dot(Phi_pred, Phi_pred.T) + sigma2*np.eye(x_pred.size)
for i in range(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), y_sample.flatten())
```

``` {.python}
sigma2 = 1.
K = alpha*np.dot(Phi_pred, Phi_pred.T) + sigma2*np.eye(x_pred.size)
for i in range(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    plt.plot(x_pred.flatten(), y_sample.flatten())
```

``` {.python}
# Use this box for any code you need for the exercise
```

``` {.python}
%load -s eq_cov mlai.py
```

``` {.python}
%load -s Kernel mlai.py
```

``` {.python}
kernel = Kernel(function=eq_cov, variance=1., lengthscale=10.)
```
``` {.python}
K = kernel.K(x_pred, x_pred)
```

``` {.python}
fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(K, interpolation='none')
fig.colorbar(im)
```

``` {.python}
fig, ax = plt.subplots(figsize=(8, 5))
for i in range(10):
    y_sample = np.random.multivariate_normal(mean=np.zeros(x_pred.size), cov=K)
    ax.plot(x_pred.flatten(), y_sample.flatten())
```

``` {.python}
# Use this box for any code you need for the exercise
```

``` {.python}
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('gp_rejection_sample{sample:0>3}.png', 
                            directory='../slides/diagrams/gp', 
                            sample=IntSlider(1, 1, 5, 1))
```

``` {.python}
%load -s GP mlai.py
```

``` {.python}
# set covariance function parameters
variance = 16.0
lengthscale = 8
# set noise variance
sigma2 = 0.05

kernel = Kernel(eq_cov, variance=variance, lengthscale=lengthscale)
K = kernel.K(x, x)
K_star = kernel.K(x, x_pred)
K_starstar = kernel.K(x_pred, x_pred)
```

``` {.python}
fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(np.vstack([np.hstack([K, K_star]), 
                          np.hstack([K_star.T, K_starstar])]), interpolation='none')
# Add lines for separating training and test data
ax.axvline(x.shape[0]-1, color='w')
ax.axhline(x.shape[0]-1, color='w')
fig.colorbar(im)
```

``` {.python}
%load -s posterior_f mlai.py
```

``` {.python}
# attach the new method to class GP()
GP.posterior_f = posterior_f
```

``` {.python}
# use the eq_cov covariance function loaded above
model = GP(x, y, sigma2, eq_cov, variance=variance, lengthscale=lengthscale)
mu_f, C_f = model.posterior_f(x_pred)
```

``` {.python}
fig, ax = plt.subplots(figsize=(8,8))
im = ax.imshow(C_f, interpolation='none')
fig.colorbar(im)
```

``` {.python}
plt.plot(x, y, 'rx')
plt.plot(x_pred, mu_f, 'b-')
```

``` {.python}
var_f = np.diag(C_f)[:, None]
std_f = np.sqrt(var_f)
```

``` {.python}
plt.plot(x, y, 'rx')
plt.plot(x_pred, mu_f, 'b-')
plt.plot(x_pred, mu_f+2*std_f, 'b--')
plt.plot(x_pred, mu_f-2*std_f, 'b--')
```

``` {.python}
# Use this box for any code you need for the exercise
```

``` {.python}
from IPython.lib.display import YouTubeVideo
YouTubeVideo('ewJ3AxKclOg')
```

``` {.python}
%load -s update_inverse mlai.py
```

``` {.python}
GP.update_inverse = update_inverse
```
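The figure below refers to the decomposition of the Gaussian process negative log likelihood into a data fit term and a capacity term. As a reminder, written here in generic notation rather than the notation used in `mlai.py`, for a zero mean Gaussian process with training covariance matrix $\mathbf{K}$ (including the noise variance), targets $\mathbf{y}$ and $n$ data points, the negative log likelihood is
$$
-\log p(\mathbf{y}) = \underbrace{\frac{1}{2}\mathbf{y}^\top \mathbf{K}^{-1}\mathbf{y}}_{\text{data fit}} + \underbrace{\frac{1}{2}\log\det\mathbf{K}}_{\text{capacity}} + \frac{n}{2}\log 2\pi.
$$
Shortening the lengthscale typically improves the data fit term but increases the capacity penalty; the maximum likelihood lengthscale trades the two off.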
Figure: Variation in the data fit term, the capacity term and the negative log likelihood for different lengthscales.

## Exponentiated Quadratic Covariance \[edit\]

The exponentiated quadratic covariance is also known as the Gaussian covariance, the RBF covariance or the squared exponential. The covariance between two points decays with the negative exponential of the squared distance between them. This covariance function can be derived in a few different ways: as the infinite limit of a radial basis function neural network, as diffusion in the heat equation, as a Gaussian filter in *Fourier space* or as the composition of a series of linear filters applied to a base function.

The covariance takes the following form,
$$
\kernelScalar(\inputVector, \inputVector^\prime) = \alpha \exp\left(-\frac{\ltwoNorm{\inputVector-\inputVector^\prime}^2}{2\lengthScale^2}\right)
$$
where $\lengthScale$ is the *length scale* or *time scale* of the process and $\alpha$ represents the overall process variance.
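As a small illustration, the covariance can be computed directly from this definition. The sketch below is not the `eq_cov` function loaded from `mlai.py`; it is a minimal stand-alone example, and the names `eq_cov_demo`, `alpha` and `lengthscale` are chosen here purely for illustration.

``` {.python}
import numpy as np

def eq_cov_demo(x, x_prime, alpha=1.0, lengthscale=1.0):
    """Exponentiated quadratic covariance between two input vectors."""
    # squared Euclidean distance between the two inputs
    sq_dist = np.sum((np.asarray(x, dtype=float) - np.asarray(x_prime, dtype=float))**2)
    return alpha*np.exp(-0.5*sq_dist/lengthscale**2)

# covariance matrix for a set of one-dimensional inputs
X = np.linspace(-3., 3., 5)[:, None]
K = np.array([[eq_cov_demo(xi, xj, alpha=1.0, lengthscale=2.0) for xj in X]
              for xi in X])
```

Larger lengthscales give covariances that decay more slowly with distance, so samples from the corresponding process vary more slowly across the input space.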
Figure: The exponentiated quadratic covariance function.

## Olympic Marathon Data \[edit\]
- Gold medal times for the Olympic Marathon since 1896.
- Marathons before 1924 didn't have a standardised distance.
- Present results using pace per km.
- In 1904 the Marathon was badly organised, leading to very slow times.

Image from Wikimedia Commons
The first thing we will do is load a standard data set for regression modelling. The data consists of the pace of Olympic Gold Medal Marathon winners for the Olympics from 1896 to present. First we load in the data and plot it.

``` {.python}
import numpy as np
import pods

data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

offset = y.mean()
scale = np.sqrt(y.var())
```

``` {.python}
import matplotlib.pyplot as plt
import teaching_plots as plot
import mlai

xlim = (1875, 2030)
ylim = (2.5, 6.5)
yhat = (y-offset)/scale

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
ax.set_xlabel('year', fontsize=20)
ax.set_ylabel('pace min/km', fontsize=20)
ax.set_xlim(xlim)
ax.set_ylim(ylim)

mlai.write_figure(figure=fig, 
                  filename='../slides/diagrams/datasets/olympic-marathon.svg', 
                  transparent=True, 
                  frameon=True)
```

``` {.python}
import GPy
m_full = GPy.models.GPRegression(x, yhat)
_ = m_full.optimize() # Optimize parameters of covariance function
```

``` {.python}
xt = np.linspace(1870, 2030, 200)[:, np.newaxis]
yt_mean, yt_var = m_full.predict(xt)
yt_sd = np.sqrt(yt_var)
```

``` {.python}
import teaching_plots as plot
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full, scale=scale, offset=offset, ax=ax, 
                  xlabel='year', ylabel='pace min/km', fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/olympic-marathon-gp.svg', 
                  transparent=True, frameon=True)
```

``` {.python}
x_clean = np.vstack((x[0:2, :], x[3:, :]))
y_clean = np.vstack((y[0:2, :], y[3:, :]))

m_clean = GPy.models.GPRegression(x_clean, y_clean)
_ = m_clean.optimize()
```

We now apply the same approach to the Della Gatta gene expression data, fitting Gaussian process models initialised with different lengthscales.

``` {.python}
import numpy as np
import pods

data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta', gene_number=937)
x = data['X']
y = data['Y']

offset = y.mean()
scale = np.sqrt(y.var())
```

``` {.python}
import matplotlib.pyplot as plt
import teaching_plots as plot
import mlai

xlim = (-20, 260)
ylim = (5, 7.5)
yhat = (y-offset)/scale

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
ax.set_xlabel('time/min', fontsize=20)
ax.set_ylabel('expression', fontsize=20)
ax.set_xlim(xlim)
ax.set_ylim(ylim)

mlai.write_figure(figure=fig, 
                  filename='../slides/diagrams/datasets/della-gatta-gene.svg', 
                  transparent=True, 
                  frameon=True)
```

``` {.python}
import GPy
m_full = GPy.models.GPRegression(x, yhat)
m_full.kern.lengthscale = 50
_ = m_full.optimize() # Optimize parameters of covariance function
```

``` {.python}
xt = np.linspace(-20, 260, 200)[:, np.newaxis]
yt_mean, yt_var = m_full.predict(xt)
yt_sd = np.sqrt(yt_var)
```

``` {.python}
import teaching_plots as plot
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full, scale=scale, offset=offset, ax=ax, 
                  xlabel='time/min', ylabel='expression', fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full.log_likelihood()), fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp.svg', 
                  transparent=True, frameon=True)
```

``` {.python}
m_full2 = GPy.models.GPRegression(x, yhat)
m_full2.kern.lengthscale = 2000
_ = m_full2.optimize() # Optimize parameters of covariance function
```

``` {.python}
import teaching_plots as plot
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full2, scale=scale, offset=offset, ax=ax, 
                  xlabel='time/min', ylabel='expression', fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full2.log_likelihood()), fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp2.svg', 
                  transparent=True, frameon=True)
```

``` {.python}
m_full3 = GPy.models.GPRegression(x, yhat)
m_full3.kern.lengthscale = 20
m_full3.likelihood.variance = 0.001
_ = m_full3.optimize() # Optimize parameters of covariance function
```

``` {.python}
import teaching_plots as plot
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full3, scale=scale, offset=offset, ax=ax, 
                  xlabel='time/min', ylabel='expression', fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full3.log_likelihood()), fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp3.svg', 
                  transparent=True, frameon=True)
```

``` {.python}
%load -s basis_cov mlai.py
```

``` {.python}
%load -s radial mlai.py
```

``` {.python}
%load -s brownian_cov mlai.py
```

``` {.python}
%load -s mlp_cov mlai.py
```