"""Deep Gaussian process demonstrations (exported notebook script).

Fits standard GP regressions (GPy) and deep GPs (PyDeepGP) to several data
sets -- Olympic marathon pace, a Della Gatta gene-expression series, a step
function, the motorcycle-helmet data and a subset of MNIST digits -- and
writes the resulting figures as SVG files under the slides ``diagrams``
directories.

NOTE(review): ``DeepGP.initialize``, ``staged_optimize``, ``visualize`` and
``visualize_pinball`` are not defined in this file; they are presumably
attached to ``deepgp.DeepGP`` elsewhere -- confirm before running.
"""

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
from ipywidgets import IntSlider
from IPython.lib.display import YouTubeVideo
from sklearn.datasets import fetch_openml

import GPy
import deepgp
import mlai
import pods
import teaching_plots as plot

# Notebook-only installation steps. A bare ``pip install`` is not valid
# Python, so they are kept as commented-out magics.
# %pip install GPy
# %pip install git+https://github.com/SheffieldML/PyDeepGP.git


# ---------------------------------------------------------------------------
# Pre-rendered illustrations
# ---------------------------------------------------------------------------
pods.notebook.display_plots('pinball{sample:0>3}.svg',
                            '../slides/diagrams',
                            sample=IntSlider(1, 1, 2, 1))

# NOTE(review): these two use '../../slides/...' while most paths below use
# '../slides/...' -- confirm which working directory is intended.
pods.notebook.display_plots('stack-gp-sample-Linear-{sample:0>1}.svg',
                            directory='../../slides/diagrams/deepgp',
                            sample=(0, 4))
pods.notebook.display_plots('stack-gp-sample-RBF-{sample:0>1}.svg',
                            directory='../../slides/diagrams/deepgp',
                            sample=(0, 4))

# Bare expression: only renders when run as a notebook cell.
YouTubeVideo('XhIvygQYFFQ')


# ---------------------------------------------------------------------------
# Olympic marathon data
# ---------------------------------------------------------------------------
data = pods.datasets.olympic_marathon_men()
x = data['X']
y = data['Y']

# Standardise the targets; offset/scale are passed to the plotting helpers.
offset = y.mean()
scale = np.sqrt(y.var())
yhat = (y - offset) / scale

xlim = (1875, 2030)
ylim = (2.5, 6.5)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
ax.set_xlabel('year', fontsize=20)
ax.set_ylabel('pace min/km', fontsize=20)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/datasets/olympic-marathon.svg',
                  transparent=True, frameon=True)

# Standard GP fit.
m_full = GPy.models.GPRegression(x, yhat)
_ = m_full.optimize()  # Optimize parameters of covariance function

xt = np.linspace(1870, 2030, 200)[:, np.newaxis]
yt_mean, yt_var = m_full.predict(xt)
yt_sd = np.sqrt(yt_var)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full, scale=scale, offset=offset, ax=ax,
                  xlabel='year', ylabel='pace min/km',
                  fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/olympic-marathon-gp.svg',
                  transparent=True, frameon=True)

# Refit with the third observation (row index 2) removed.
x_clean = np.vstack((x[0:2, :], x[3:, :]))
y_clean = np.vstack((y[0:2, :], y[3:, :]))
m_clean = GPy.models.GPRegression(x_clean, y_clean)
_ = m_clean.optimize()

# Deep GP fit with a single hidden layer.
hidden = 1
m = deepgp.DeepGP([y.shape[1], hidden, x.shape[1]],
                  Y=yhat, X=x,
                  inits=['PCA', 'PCA'],
                  kernels=[GPy.kern.RBF(hidden, ARD=True),
                           GPy.kern.RBF(x.shape[1], ARD=True)],  # the kernels for each layer
                  num_inducing=50, back_constraint=False)

# Call the initialization
m.initialize()

for layer in m.layers:
    layer.likelihood.variance.constrain_positive(warning=False)
m.optimize(messages=True, max_iters=10000)

m.staged_optimize(messages=(True, True, True))

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_sample(m, scale=scale, offset=offset, samps=10, ax=ax,
                  xlabel='year', ylabel='pace min/km', portion=0.225)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/olympic-marathon-deep-gp-samples.svg',
                  transparent=True, frameon=True)

m.visualize(scale=scale, offset=offset,
            xlabel='year', ylabel='pace min/km',
            xlim=xlim, ylim=ylim,
            dataset='olympic-marathon',
            diagrams='../slides/diagrams/deepgp')

pods.notebook.display_plots('olympic-marathon-deep-gp-layer-{sample:0>1}.svg',
                            '../slides/diagrams/deepgp', sample=(0, 1))

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
# Label fixed from 'pace km/min' to match the units used on every other
# marathon figure in this script.
m.visualize_pinball(ax=ax, scale=scale, offset=offset, points=30,
                    portion=0.1, xlabel='year', ylabel='pace min/km',
                    vertical=True)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/olympic-marathon-deep-gp-pinball.svg',
                  transparent=True, frameon=True)


# ---------------------------------------------------------------------------
# Della Gatta gene-expression data
# ---------------------------------------------------------------------------
data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',
                                                       gene_number=937)
x = data['X']
y = data['Y']

offset = y.mean()
scale = np.sqrt(y.var())
yhat = (y - offset) / scale

xlim = (-20, 260)
ylim = (5, 7.5)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
ax.set_xlabel('time/min', fontsize=20)
ax.set_ylabel('expression', fontsize=20)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/datasets/della-gatta-gene.svg',
                  transparent=True, frameon=True)

# Three GP fits started from different initial hyperparameters; each figure
# is titled with the log likelihood reached after optimisation.
m_full = GPy.models.GPRegression(x, yhat)
m_full.kern.lengthscale = 50
_ = m_full.optimize()  # Optimize parameters of covariance function

xt = np.linspace(-20, 260, 200)[:, np.newaxis]
yt_mean, yt_var = m_full.predict(xt)
yt_sd = np.sqrt(yt_var)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full, scale=scale, offset=offset, ax=ax,
                  xlabel='time/min', ylabel='expression',
                  fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full.log_likelihood()),
             fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp.svg',
                  transparent=True, frameon=True)

m_full2 = GPy.models.GPRegression(x, yhat)
m_full2.kern.lengthscale = 2000
_ = m_full2.optimize()  # Optimize parameters of covariance function

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full2, scale=scale, offset=offset, ax=ax,
                  xlabel='time/min', ylabel='expression',
                  fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full2.log_likelihood()),
             fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp2.svg',
                  transparent=True, frameon=True)

m_full3 = GPy.models.GPRegression(x, yhat)
m_full3.kern.lengthscale = 20
m_full3.likelihood.variance = 0.001
_ = m_full3.optimize()  # Optimize parameters of covariance function

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full3, scale=scale, offset=offset, ax=ax,
                  xlabel='time/min', ylabel='expression',
                  fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_title('log likelihood: {ll:.3}'.format(ll=m_full3.log_likelihood()),
             fontsize=20)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/della-gatta-gene-gp3.svg',
                  transparent=True, frameon=True)

# Deep GP with one hidden layer of width 1.
layers = [y.shape[1], 1, x.shape[1]]
inits = ['PCA'] * (len(layers) - 1)
kernels = [GPy.kern.RBF(dim) for dim in layers[1:]]
m = deepgp.DeepGP(layers, Y=yhat, X=x,
                  inits=inits,
                  kernels=kernels,  # the kernels for each layer
                  num_inducing=20, back_constraint=False)
m.initialize()
m.staged_optimize()

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m, scale=scale, offset=offset, ax=ax,
                  fontsize=20, portion=0.5)
ax.set_ylim(ylim)
ax.set_xlim(xlim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/della-gatta-gene-deep-gp.svg',
                  transparent=True, frameon=True)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_sample(m, scale=scale, offset=offset, samps=10, ax=ax,
                  portion=0.5)
ax.set_ylim(ylim)
ax.set_xlim(xlim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/della-gatta-gene-deep-gp-samples.svg',
                  transparent=True, frameon=True)

m.visualize(offset=offset, scale=scale, xlim=xlim, ylim=ylim,
            dataset='della-gatta-gene',
            diagrams='../slides/diagrams/deepgp')

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
m.visualize_pinball(offset=offset, ax=ax, scale=scale,
                    xlim=xlim, ylim=ylim, portion=0.1, points=50)
# NOTE(review): ax=ax is passed to write_figure here but not in most other
# calls -- confirm write_figure accepts it.
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/della-gatta-gene-deep-gp-pinball.svg',
                  transparent=True, frameon=True, ax=ax)


# ---------------------------------------------------------------------------
# Step function data
# ---------------------------------------------------------------------------
num_low = 25
num_high = 25
gap = -.1
noise = 0.0001
x = np.vstack((np.linspace(-1, -gap/2.0, num_low)[:, np.newaxis],
               np.linspace(gap/2.0, 1, num_high)[:, np.newaxis]))
y = np.vstack((np.zeros((num_low, 1)), np.ones((num_high, 1))))

scale = np.sqrt(y.var())
offset = y.mean()
yhat = (y - offset) / scale

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
_ = ax.set_xlabel('$x$', fontsize=20)
_ = ax.set_ylabel('$y$', fontsize=20)
xlim = (-2, 2)
ylim = (-0.6, 1.6)
ax.set_ylim(ylim)
ax.set_xlim(xlim)
# NOTE(review): '../../slides/...' here vs '../slides/...' for the other
# figures written by this script -- confirm the intended output directory.
mlai.write_figure(figure=fig,
                  filename='../../slides/diagrams/datasets/step-function.svg',
                  transparent=True, frameon=True)

m_full = GPy.models.GPRegression(x, yhat)
_ = m_full.optimize()  # Optimize parameters of covariance function

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m_full, scale=scale, offset=offset, ax=ax,
                  fontsize=20, portion=0.5)
ax.set_ylim(ylim)
ax.set_xlim(xlim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/gp/step-function-gp.svg',
                  transparent=True, frameon=True)

# Deep GP with three hidden layers of width 1.
layers = [y.shape[1], 1, 1, 1, x.shape[1]]
inits = ['PCA'] * (len(layers) - 1)
kernels = [GPy.kern.RBF(dim) for dim in layers[1:]]
m = deepgp.DeepGP(layers, Y=yhat, X=x,
                  inits=inits,
                  kernels=kernels,  # the kernels for each layer
                  num_inducing=20, back_constraint=False)
m.initialize()
m.staged_optimize()

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m, scale=scale, offset=offset, ax=ax,
                  fontsize=20, portion=0.5)
ax.set_ylim(ylim)
ax.set_xlim(xlim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/step-function-deep-gp.svg',
                  transparent=True, frameon=True)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_sample(m, scale=scale, offset=offset, samps=10, ax=ax,
                  portion=0.5)
ax.set_ylim(ylim)
ax.set_xlim(xlim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/step-function-deep-gp-samples.svg',
                  transparent=True, frameon=True)

m.visualize(offset=offset, scale=scale, xlim=xlim, ylim=ylim,
            dataset='step-function',
            diagrams='../slides/diagrams/deepgp')

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
m.visualize_pinball(offset=offset, ax=ax, scale=scale,
                    xlim=xlim, ylim=ylim, portion=0.1, points=50)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/step-function-deep-gp-pinball.svg',
                  transparent=True, frameon=True, ax=ax)


# ---------------------------------------------------------------------------
# Motorcycle helmet data
# ---------------------------------------------------------------------------
data = pods.datasets.mcycle()
x = data['X']
y = data['Y']

scale = np.sqrt(y.var())
offset = y.mean()
yhat = (y - offset) / scale

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
_ = ax.plot(x, y, 'r.', markersize=10)
_ = ax.set_xlabel('time', fontsize=20)
_ = ax.set_ylabel('acceleration', fontsize=20)
xlim = (-20, 80)
ylim = (-175, 125)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/datasets/motorcycle-helmet.svg',
                  transparent=True, frameon=True)

m_full = GPy.models.GPRegression(x, yhat)
_ = m_full.optimize()  # Optimize parameters of covariance function

layers = [y.shape[1], 1, x.shape[1]]
inits = ['PCA'] * (len(layers) - 1)
kernels = [GPy.kern.RBF(dim) for dim in layers[1:]]
m = deepgp.DeepGP(layers, Y=yhat, X=x,
                  inits=inits,
                  kernels=kernels,  # the kernels for each layer
                  num_inducing=20, back_constraint=False)
m.initialize()
m.staged_optimize(iters=(1000, 1000, 10000), messages=(True, True, True))

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m, scale=scale, offset=offset, ax=ax,
                  xlabel='time', ylabel='acceleration/$g$',
                  fontsize=20, portion=0.5)
ax.set_ylim(ylim)
ax.set_xlim(xlim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/motorcycle-helmet-deep-gp.svg',
                  transparent=True, frameon=True)

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_sample(m, scale=scale, offset=offset, samps=10, ax=ax,
                  xlabel='time', ylabel='acceleration/$g$', portion=0.5)
ax.set_ylim(ylim)
ax.set_xlim(xlim)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/motorcycle-helmet-deep-gp-samples.svg',
                  transparent=True, frameon=True)

m.visualize(xlim=xlim, ylim=ylim, scale=scale, offset=offset,
            xlabel="time", ylabel="acceleration/$g$", portion=0.5,
            dataset='motorcycle-helmet',
            diagrams='../slides/diagrams/deepgp')

fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
m.visualize_pinball(ax=ax, xlabel='time', ylabel='acceleration/g',
                    points=50, scale=scale, offset=offset, portion=0.1)
mlai.write_figure(figure=fig,
                  filename='../slides/diagrams/deepgp/motorcycle-helmet-deep-gp-pinball.svg',
                  transparent=True, frameon=True)


# ---------------------------------------------------------------------------
# MNIST digits subset
# ---------------------------------------------------------------------------
# fetch_mldata was removed from scikit-learn (mldata.org is offline); the
# OpenML copy of MNIST is used instead. OpenML returns the labels as strings,
# so they are cast to int for the comparisons below. The sample order differs
# from the old 'MNIST original' file, so the random subset drawn for a given
# seed will not be the same images as before.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
mnist_targets = mnist['target'].astype(int)

np.random.seed(0)
digits = [0, 1, 2, 3, 4]
N_per_digit = 100
Y = []
labels = []
for d in digits:
    imgs = mnist['data'][mnist_targets == d]
    # Random subset of N_per_digit images for this digit.
    Y.append(imgs[np.random.permutation(imgs.shape[0])][:N_per_digit])
    labels.append(np.ones(N_per_digit) * d)
Y = np.vstack(Y).astype(np.float64)
labels = np.hstack(labels)
Y /= 255.

num_latent = 2
num_hidden_2 = 5
m = deepgp.DeepGP([Y.shape[1], num_hidden_2, num_latent],
                  Y,
                  kernels=[GPy.kern.RBF(num_hidden_2, ARD=True),
                           GPy.kern.RBF(num_latent, ARD=False)],
                  num_inducing=50, back_constraint=False,
                  encoder_dims=[[200], [200]])

# Staged optimisation: first with kernel and noise variances fixed, then
# releasing the kernel variances, then releasing the noise variances.
m.obslayer.likelihood.variance[:] = Y.var() * 0.01
for layer in m.layers:
    layer.kern.variance.fix(warning=False)
    layer.likelihood.variance.fix(warning=False)

m.optimize(messages=False, max_iters=100)

for layer in m.layers:
    layer.kern.variance.constrain_positive(warning=False)
m.optimize(messages=False, max_iters=100)

for layer in m.layers:
    layer.likelihood.variance.constrain_positive(warning=False)
m.optimize(messages=True, max_iters=10000)

rc("font", **{'family': 'sans-serif', 'sans-serif': ['Helvetica'], 'size': 20})

# Scatter of the two latent dimensions, one colour per digit.
fig, ax = plt.subplots(figsize=plot.big_figsize)
for d in digits:
    ax.plot(m.layer_1.X.mean[labels == d, 0],
            m.layer_1.X.mean[labels == d, 1],
            '.', label=str(d))
_ = plt.legend()
mlai.write_figure(figure=fig,
                  filename="../slides/diagrams/deepgp/usps-digits-latent.svg",
                  transparent=True)

# Bare expression: only displays when run as a notebook cell.
m.obslayer.kern.lengthscale

# One figure per pair (i, j), j < i, of hidden dimensions; the same axes are
# cleared and reused for each pair.
fig, ax = plt.subplots(figsize=plot.big_figsize)
for i in range(num_hidden_2):
    for j in range(i):
        dims = [i, j]
        ax.cla()
        for d in digits:
            ax.plot(m.obslayer.X.mean[labels == d, dims[0]],
                    m.obslayer.X.mean[labels == d, dims[1]],
                    '.', label=str(d))
        plt.legend()
        plt.xlabel('dimension ' + str(dims[0]))
        plt.ylabel('dimension ' + str(dims[1]))
        mlai.write_figure(figure=fig,
                          filename="../slides/diagrams/deepgp/usps-digits-hidden-" + str(dims[0]) + '-' + str(dims[1]) + '.svg',
                          transparent=True)

# Draw a smooth path through latent space (one RBF-GP sample per latent
# dimension) and render the model's predictive mean at each point.
rows = 10
cols = 20
t = np.linspace(-1, 1, rows*cols)[:, None]
kern = GPy.kern.RBF(1, lengthscale=0.05)
cov = kern.K(t, t)
x = np.random.multivariate_normal(np.zeros(rows*cols), cov, num_latent).T

yt = m.predict(x)
fig, axs = plt.subplots(rows, cols, figsize=(10, 6))
for i in range(rows):
    for j in range(cols):
        # Alternative (disabled): sample around the mean instead,
        # v = np.random.normal(loc=yt[0][i*cols+j, :], scale=np.sqrt(yt[1][i*cols+j, :]))
        v = yt[0][i*cols+j, :]
        axs[i, j].imshow(v.reshape(28, 28),
                         cmap='gray', interpolation='none',
                         aspect='equal')
        axs[i, j].set_axis_off()
mlai.write_figure(figure=fig,
                  filename="../slides/diagrams/deepgp/digit-samples-deep-gp.svg",
                  transparent=True)