import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('ai-bd-dm-dl-ml-google-trends{sample:0>3}.svg',
'../slides/diagrams/data-science/', sample=IntSlider(1, 1, 4, 1))
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('bd-ds-iot-ml-google-trends{sample:0>3}.svg',
'../slides/diagrams/data-science/', sample=IntSlider(1, 1, 4, 1))
import pods
pods.notebook.display_plots('pinball{sample:0>3}.svg',
'../slides/diagrams', sample=(1,2))
import numpy as np
%load -s compute_kernel mlai.py
%load -s exponentiated_quadratic mlai.py
import pods
from ipywidgets import IntSlider
pods.notebook.display_plots('gp_rejection_sample{sample:0>3}.svg',
directory='../slides/diagrams/gp',
sample=IntSlider(1,1,5,1))
### Olympic Marathon Data
The first thing we will do is load a standard data set for regression modelling. The data consists of the pace of Olympic Gold Medal Marathon winners for the Olympics from 1896 to present. First we load in the data and plot.
### Olympic Marathon Data
- Gold medal times for Olympic Marathon since 1896.
- Marathons before 1924 didn’t have a standardised distance.
- Present results using pace per km.
- In 1904 Marathon was badly organised leading to very slow times.
|
![image](../slides/diagrams/Stephen_Kiprotich.jpg)
Image from Wikimedia Commons
|
Things to notice about the data include the outlier in 1904, in this year, the olympics was in St Louis, USA. Organizational problems and challenges with dust kicked up by the cars following the race meant that participants got lost, and only very few participants completed.
More recent years see more consistently quick marathons.
Our first objective will be to perform a Gaussian process fit to the data, we'll do this using the [GPy software](https://github.com/SheffieldML/GPy).
```{.python}
m_full = GPy.models.GPRegression(x,yhat)
_ = m_full.optimize() # Optimize parameters of covariance function
m_full.optimize()
y_mean, y_var = m_full.predict(xt)
xt = np.linspace(1870,2030,200)[:,np.newaxis]
yt_mean, yt_var = m_full.predict(xt)
yt_sd=np.sqrt(yt_var)
import teaching_plots as plot
x_clean=np.vstack((x[0:2, :], x[3:, :]))
y_clean=np.vstack((y[0:2, :], y[3:, :]))
m_clean = GPy.models.GPRegression(x_clean,y_clean)
_ = m_clean.optimize()
import GPy
import deepgp
hidden = 1
m = deepgp.DeepGP([y.shape[1],hidden,x.shape[1]],Y=yhat, X=x, inits=['PCA','PCA'],
kernels=[GPy.kern.RBF(hidden,ARD=True),
GPy.kern.RBF(x.shape[1],ARD=True)], # the kernels for each layer
num_inducing=50, back_constraint=False)
import numpy as np
import GPy
import deepgp
def initialize(self, noise_factor=0.01, linear_factor=1):
"""Helper function for deep model initialization."""
self.obslayer.likelihood.variance = self.Y.var()*noise_factor
for layer in self.layers:
if type(layer.X) is GPy.core.parameterization.variational.NormalPosterior:
if layer.kern.ARD:
var = layer.X.mean.var(0)
else:
var = layer.X.mean.var()
else:
if layer.kern.ARD:
var = layer.X.var(0)
else:
var = layer.X.var()
# Average 0.5 upcrossings in four standard deviations.
layer.kern.lengthscale = linear_factor*np.sqrt(layer.kern.input_dim)*2*4*np.sqrt(var)/(2*np.pi)
# Bind the new method to the Deep GP object.
deepgp.DeepGP.initialize=initialize
# Call the initalization
m.initialize()
m.optimize(messages=False,max_iters=100)
for layer in m.layers:
pass #layer.kern.variance.constrain_positive(warning=False)
m.obslayer.kern.variance.constrain_positive(warning=False)
m.optimize(messages=False,max_iters=100)
for layer in m.layers:
layer.likelihood.variance.constrain_positive(warning=False)
m.optimize(messages=True,max_iters=10000)
def staged_optimize(self, iters=(1000,1000,10000), messages=(False, False, True)):
"""Optimize with parameters constrained and then with parameters released"""
for layer in self.layers:
# Fix the scale of each of the covariance functions.
layer.kern.variance.fix(warning=False)
layer.kern.lengthscale.fix(warning=False)
# Fix the variance of the noise in each layer.
layer.likelihood.variance.fix(warning=False)
self.optimize(messages=messages[0],max_iters=iters[0])
for layer in self.layers:
layer.kern.lengthscale.constrain_positive(warning=False)
self.obslayer.kern.variance.constrain_positive(warning=False)
self.optimize(messages=messages[1],max_iters=iters[1])
for layer in self.layers:
layer.kern.variance.constrain_positive(warning=False)
layer.likelihood.variance.constrain_positive(warning=False)
self.optimize(messages=messages[2],max_iters=iters[2])
# Bind the new method to the Deep GP object.
deepgp.DeepGP.staged_optimize=staged_optimize
m.staged_optimize(messages=(True,True,True))
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
plot.model_output(m, scale=scale, offset=offset, ax=ax, xlabel='year', ylabel='pace min/km',
fontsize=20, portion=0.2)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
mlai.write_figure(figure=fig, filename='../slides/diagrams/deepgp/olympic-marathon-deep-gp.svg',
transparent=True, frameon=True)
def posterior_sample(self, X, **kwargs):
"""Give a sample from the posterior of the deep GP."""
Z = X
for i, layer in enumerate(reversed(self.layers)):
Z = layer.posterior_samples(Z, size=1, **kwargs)[:, :, 0]
return Z
deepgp.DeepGP.posterior_sample = posterior_sample
def visualize(self, scale=1.0, offset=0.0, xlabel='input', ylabel='output',
xlim=None, ylim=None, fontsize=20, portion=0.2,dataset=None,
diagrams='../diagrams'):
"""Visualize the layers in a deep GP with one-d input and output."""
depth = len(self.layers)
if dataset is None:
fname = 'deep-gp-layer'
else:
fname = dataset + '-deep-gp-layer'
filename = os.path.join(diagrams, fname)
last_name = xlabel
last_x = self.X
for i, layer in enumerate(reversed(self.layers)):
if i>0:
plt.plot(last_x, layer.X.mean, 'r.',markersize=10)
last_x=layer.X.mean
ax=plt.gca()
name = 'layer ' + str(i)
plt.xlabel(last_name, fontsize=fontsize)
plt.ylabel(name, fontsize=fontsize)
last_name=name
mlai.write_figure(filename=filename + '-' + str(i-1) + '.svg',
transparent=True, frameon=True)
if i==0 and xlim is not None:
xt = plot.pred_range(np.array(xlim), portion=0.0)
elif i>0:
xt = plot.pred_range(np.array(next_lim), portion=0.0)
else:
xt = plot.pred_range(last_x, portion=portion)
yt_mean, yt_var = layer.predict(xt)
if layer==self.obslayer:
yt_mean = yt_mean*scale + offset
yt_var *= scale*scale
yt_sd = np.sqrt(yt_var)
gpplot(xt,yt_mean,yt_mean-2*yt_sd,yt_mean+2*yt_sd)
ax = plt.gca()
if i>0:
ax.set_xlim(next_lim)
elif xlim is not None:
ax.set_xlim(xlim)
next_lim = plt.gca().get_ylim()
plt.plot(last_x, self.Y*scale + offset, 'r.',markersize=10)
plt.xlabel(last_name, fontsize=fontsize)
plt.ylabel(ylabel, fontsize=fontsize)
mlai.write_figure(filename=filename + '-' + str(i) + '.svg',
transparent=True, frameon=True)
if ylim is not None:
ax=plt.gca()
ax.set_ylim(ylim)
# Bind the new method to the Deep GP object.
deepgp.DeepGP.visualize=visualize
pods.notebook.display_plots('olympic-marathon-deep-gp-layer-{sample:0>1}.svg',
'../slides/diagrams/deepgp', sample=(0,1))
def scale_data(x, portion):
scale = (x.max()-x.min())/(1-2*portion)
offset = x.min() - portion*scale
return (x-offset)/scale, scale, offset
def visualize_pinball(self, ax=None, scale=1.0, offset=0.0, xlabel='input', ylabel='output',
xlim=None, ylim=None, fontsize=20, portion=0.2, points=50, vertical=True):
"""Visualize the layers in a deep GP with one-d input and output."""
if ax is None:
fig, ax = plt.subplots(figsize=plot.big_wide_figsize)
depth = len(self.layers)
last_name = xlabel
last_x = self.X
# Recover input and output scales from output plot
plot_model_output(self, scale=scale, offset=offset, ax=ax,
xlabel=xlabel, ylabel=ylabel,
fontsize=fontsize, portion=portion)
xlim=ax.get_xlim()
xticks=ax.get_xticks()
xtick_labels=ax.get_xticklabels().copy()
ylim=ax.get_ylim()
yticks=ax.get_yticks()
ytick_labels=ax.get_yticklabels().copy()
# Clear axes and start again
ax.cla()
if vertical:
ax.set_xlim((0, 1))
ax.invert_yaxis()
ax.set_ylim((depth, 0))
else:
ax.set_ylim((0, 1))
ax.set_xlim((0, depth))
ax.set_axis_off()#frame_on(False)
def pinball(x, y, y_std, color_scale=None,
layer=0, depth=1, ax=None,
alpha=1.0, portion=0.0, vertical=True):
scaledx, xscale, xoffset = scale_data(x, portion=portion)
scaledy, yscale, yoffset = scale_data(y, portion=portion)
y_std /= yscale
# Check whether data is anti-correlated on output
if np.dot((scaledx-0.5).T, (scaledy-0.5))<0:
scaledy=1-scaledy
flip=-1
else:
flip=1
if color_scale is not None:
color_scale, _, _=scale_data(color_scale, portion=0)
scaledy = (1-alpha)*scaledx + alpha*scaledy
def color_value(x, cmap=None, width=None, centers=None):
"""Return color as a function of position along x axis"""
if cmap is None:
cmap = np.asarray([[1, 0, 0], [1, 1, 0], [0, 1, 0]])
ncenters = cmap.shape[0]
if centers is None:
centers = np.linspace(0+0.5/ncenters, 1-0.5/ncenters, ncenters)
if width is None:
width = 0.25/ncenters
r = (x-centers)/width
weights = np.exp(-0.5*r*r).flatten()
weights /=weights.sum()
weights = weights[:, np.newaxis]
return np.dot(cmap.T, weights).flatten()
for i in range(x.shape[0]):
if color_scale is not None:
color = color_value(color_scale[i])
else:
color=(1, 0, 0)
x_plot = np.asarray((scaledx[i], scaledy[i])).flatten()
y_plot = np.asarray((layer, layer+alpha)).flatten()
if vertical:
ax.plot(x_plot, y_plot, color=color, alpha=0.5, linewidth=3)
ax.plot(x_plot, y_plot, color='k', alpha=0.5, linewidth=0.5)
else:
ax.plot(y_plot, x_plot, color=color, alpha=0.5, linewidth=3)
ax.plot(y_plot, x_plot, color='k', alpha=0.5, linewidth=0.5)
# Plot error bars that increase as sqrt of distance from start.
std_points = 50
stdy = np.linspace(0, alpha,std_points)
stdx = np.sqrt(stdy)*y_std[i]
stdy += layer
mean_vals = np.linspace(scaledx[i], scaledy[i], std_points)
upper = mean_vals+stdx
lower = mean_vals-stdx
fillcolor=color
x_errorbars=np.hstack((upper,lower[::-1]))
y_errorbars=np.hstack((stdy,stdy[::-1]))
if vertical:
ax.fill(x_errorbars,y_errorbars,
color=fillcolor, alpha=0.1)
ax.plot(scaledy[i], layer+alpha, '.',markersize=10, color=color, alpha=0.5)
else:
ax.fill(y_errorbars,x_errorbars,
color=fillcolor, alpha=0.1)
ax.plot(layer+alpha, scaledy[i], '.',markersize=10, color=color, alpha=0.5)
# Marker to show end point
return flip
# Whether final axis is flipped
flip = 1
first_x=last_x
for i, layer in enumerate(reversed(self.layers)):
if i==0:
xt = plot.pred_range(last_x, portion=portion, points=points)
color_scale=xt
yt_mean, yt_var = layer.predict(xt)
if layer==self.obslayer:
yt_mean = yt_mean*scale + offset
yt_var *= scale*scale
yt_sd = np.sqrt(yt_var)
flip = flip*pinball(xt,yt_mean,yt_sd,color_scale,portion=portion,
layer=i, ax=ax, depth=depth,vertical=vertical)#yt_mean-2*yt_sd,yt_mean+2*yt_sd)
xt = yt_mean
# Make room for axis labels
if vertical:
ax.set_ylim((2.1, -0.1))
ax.set_xlim((-0.02, 1.02))
ax.set_yticks(range(depth,0,-1))
else:
ax.set_xlim((-0.1, 2.1))
ax.set_ylim((-0.02, 1.02))
ax.set_xticks(range(0, depth))
def draw_axis(ax, scale=1.0, offset=0.0, level=0.0, flip=1,
label=None,up=False, nticks=10, ticklength=0.05,
vertical=True,
fontsize=20):
def clean_gap(gap):
nsf = np.log10(gap)
if nsf>0:
nsf = np.ceil(nsf)
else:
nsf = np.floor(nsf)
lower_gaps = np.asarray([0.005, 0.01, 0.02, 0.03, 0.04, 0.05,
0.1, 0.25, 0.5,
1, 1.5, 2, 2.5, 3, 4, 5, 10, 25, 50, 100])
upper_gaps = np.asarray([1, 2, 3, 4, 5, 10])
if nsf >2 or nsf<-2:
d = np.abs(gap-upper_gaps*10**nsf)
ind = np.argmin(d)
return upper_gaps[ind]*10**nsf
else:
d = np.abs(gap-lower_gaps)
ind = np.argmin(d)
return lower_gaps[ind]
tickgap = clean_gap(scale/(nticks-1))
nticks = int(scale/tickgap) + 1
tickstart = np.round(offset/tickgap)*tickgap
ticklabels = np.asarray(range(0, nticks))*tickgap + tickstart
ticks = (ticklabels-offset)/scale
axargs = {'color':'k', 'linewidth':1}
if not up:
ticklength=-ticklength
tickspot = np.linspace(0, 1, nticks)
if flip < 0:
ticks = 1-ticks
for tick, ticklabel in zip(ticks, ticklabels):
if vertical:
ax.plot([tick, tick], [level, level-ticklength], **axargs)
ax.text(tick, level-ticklength*2, ticklabel, horizontalalignment='center',
fontsize=fontsize/2)
ax.text(0.5, level-5*ticklength, label, horizontalalignment='center', fontsize=fontsize)
else:
ax.plot([level, level-ticklength], [tick, tick], **axargs)
ax.text(level-ticklength*2, tick, ticklabel, horizontalalignment='center',
fontsize=fontsize/2)
ax.text(level-5*ticklength, 0.5, label, horizontalalignment='center', fontsize=fontsize)
if vertical:
xlim = list(ax.get_xlim())
if ticks.min()xlim[1]:
xlim[1] = ticks.max()
ax.set_xlim(xlim)
ax.plot([ticks.min(), ticks.max()], [level, level], **axargs)
else:
ylim = list(ax.get_ylim())
if ticks.min()ylim[1]:
ylim[1] = ticks.max()
ax.set_ylim(ylim)
ax.plot([level, level], [ticks.min(), ticks.max()], **axargs)
_, xscale, xoffset = scale_data(first_x, portion=portion)
_, yscale, yoffset = scale_data(yt_mean, portion=portion)
draw_axis(ax=ax, scale=xscale, offset=xoffset, level=0.0, label=xlabel,
up=True, vertical=vertical)
draw_axis(ax=ax, scale=yscale, offset=yoffset,
flip=flip, level=depth, label=ylabel, up=False, vertical=vertical)
#for txt in xticklabels:
# txt.set
# Bind the new method to the Deep GP object.
deepgp.DeepGP.visualize_pinball=visualize_pinball
from IPython.lib.display import YouTubeVideo
YouTubeVideo('1y2UKz47gew')
import gym
env = gym.make('MountainCarContinuous-v0')
import mountain_car as mc
import GPyOpt
obj_func = lambda x: mc.run_simulation(env, x)[0]
objective = GPyOpt.core.task.SingleObjective(obj_func)
## --- We define the input space of the emulator
space= [{'name':'postion_parameter', 'type':'continuous', 'domain':(-1.2, +1)},
{'name':'velocity_parameter', 'type':'continuous', 'domain':(-1/0.07, +1/0.07)},
{'name':'constant', 'type':'continuous', 'domain':(-1, +1)}]
design_space = GPyOpt.Design_space(space=space)
model = GPyOpt.models.GPModel(optimize_restarts=5, verbose=False, exact_feval=True, ARD=True)
aquisition_optimizer = GPyOpt.optimization.AcquisitionOptimizer(design_space)
acquisition = GPyOpt.acquisitions.AcquisitionEI(model, design_space, optimizer=aquisition_optimizer)
evaluator = GPyOpt.core.evaluators.Sequential(acquisition) # Collect points sequentially, no parallelization.
from GPyOpt.experiment_design.random_design import RandomDesign
n_initial_points = 25
random_design = RandomDesign(design_space)
initial_design = random_design.get_samples(n_initial_points)
import numpy as np
random_controller = initial_design[0,:]
_, _, _, frames = mc.run_simulation(env, np.atleast_2d(random_controller), render=True)
anim=mc.animate_frames(frames, 'Random linear controller')
from IPython.core.display import HTML
HTML(anim.to_jshtml())
max_iter = 50
bo = GPyOpt.methods.ModularBayesianOptimization(model, design_space, objective, acquisition, evaluator, initial_design)
bo.run_optimization(max_iter = max_iter )
_, _, _, frames = mc.run_simulation(env, np.atleast_2d(bo.x_opt), render=True)
anim=mc.animate_frames(frames, 'Best controller after 50 iterations of Bayesian optimization')
HTML(anim.to_jshtml())
import gym
env = gym.make('MountainCarContinuous-v0')
import GPyOpt
space_dynamics = [{'name':'position', 'type':'continuous', 'domain':[-1.2, +0.6]},
{'name':'velocity', 'type':'continuous', 'domain':[-0.07, +0.07]},
{'name':'action', 'type':'continuous', 'domain':[-1, +1]}]
design_space_dynamics = GPyOpt.Design_space(space=space_dynamics)
position_model = GPyOpt.models.GPModel(optimize_restarts=5, verbose=False, exact_feval=True, ARD=True)
velocity_model = GPyOpt.models.GPModel(optimize_restarts=5, verbose=False, exact_feval=True, ARD=True)
import numpy as np
from GPyOpt.experiment_design.random_design import RandomDesign
import mountain_car as mc
### --- Random locations of the inputs
n_initial_points = 500
random_design_dynamics = RandomDesign(design_space_dynamics)
initial_design_dynamics = random_design_dynamics.get_samples(n_initial_points)
### --- Simulation of the (normalized) outputs
y = np.zeros((initial_design_dynamics.shape[0], 2))
for i in range(initial_design_dynamics.shape[0]):
y[i, :] = mc.simulation(initial_design_dynamics[i, :])
# Normalize the data from the simulation
y_normalisation = np.std(y, axis=0)
y_normalised = y/y_normalisation
position_model.updateModel(initial_design_dynamics, y[:, [0]], None, None)
velocity_model.updateModel(initial_design_dynamics, y[:, [1]], None, None)
from IPython.html.widgets import interact
controller_gains = np.atleast_2d([0, .6, 1]) # change the valus of the linear controller to observe the trayectories.
### --- Optimize control parameters with emulator
car_initial_location = np.asarray([-0.58912799, 0])
### --- Reward objective function using the emulator
obj_func_emulator = lambda x: mc.run_emulation([position_model, velocity_model], x, car_initial_location)[0]
objective_emulator = GPyOpt.core.task.SingleObjective(obj_func_emulator)
### --- Elements of the optimization that will use the multi-fidelity emulator
model = GPyOpt.models.GPModel(optimize_restarts=5, verbose=False, exact_feval=True, ARD=True)
space= [{'name':'linear_1', 'type':'continuous', 'domain':(-1/1.2, +1)},
{'name':'linear_2', 'type':'continuous', 'domain':(-1/0.07, +1/0.07)},
{'name':'constant', 'type':'continuous', 'domain':(-1, +1)}]
design_space = GPyOpt.Design_space(space=space)
aquisition_optimizer = GPyOpt.optimization.AcquisitionOptimizer(design_space)
random_design = RandomDesign(design_space)
initial_design = random_design.get_samples(25)
acquisition = GPyOpt.acquisitions.AcquisitionEI(model, design_space, optimizer=aquisition_optimizer)
evaluator = GPyOpt.core.evaluators.Sequential(acquisition)
bo_emulator = GPyOpt.methods.ModularBayesianOptimization(model, design_space, objective_emulator, acquisition, evaluator, initial_design)
bo_emulator.run_optimization(max_iter=50)
_, _, _, frames = mc.run_simulation(env, np.atleast_2d(bo_emulator.x_opt), render=True)
anim=mc.animate_frames(frames, 'Best controller using the emulator of the dynamics')
from IPython.core.display import HTML
HTML(anim.to_jshtml())
import gym
env = gym.make('MountainCarContinuous-v0')
import GPyOpt
### --- Collect points from low and high fidelity simulator --- ###
space = GPyOpt.Design_space([
{'name':'position', 'type':'continuous', 'domain':(-1.2, +1)},
{'name':'velocity', 'type':'continuous', 'domain':(-0.07, +0.07)},
{'name':'action', 'type':'continuous', 'domain':(-1, +1)}])
n_points = 250
random_design = GPyOpt.experiment_design.RandomDesign(space)
x_random = random_design.get_samples(n_points)
import numpy as np
import mountain_car as mc
d_position_hf = np.zeros((n_points, 1))
d_velocity_hf = np.zeros((n_points, 1))
d_position_lf = np.zeros((n_points, 1))
d_velocity_lf = np.zeros((n_points, 1))
# --- Collect high fidelity points
for i in range(0, n_points):
d_position_hf[i], d_velocity_hf[i] = mc.simulation(x_random[i, :])
# --- Collect low fidelity points
for i in range(0, n_points):
d_position_lf[i], d_velocity_lf[i] = mc.low_cost_simulation(x_random[i, :])
### --- Optimize controller parameters
obj_func = lambda x: mc.run_simulation(env, x)[0]
obj_func_emulator = lambda x: mc.run_emulation([position_model, velocity_model], x, car_initial_location)[0]
objective_multifidelity = GPyOpt.core.task.SingleObjective(obj_func)
from GPyOpt.experiment_design.random_design import RandomDesign
model = GPyOpt.models.GPModel(optimize_restarts=5, verbose=False, exact_feval=True, ARD=True)
space= [{'name':'linear_1', 'type':'continuous', 'domain':(-1/1.2, +1)},
{'name':'linear_2', 'type':'continuous', 'domain':(-1/0.07, +1/0.07)},
{'name':'constant', 'type':'continuous', 'domain':(-1, +1)}]
design_space = GPyOpt.Design_space(space=space)
aquisition_optimizer = GPyOpt.optimization.AcquisitionOptimizer(design_space)
n_initial_points = 25
random_design = RandomDesign(design_space)
initial_design = random_design.get_samples(n_initial_points)
acquisition = GPyOpt.acquisitions.AcquisitionEI(model, design_space, optimizer=aquisition_optimizer)
evaluator = GPyOpt.core.evaluators.Sequential(acquisition)
bo_multifidelity = GPyOpt.methods.ModularBayesianOptimization(model, design_space, objective_multifidelity, acquisition, evaluator, initial_design)
bo_multifidelity.run_optimization(max_iter=50)
_, _, _, frames = mc.run_simulation(env, np.atleast_2d(bo_multifidelity.x_opt), render=True)
anim=mc.animate_frames(frames, 'Best controller with multi-fidelity emulator')
from IPython.core.display import HTML
HTML(anim.to_jshtml())