import numpy as np
import GPy
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
from pylab import *
Let's first generate some synthetic data.
# Let's make some synthetic data
x = np.linspace(0.,2*np.pi,100)[:,None]
y = -cos(x)+np.random.randn(*x.shape)*0.3+1
_ = plot(x,y,'.')
Let's make a GP regression model and assign some general prior distributions to the model parameters.
# Make a GP regression model
m = GPy.models.GPRegression(x,y)
# Give some general prior distributions for model parameters
m.kern.lengthscale.set_prior(GPy.priors.Gamma.from_EV(1.,10.))
m.kern.variance.set_prior(GPy.priors.Gamma.from_EV(1.,10.))
m.likelihood.variance.set_prior(GPy.priors.Gamma.from_EV(1.,10.))
_=m.plot()
WARNING: reconstraining parameters GP_regression.rbf.lengthscale
WARNING: reconstraining parameters GP_regression.rbf.variance
WARNING: reconstraining parameters GP_regression.Gaussian_noise.variance
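For reference, `GPy.priors.Gamma.from_EV(E, V)` parameterises the Gamma prior by its expectation and variance, i.e. shape $a = E^2/V$ and rate $b = E/V$, so $E=1$, $V=10$ yields the Ga(0.1, 0.1) that appears in the model printout below. A quick check in plain numpy (not a GPy call):
E, V = 1., 10.
a, b = E**2/V, E/V   # shape and rate of the implied Gamma prior
print(a, b)          # 0.1 0.1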
Draw 1,000 samples from the GP model with HMC; the first call below is treated as burn-in and its samples are discarded:
hmc = GPy.inference.mcmc.HMC(m,stepsize=5e-2)
s = hmc.sample(num_samples=1000) # Burnin
s = hmc.sample(num_samples=1000)
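The sampler returns a plain numpy array with one row per sample and one column per parameter; the column order matches `m.parameter_names_flat()` (a quick check, assuming all three parameters are free):
print(s.shape)                   # e.g. (1000, 3): one column per parameter
print(m.parameter_names_flat())  # the column order of the samples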
Plot the samples:
plot(s)
Plot the posterior marginal distribution of model parameters:
labels = ['kern variance', 'kern lengthscale','noise variance']
samples = s[300:] # cut out the burn-in period
from scipy import stats
xmin = samples.min()
xmax = samples.max()
xs = np.linspace(xmin,xmax,100)
for i in range(samples.shape[1]):
    kernel = stats.gaussian_kde(samples[:,i])
    plot(xs,kernel(xs),label=labels[i])
_ = legend()
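Alongside the density plots, it can help to summarise the marginals numerically; a minimal sketch using numpy percentiles:
for i, name in enumerate(labels):
    lo, hi = np.percentile(samples[:,i], [2.5, 97.5])
    print('%s: mean=%.3f, std=%.3f, 95%% interval=[%.3f, %.3f]'
          % (name, samples[:,i].mean(), samples[:,i].std(), lo, hi))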
Plot the model parameters (variance, lengthscale and noise variance) against each other:
fig = figure(figsize=(14,4))
ax = fig.add_subplot(131)
_=ax.plot(samples[:,0],samples[:,1],'.')
ax.set_xlabel(labels[0]); ax.set_ylabel(labels[1])
ax = fig.add_subplot(132)
_=ax.plot(samples[:,1],samples[:,2],'.')
ax.set_xlabel(labels[1]); ax.set_ylabel(labels[2])
ax = fig.add_subplot(133)
_=ax.plot(samples[:,0],samples[:,2],'.')
ax.set_xlabel(labels[0]); ax.set_ylabel(labels[2])
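The scatter plots suggest some dependence between the hyperparameters; the sample correlation matrix quantifies it (rows and columns in the order of labels):
print(np.corrcoef(samples.T))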
By setting the model parameters to the posterior mean, we can visualize the model fit:
# Set the model parameters as the posterior mean
m.kern.variance[:] = samples[:,0].mean()
m.kern.lengthscale[:] = samples[:,1].mean()
m.likelihood.variance[:] = samples[:,2].mean()
print(m)
_=m.plot()
Name : GP regression
Objective : 39.2970574179
Number of Parameters : 3
Number of Optimization Parameters : 3
Updates : True
Parameters:
  GP_regression.           |            value  |  constraints  |    priors
  rbf.variance             |    1.41818926221  |      +ve      |  Ga(0.1, 0.1)
  rbf.lengthscale          |    1.53693651461  |      +ve      |  Ga(0.1, 0.1)
  Gaussian_noise.variance  |  0.0920819762721  |      +ve      |  Ga(0.1, 0.1)
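Plugging in the posterior mean is only a point summary and discards the hyperparameter uncertainty we just sampled. As a sketch of a more fully Bayesian alternative (not part of the original notebook), one can average the predictive mean over a thinned subset of the samples and then restore the posterior-mean setting:
xt = np.linspace(0., 2*np.pi, 200)[:,None]
preds = []
for theta in samples[::50]:                 # thin the chain for speed
    m.kern.variance[:] = theta[0]
    m.kern.lengthscale[:] = theta[1]
    m.likelihood.variance[:] = theta[2]
    mu, _ = m.predict(xt)                   # predictive mean under this sample
    preds.append(mu)
_ = plot(xt, np.mean(preds, axis=0), 'b-')
_ = plot(x, y, '.')
# Restore the posterior-mean parameters used in the rest of the notebook
m.kern.variance[:] = samples[:,0].mean()
m.kern.lengthscale[:] = samples[:,1].mean()
m.likelihood.variance[:] = samples[:,2].mean()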
Given a new observation, inferring the posterior distribution of the corresponding input is difficult, because the posterior can be multi-modal: several different inputs can explain the same output equally well.
Assume we have a new observation $1.5$ and try to infer its input distribution.
y_new = np.array([1.5])[:,None]
Build the inference model for the new observation. `x_new` is the MAP estimate obtained by optimizing the log likelihood; as the red dot in the plot shows, it captures only one of the modes.
x_new,mi = m.infer_newX(y_new)
print(mi)
m.plot()
plot(x_new,y_new,'or')
Name : inferenceX
Objective : -12.192425512
Number of Parameters : 1
Number of Optimization Parameters : 1
Updates : True
Parameters:
  inferenceX.  |      value      |  constraints  |  priors
  latent mean  |  [ 2.11646619]  |               |
Draw 10,000 samples from the inference model:
hmc_new = GPy.inference.mcmc.HMC(mi,stepsize=2e-1)
s_new = hmc_new.sample(num_samples=10000,hmc_iters=10)
Plot the samples:
_ = plot(s_new)
Plot the marginal distribution of the inferred inputs. The two modes are clearly visible in the sampled posterior distribution.
from scipy import stats
samples_new = s_new[:]
xmin = samples_new.min()
xmax = samples_new.max()
xs = np.linspace(xmin,xmax,100)
for i in range(samples_new.shape[1]):
    kernel = stats.gaussian_kde(samples_new[:,i])
    plot(xs,kernel(xs))
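To read the two modes off numerically, one can look for local maxima of the kernel density estimate computed above (a small sketch; kernel and xs come from the previous cell):
dens = kernel(xs)
modes = [float(xs[i]) for i in range(1, len(xs)-1)
         if dens[i-1] < dens[i] > dens[i+1]]
print(modes)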
Now apply the same machinery to a real dataset: the olympic marathon example that ships with GPy.
m = GPy.examples.regression.olympic_marathon_men()
# Set priors for the kernel variance and lengthscale
m.kern.variance.set_prior(GPy.priors.Gamma.from_EV(25.,150.))
m.kern.lengthscale.set_prior(GPy.priors.Gamma.from_EV(120.,2000.))
print(m)
WARNING: reconstraining parameters GP_regression.rbf.variance
WARNING: reconstraining parameters GP_regression.rbf.lengthscale

Name : GP regression
Objective : 15.5939533439
Number of Parameters : 3
Number of Optimization Parameters : 3
Updates : True
Parameters:
  GP_regression.           |            value  |  constraints  |    priors
  rbf.variance             |    25.3995048123  |      +ve      |  Ga(4.2, 0.17)
  rbf.lengthscale          |    152.045313268  |      +ve      |  Ga(7.2, 0.06)
  Gaussian_noise.variance  |  0.0485064747583  |      +ve      |
# initialise hmc
hmc = GPy.inference.mcmc.HMC(m,stepsize=2e-1)
# run hmc
t = hmc.sample(num_samples=20000,hmc_iters=20)
_=plot(t)
print(t.mean(axis=0))
print(t.std(axis=0))
_=hist(t[:,:2],50)
[  2.34246741e+01   1.25455721e+02   5.32988933e-02]
[  1.06955975e+01   3.11140579e+01   1.72613018e-02]
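HMC chains can mix slowly, so it is worth a quick autocorrelation check before trusting these summaries. A crude effective-sample-size estimate under an AR(1) assumption (numpy only, not a GPy API):
N = t.shape[0]
for j, name in enumerate(m.parameter_names_flat()):
    c = t[:,j] - t[:,j].mean()
    rho = np.dot(c[:-1], c[1:]) / np.dot(c, c)   # lag-1 autocorrelation
    print('%s: lag-1 rho=%.2f, ESS~%.0f' % (name, rho, N*(1-rho)/(1+rho)))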
Use seaborn to plot distributions over the hyperparameters:
import seaborn as sns, pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['text.usetex'] = False
df = pd.DataFrame(t, columns=m.parameter_names_flat())
ax = sns.kdeplot(df['rbf.variance'], color="b", shade=True, shade_lowest=False)
ax = sns.kdeplot(df['rbf.lengthscale'], color="r", shade=True, shade_lowest=False)
sns.set(style="white", color_codes=True)
_ = sns.jointplot(data=df, x='rbf.variance', y='rbf.lengthscale', kind="hex",
                  marginal_kws=dict(kde=True, hist=True, kde_kws=dict(shade=False)),
                  stat_func=None)
df
 | rbf.variance | rbf.lengthscale | Gaussian_noise.variance |
---|---|---|---|
0 | 25.698306 | 156.070699 | 0.059547 |
1 | 18.743578 | 156.595044 | 0.074173 |
2 | 13.887465 | 152.355175 | 0.069016 |
3 | 14.155445 | 151.787650 | 0.081634 |
4 | 20.421911 | 154.500416 | 0.063637 |
5 | 30.195630 | 152.830979 | 0.072222 |
6 | 26.109314 | 157.151448 | 0.055252 |
7 | 20.467120 | 156.583988 | 0.052092 |
8 | 30.400506 | 152.062560 | 0.064315 |
9 | 28.352966 | 152.437746 | 0.055480 |
10 | 29.774243 | 153.919031 | 0.035826 |
11 | 33.207275 | 151.824560 | 0.037272 |
12 | 32.003007 | 145.061784 | 0.057754 |
13 | 26.803550 | 145.314209 | 0.055612 |
14 | 25.152094 | 138.583572 | 0.040753 |
15 | 27.634485 | 132.632704 | 0.049969 |
16 | 29.605808 | 137.675906 | 0.036453 |
17 | 34.438591 | 136.449935 | 0.026122 |
18 | 35.887368 | 136.237108 | 0.028379 |
19 | 35.887368 | 136.237108 | 0.028379 |
20 | 43.910838 | 133.445562 | 0.026832 |
21 | 43.485700 | 139.080476 | 0.033924 |
22 | 48.157112 | 140.288812 | 0.044917 |
23 | 53.635800 | 130.723554 | 0.033170 |
24 | 53.635800 | 130.723554 | 0.033170 |
25 | 57.156723 | 121.930448 | 0.027956 |
26 | 53.572664 | 124.038815 | 0.036399 |
27 | 49.700571 | 130.319728 | 0.045346 |
28 | 48.870969 | 127.856158 | 0.034672 |
29 | 47.474023 | 127.517905 | 0.053423 |
... | ... | ... | ... |
19970 | 13.297307 | 109.510459 | 0.040657 |
19971 | 13.297307 | 109.510459 | 0.040657 |
19972 | 13.297307 | 109.510459 | 0.040657 |
19973 | 17.929041 | 112.442954 | 0.059544 |
19974 | 24.739505 | 113.709543 | 0.040424 |
19975 | 24.341697 | 116.804279 | 0.045263 |
19976 | 19.742260 | 121.770468 | 0.046144 |
19977 | 25.370468 | 119.295514 | 0.050279 |
19978 | 22.303060 | 126.546828 | 0.038183 |
19979 | 18.741118 | 120.461284 | 0.033619 |
19980 | 13.677802 | 126.895327 | 0.039083 |
19981 | 14.740390 | 122.796441 | 0.041132 |
19982 | 10.440311 | 124.437531 | 0.034105 |
19983 | 18.213002 | 129.423301 | 0.043406 |
19984 | 17.618734 | 125.726755 | 0.032402 |
19985 | 19.329991 | 127.194190 | 0.050174 |
19986 | 18.250197 | 126.106561 | 0.054068 |
19987 | 15.832616 | 125.425730 | 0.055303 |
19988 | 8.600004 | 122.485631 | 0.054096 |
19989 | 13.351543 | 121.282395 | 0.062515 |
19990 | 17.307096 | 121.825491 | 0.039614 |
19991 | 15.615636 | 122.234168 | 0.043671 |
19992 | 15.407677 | 121.591582 | 0.048710 |
19993 | 15.772050 | 126.809752 | 0.065785 |
19994 | 11.103071 | 131.416994 | 0.087163 |
19995 | 13.936923 | 128.758211 | 0.085912 |
19996 | 14.504106 | 125.278166 | 0.099385 |
19997 | 13.472985 | 128.396493 | 0.083822 |
19998 | 12.813494 | 127.803197 | 0.086454 |
19999 | 10.234419 | 123.825535 | 0.078223 |
20000 rows × 3 columns
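Runs of identical consecutive rows in the table correspond to rejected HMC proposals, where the chain repeats its current state. Rather than scanning the raw samples, df.describe() gives a compact per-parameter summary:
print(df.describe())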