from IPython.display import Image
import prettyplotlib as ppl
from prettyplotlib import plt
import numpy as np
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica'], 'size': 22})
rc('xtick', labelsize=14)
rc('ytick', labelsize=14)
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
%matplotlib inline
Image('backbox_ml.png')
Image('openbox_pp.png')
Moreover...
from scipy import stats
# set every possibility to be equally possible
x_coin = np.linspace(0, 1, 101)
import prettyplotlib as ppl
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, xlabel=r'Hypothesis for chance of heads',
ylabel=r'Probability of hypothesis',
title=r'Prior probability distribution after no coin tosses')
ppl.plot(ax, x_coin, stats.beta(2, 2).pdf(x_coin), linewidth=3.)
ax.set_xticklabels([r'0\%', r'20\%', r'40\%', r'60\%', r'80\%', r'100\%']);
# fig.savefig('coin1.png')
[<matplotlib.text.Text at 0x48432d0>, <matplotlib.text.Text at 0x4841210>, <matplotlib.text.Text at 0x486e590>, <matplotlib.text.Text at 0x486ec50>, <matplotlib.text.Text at 0x4872350>, <matplotlib.text.Text at 0x4872a10>]
/home/wiecki/envs/pymc3/local/lib/python2.7/site-packages/IPython/core/formatters.py:239: FormatterWarning: Exception in image/png formatter: LaTeX was not able to process the following string: '0\\%' Here is the full report generated by LaTeX: This is pdfTeX, Version 3.1415926-2.5-1.40.14 (TeX Live 2013/Debian) restricted \write18 enabled. entering extended mode (./73e233a9842fdd7f7961a32eb5d9efbb.tex LaTeX2e <2011/06/27> Babel <3.9h> and hyphenation patterns for 2 languages loaded. (/usr/share/texlive/texmf-dist/tex/latex/base/article.cls Document Class: article 2007/10/19 v1.4h Standard LaTeX document class (/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo)) (/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty) (/usr/share/texlive/texmf-dist/tex/latex/psnfss/helvet.sty (/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty)) (/usr/share/texlive/texmf-dist/tex/latex/psnfss/courier.sty) (/usr/share/texlive/texmf-dist/tex/latex/base/textcomp.sty (/usr/share/texlive/texmf-dist/tex/latex/base/ts1enc.def)) (/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifpdf.sty) (/usr/share/texlive/texmf-dist/tex/generic/oberdiek/ifvtex.sty) (/usr/share/texlive/texmf-dist/tex/generic/ifxetex/ifxetex.sty) Package geometry Warning: Over-specification in `h'-direction. `width' (5058.9pt) is ignored. Package geometry Warning: Over-specification in `v'-direction. `height' (5058.9pt) is ignored. ) No file 73e233a9842fdd7f7961a32eb5d9efbb.aux. (/usr/share/texlive/texmf-dist/tex/latex/base/ts1cmr.fd) (/usr/share/texlive/texmf-dist/tex/latex/psnfss/ot1pnc.fd) ! Font OT1/pnc/m/n/10=pncr7t at 10.0pt not loadable: Metric (TFM) file not foun d. <to be read again> relax l.11 \begin{document} *geometry* driver: auto-detecting *geometry* detected driver: dvips (/usr/share/texlive/texmf-dist/tex/latex/psnfss/ot1phv.fd) ! Font OT1/phv/m/n/14=phvr7t at 14.0pt not loadable: Metric (TFM) file not foun d. <to be read again> relax l.12 \fontsize{14.000000}{17.500000}{\sffamily 0\%} ! Font OT1/pnc/m/n/14=pncr7t at 14.0pt not loadable: Metric (TFM) file not foun d. <to be read again> relax l.13 \end{document} [1] (./73e233a9842fdd7f7961a32eb5d9efbb.aux) ) (see the transcript file for additional information) Output written on 73e233a9842fdd7f7961a32eb5d9efbb.dvi (1 page, 188 bytes). Transcript written on 73e233a9842fdd7f7961a32eb5d9efbb.log. FormatterWarning,
<matplotlib.figure.Figure at 0x41dd610>
stats.beta(2,2).rvs(1)
array([ 0.37833632])
import random
import numpy as np
def choose_coin():
return stats.beta(2, 2).rvs(1) # random.uniform(0,1) # np.random.normal(0,1) #
# pylab.hist([choose_coin() for dummy in range(100000)], normed=True, bins=100)
successes = []
returns = 0.
for i in range(10000):
prob_heads = choose_coin()
results = [random.uniform(0,1) < prob_heads for dummy in range(10)]
if results.count(True) == 9:
successes.append(prob_heads)
if random.uniform(0,1) < prob_heads:
returns += -1
else:
returns += 1
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
r = ax.hist(np.array(successes), normed=True, bins=20)
print len(successes)
print "Average return", returns / len(successes)
719 Average return -0.613351877608
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, xlabel='Hypothesis for chance of heads',
ylabel='Probability of hypothesis',
title='Posterior probability distribution after first heads')
ppl.plot(ax, x_coin, stats.beta(1, 1).pdf(x_coin)*stats.beta(90, 10).pdf(x_coin), linewidth=3.)
ax.set_xticklabels([r'0\%', r'20\%', r'40\%', r'60\%', r'80\%', r'100\%']);
plt.savefig('coin2.png')
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, xlabel='Hypothesis for chance of heads',
ylabel='Probability of hypothesis',
title='Posterior probability distribution after 1 head, 1 tail')
ppl.plot(ax, x_coin, stats.beta(3, 3).pdf(x_coin), linewidth=3.)
ax.set_xticklabels(['0\%', '20\%', '40\%', '60\%', '80\%', '100\%']);
fig.savefig('coin3.png')
Image('coin3.png')
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, xlabel='Hypothesis for chance of heads',
ylabel='Probability of hypothesis',
title='Posterior probability distribution after 20 heads and 20 tails')
ppl.plot(ax, x_coin, stats.beta(22, 22).pdf(x_coin), linewidth=3.)
ax.set_xticklabels(['0\%', '20\%', '40\%', '60\%', '80\%', '100\%']);
fig.savefig('coin4.png')
Image('coin4.png')
$\theta$: Parameters of model (chance of getting heads)).
from scipy import stats
fig = ppl.plt.figure(figsize=(14, 6))
ax1 = fig.add_subplot(121, title='What we want', ylim=(0, .5), xlabel=r'\theta', ylabel=r'P(\theta)')
ppl.plot(ax1, np.linspace(-4, 4, 100), stats.norm.pdf(np.linspace(-3, 3, 100)), linewidth=4.)
ax2 = fig.add_subplot(122, title='What we get', xlim=(-4, 4), ylim=(0, 1800), xlabel=r'\theta', ylabel='\# of samples')
ax2.hist(np.random.randn(10000), bins=20);
Image('wantget.png')
size = 200
true_intercept = 1
true_slope = 2
x = np.linspace(0, 1, size)
# y = a + b*x
true_regression_line = true_intercept + true_slope * x
# add noise
y = true_regression_line + np.random.normal(scale=.5, size=size)
data = dict(x=x, y=y)
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, xlabel='Value of gold', ylabel='Value of gold miners', title='Synthetic data and underlying model')
ppl.scatter(ax, x, y, label='sampled data')
ppl.plot(ax, x, true_regression_line, label='true regression line', linewidth=4.)
ax.legend(loc=2);
fig.savefig('synth_data.png')
Image('synth_data.png')
with $$ \epsilon \sim \mathcal{N}(0, \sigma^2) $$
import pymc as pm
with pm.Model() as model: # model specifications in PyMC3 are wrapped in a with-statement
# Define priors
alpha = pm.Normal('alpha', mu=0, sd=20)
beta = pm.Normal('beta', mu=0, sd=20)
sigma = pm.Uniform('sigma', lower=0, upper=20)
# Define linear regression
y_est = alpha + beta * x
# Define likelihood
likelihood = pm.Normal('y', mu=y_est, sd=sigma, observed=y)
# Inference!
start = pm.find_MAP() # Find starting value by optimization
step = pm.NUTS(state=start) # Instantiate MCMC sampling algorithm
trace = pm.sample(2000, step, start=start, progressbar=False) # draw 2000 posterior samples using NUTS sampling
/Users/alexcoventry/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/theano/scan_module/scan_perform_ext.py:85: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility from scan_perform.scan_perform import *
with pm.Model() as model:
# specify glm and pass in data. The resulting linear model, its likelihood and
# and all its parameters are automatically added to our model.
pm.glm.glm('y ~ x', data)
step = pm.NUTS() # Instantiate MCMC sampling algorithm
trace = pm.sample(2000, step, progressbar=False) # draw 2000 posterior samples using NUTS sampling
fig = pm.traceplot(trace, lines={'alpha': 1, 'beta': 2, 'sigma': .5});
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, xlabel='Value of gold', ylabel='Value of gold miners', title='Posterior predictive regression lines')
ppl.scatter(ax, x, y, label='data')
from pymc import glm
glm.plot_posterior_predictive(trace, samples=100,
label='posterior predictive regression lines')
ppl.plot(ax, x, true_regression_line, label='true regression line', linewidth=5.)
ax.legend(loc=0);
fig.savefig('ppc1.png')