import pyhf
import numpy as np
import matplotlib.pyplot as plt
import getpass
from matplotlib import rcParams
np.random.seed(0)
rcParams.update({'figure.autolayout': True})
Let's look at an example of a well seperated signal and background and see that pyhf makes lots of pseudoexperiments fast.
model = pyhf.simplemodels.hepdata_like([20], [50], [np.sqrt(50)])
signal_pars = model.config.suggested_init()
signal_pars[model.config.poi_index] = 1.0
bkg_pars = model.config.suggested_init()
bkg_pars[model.config.poi_index] = 0.0
signal_pdf = model.make_pdf(signal_pars)
bkg_pdf = model.make_pdf(bkg_pars)
sample_size = 1000000
sample_shape = (sample_size,)
sample = signal_pdf.sample(sample_shape)
fig, ax = plt.subplots(figsize=(5, 5))
ax.hist(
signal_pdf.log_prob(sample),
bins=40,
histtype="step",
linestyle="dashed",
label=r"$f\,(x_s|1)$",
)
ax.hist(
bkg_pdf.log_prob(sample),
bins=40,
histtype="step",
label=r"$f\,(x_s|0)$",
)
ax.semilogy()
ax.set_xlabel(r"$\ln f(x_s|\theta)$", fontsize=20)
ax.set_ylabel("Count", fontsize=20)
ax.legend(loc="best", fontsize=14);
Now, let's recreate Figure 5b of "Asymptotic formulae for likelihood-based tests of new physics" (arXiv:1007.1727). The pseudoexperiment are still fast, but the test statistic evaluation is slower.
model = pyhf.simplemodels.hepdata_like([6], [9], [np.sqrt(9)])
signal_pars = model.config.suggested_init()
signal_pars[model.config.poi_index] = 1.0
bkg_pars = model.config.suggested_init()
bkg_pars[model.config.poi_index] = 0.0
par_bounds = model.config.suggested_bounds()
signal_pdf = model.make_pdf(signal_pars)
bkg_pdf = model.make_pdf(bkg_pars)
# protect against limited resources on Binder
sample_size = 5000 if getpass.getuser() == "jovyan" else 10000
sample_shape = (sample_size,)
signal_sample = signal_pdf.sample(sample_shape)
bkg_sample = bkg_pdf.sample(sample_shape)
signal_qtilde = np.asarray(
[
pyhf.utils.qmu(1.0, sample, model, signal_pars, par_bounds)
for sample in signal_sample
]
)
bkg_qtilde = np.asarray(
[pyhf.utils.qmu(1.0, sample, model, bkg_pars, par_bounds) for sample in bkg_sample]
)
fig, ax = plt.subplots(figsize=(5, 5))
ax.hist(
signal_qtilde,
bins=np.linspace(0, 10, 26),
histtype="step",
density=True,
linestyle="dashed",
label=r"$f\,(\tilde{q}_1|1)$",
)
ax.hist(
bkg_qtilde,
bins=np.linspace(0, 10, 26),
histtype="step",
density=True,
label=r"$f\,(\tilde{q}_1|0)$",
)
ax.set_xlim(0, 9)
ax.set_ylim(1e-3, 2)
ax.semilogy()
ax.set_xlabel(r"$\tilde{q}_1$", fontsize=20)
ax.set_ylabel(r"$f\,(\tilde{q}_1|\theta)$", fontsize=20)
ax.legend(loc="best", fontsize=14);
fig.savefig("alternative_statistic.pdf")
fig.savefig("alternative_statistic.png")