Figure 2.29: (left) Overall negative log probability for the original model and the model with learned guess probabilities. The lower red bar indicates that learning the guess probabilities gives a substantially better model, according to this metric. (right) Negative log probability for each skill, showing that the improvement varies from skill to skill.
log_density
of the model it is the negative log probability of the ground truth. For a participant with $skill_i$ the negative log probability is . where $truth_i$ is an indicator variable of having $skill_i$ and the probability of each skill is $p(skill_i)$ ~ $Bernoulli(\theta_i)$
Further details from the text:
A common metric to use is the probability of the ground truth values under the inferred distributions. Sometimes it is convenient to take the logarithm of the probability, since this gives a more manageable number when the probability is very small. When we use the logarithm of the probability, the metric is referred to as the log probability. So, if the inferred probability of a person having a particular skill is $p$, then the log probability is $log(p)$ if the person has the skill and $log(1−p)$ if they don’t. If the person does have the skill then the best possible prediction is $p=1.0$, which gives log probability of $log(1.0)=0$ (the logarithm of one is zero). A less confident prediction, such as $p=0.8$ will give a log probability with a negative value, in this case $log(0.8)=−0.097$. The worst possible prediction of $p=0.0$ gives a log probability of negative infinity. ...
import operator
from functools import reduce
from typing import Callable, Dict, List
import arviz as az
import jax
import jax.numpy as jnp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
from jax.scipy.special import logsumexp
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS, DiscreteHMCGibbs, log_likelihood
from numpyro.infer.util import log_density, potential_energy
%matplotlib inline
%reload_ext autoreload
%autoreload 2
%load_ext watermark
%watermark -v -m -p arviz,jax,matplotlib,numpy,pandas,scipy,numpyro
%watermark -gb
def neg_log_proba_score(theta: np.array, y_true: np.array):
"""
Calculates the the negative log probability of the ground truth, the self assessed skills.
:param theta np.array: array of beta probabilities
:param y_true np.array, dtype == int: array of indicator variables for skill of participants
"""
assert theta.shape == y_true.shape
assert np.issubdtype(y_true.dtype, np.integer)
score = scipy.stats.bernoulli(theta).pmf(y_true)
score[score == 0.0] = np.finfo(float).eps
return -np.log(score)
def plot_bars(
data: np.array,
columns: List[str],
index: List[str],
ax=None,
tick_step=0.5,
**kwargs,
):
if ax is None:
fig, ax = plt.subplots()
else:
fig = None
pd.DataFrame(data, columns=columns, index=index).plot(
kind="bar", color=["b", "r"], ax=ax, zorder=3, **kwargs
)
ax.grid(zorder=0, axis="y")
ax.yaxis.set_ticks(np.arange(0, data.max(), tick_step));
signature: log_likelihood(model, posterior_samples, *args, parallel=False, batch_ndims=1, **kwargs)
log_likelihood
is from the Example: Baseball Batting Averagedef log_ppd(
model: Callable,
posterior_samples: Dict,
*args,
parallel=False,
batch_ndims=1,
**kwargs
):
"""
Log pointwise predictive density
:param model Callable: Python callable containing Pyro primitives
:param posterior_samples Dict: dictionary of samples from the posterior.
:param args: model arguments
:param parallel bool: passed to `log_likelihood` from numpyro.infer
:param batch_ndims Union[0, 1, 2]: passed to `log_likelihood` from numpyro.infer, see `log_likelihood` for details
:param kwargs: model kwargs
"""
post_loglik = log_likelihood(
model,
posterior_samples,
*args,
parallel=parallel,
batch_ndims=batch_ndims,
**kwargs
)
post_loglik_res = np.concatenate(
[obs[:, None] for obs in post_loglik.values()], axis=1
)
exp_log_density = logsumexp(post_loglik_res, axis=0) - jnp.log(
jnp.shape(post_loglik_res)[0]
)
return exp_log_density
rng_key = jax.random.PRNGKey(2)
raw_data = pd.read_csv(
"http://www.mbmlbook.com/Downloads/LearningSkills_Real_Data_Experiments-Original-Inputs-RawResponsesAsDictionary.csv"
)
self_assessed = raw_data.iloc[1:, 1:8].copy()
self_assessed = self_assessed.astype(int)
skills_key = pd.read_csv(
"http://www.mbmlbook.com/Downloads/LearningSkills_Real_Data_Experiments-Original-Inputs-Quiz-SkillsQuestionsMask.csv",
header=None,
)
skills_needed = []
for index, row in skills_key.iterrows():
skills_needed.append([i for i, x in enumerate(row) if x])
responses = pd.read_csv(
"http://www.mbmlbook.com/Downloads/LearningSkills_Real_Data_Experiments-Original-Inputs-IsCorrect.csv",
header=None,
)
responses = responses.astype("int32")
def model_00(
graded_responses, skills_needed: List[List[int]], prob_mistake=0.1, prob_guess=0.2
):
n_questions, n_participants = graded_responses.shape
n_skills = max(map(max, skills_needed)) + 1
participants_plate = numpyro.plate("participants_plate", n_participants)
with participants_plate:
skills = []
for s in range(n_skills):
skills.append(numpyro.sample("skill_{}".format(s), dist.Bernoulli(0.5)))
for q in range(n_questions):
has_skills = reduce(operator.mul, [skills[i] for i in skills_needed[q]])
prob_correct = has_skills * (1 - prob_mistake) + (1 - has_skills) * prob_guess
isCorrect = numpyro.sample(
"isCorrect_{}".format(q),
dist.Bernoulli(prob_correct).to_event(1),
obs=graded_responses[q],
)
nuts_kernel = NUTS(model_00)
kernel = DiscreteHMCGibbs(nuts_kernel, modified=True)
mcmc_00 = MCMC(
kernel, num_warmup=200, num_samples=1000, num_chains=4, jit_model_args=False
)
mcmc_00.run(
rng_key,
jnp.array(responses),
skills_needed,
extra_fields=(
"z",
"hmc_state.potential_energy",
"hmc_state.z",
"rng_key",
"hmc_state.rng_key",
),
)
mcmc_00.print_summary()
ds = az.from_numpyro(mcmc_00)
az.plot_trace(ds);
log_density_model_00, model_00_trace = log_density(
model_00,
(jnp.array(responses), skills_needed),
dict(prob_mistake=0.1, prob_guess=0.2),
{key: value.mean(0) for key, value in mcmc_00.get_samples().items()},
)
pe_model_00 = mcmc_00.get_extra_fields()["hmc_state.potential_energy"]
exp_log_density_00 = log_ppd(
model_00, mcmc_00.get_samples(), jnp.array(responses), skills_needed
)
# post_loglik_00 = log_likelihood(
# model_00, mcmc_00.get_samples(), jnp.array(responses), skills_needed,
# )
# post_loglik_00_res = np.concatenate(
# [obs[:, None] for obs in post_loglik_00.values()], axis=1
# )
# exp_log_density_00 = logsumexp(post_loglik_00_res, axis=0) - jnp.log(
# jnp.shape(post_loglik_00_res)[0]
# )
theta_model_00 = np.zeros((22, 7))
for i, param in enumerate(["skill_" + str(i) for i in range(7)]):
theta_model_00[:, i] = np.mean(mcmc_00.get_samples()[param], axis=0)
neg_log_proba_model_00 = neg_log_proba_score(theta_model_00, self_assessed.values)
def model_02(
graded_responses, skills_needed: List[List[int]], prob_mistake=0.1,
):
n_questions, n_participants = graded_responses.shape
n_skills = max(map(max, skills_needed)) + 1
with numpyro.plate("questions_plate", n_questions):
prob_guess = numpyro.sample("prob_guess", dist.Beta(2.5, 7.5))
participants_plate = numpyro.plate("participants_plate", n_participants)
with participants_plate:
skills = []
for s in range(n_skills):
skills.append(numpyro.sample("skill_{}".format(s), dist.Bernoulli(0.5)))
for q in range(n_questions):
has_skills = reduce(operator.mul, [skills[i] for i in skills_needed[q]])
prob_correct = (
has_skills * (1 - prob_mistake) + (1 - has_skills) * prob_guess[q]
)
isCorrect = numpyro.sample(
"isCorrect_{}".format(q),
dist.Bernoulli(prob_correct).to_event(1),
obs=graded_responses[q],
)
nuts_kernel = NUTS(model_02)
kernel = DiscreteHMCGibbs(nuts_kernel, modified=True)
mcmc_02 = MCMC(kernel, num_warmup=200, num_samples=1000, num_chains=4)
mcmc_02.run(
rng_key,
jnp.array(responses),
skills_needed,
extra_fields=(
"z",
"hmc_state.potential_energy",
"hmc_state.z",
"rng_key",
"hmc_state.rng_key",
),
)
mcmc_02.print_summary()
ds = az.from_numpyro(mcmc_02)
az.plot_trace(ds);