using Pkg;Pkg.activate("probprog/workspace/");Pkg.instantiate();
IJulia.clear_output();
using Distributions, PyPlot
N = 100
generative_dist = MvNormal([0,1.], [0.8 0.5; 0.5 1.0])
function plotObservations(obs::Matrix)
    plot(obs[1,:], obs[2,:], "kx", zorder=3)
    fill_between([0., 2.], 1., 2., color="k", alpha=0.4, zorder=2) # Shaded area S
    text(2.05, 1.8, "S", fontsize=12)
    xlim([-3,3]); ylim([-2,4]); xlabel("a"); ylabel("b")
end
D = rand(generative_dist, N) # Generate observations from generative_dist
plotObservations(D)
x_dot = rand(generative_dist) # Generate x∙
plot(x_dot[1], x_dot[2], "ro");
For a $D$-dimensional variable $x$, the multivariate Gaussian density is $$ \mathcal{N}(x\,|\,\mu,\Sigma) \triangleq \frac{1}{\sqrt{(2\pi)^D |\Sigma|}} \exp\left( -\frac{1}{2}(x-\mu)^T \Sigma^{-1} (x-\mu) \right)\,, $$ where $|\Sigma| \triangleq \mathrm{det}(\Sigma)$ is the determinant of $\Sigma$.
Let's estimate a constant $\theta$ from one 'noisy' measurement $x$ about that constant.
We assume the following measurement equations (the tilde $\sim$ means: 'is distributed as'):
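Based on the description above (and the $N$-sample model used further below), the intended measurement model is presumably $$\begin{align*} x &= \theta + \epsilon \\ \epsilon &\sim \mathcal{N}(0,\sigma^2) \end{align*}$$ so that the likelihood is $p(x|\theta) = \mathcal{N}(x\,|\,\theta,\sigma^2)$,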
which we recognize as a Gaussian distribution w.r.t. $\theta$.
where $$\begin{align*} \frac{1}{\sigma_1^2} &= \frac{\sigma_0^2 + \sigma^2}{\sigma^2 \sigma_0^2} = \frac{1}{\sigma_0^2} + \frac{1}{\sigma^2} \\ \mu_1 &= \frac{\sigma_0^2 x + \sigma^2 \mu_0}{\sigma^2 + \sigma_0^2} = \sigma_1^2 \, \left( \frac{1}{\sigma_0^2} \mu_0 + \frac{1}{\sigma^2} x \right) \end{align*}$$
where $$\begin{align*} \Sigma_c^{-1} &= \Sigma_a^{-1} + \Sigma_b^{-1} \\ \Sigma_c^{-1} \mu_c &= \Sigma_a^{-1}\mu_a + \Sigma_b^{-1}\mu_b \end{align*}$$
using PyPlot, Distributions
d1 = Normal(0, 1) # μ=0, σ^2=1
d2 = Normal(3, 2) # μ=3, σ^2=4
# Calculate the parameters of the product d1*d2
s2_prod = (d1.σ^-2 + d2.σ^-2)^-1
m_prod = s2_prod * ((d1.σ^-2)*d1.μ + (d2.σ^-2)*d2.μ)
d_prod = Normal(m_prod, sqrt(s2_prod)) # Note that we neglect the normalization constant.
# Plot stuff
x = range(-4, stop=8, length=100)
plot(x, pdf.(d1,x), "k")
plot(x, pdf.(d2,x), "b")
plot(x, pdf.(d1,x) .* pdf.(d2,x), "r-") # Plot the exact product
plot(x, pdf.(d_prod,x), "r:") # Plot the normalized Gaussian product
legend([L"\mathcal{N}(0,1)",
L"\mathcal{N}(3,4)",
L"\mathcal{N}(0,1) \mathcal{N}(3,4)",
L"Z^{-1} \mathcal{N}(0,1) \mathcal{N}(3,4)"]);
The solid and dotted red curves are identical up to a scaling factor $Z$.
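The multivariate product rule above can be verified numerically in the same way. Below is a minimal sketch (the covariance matrices and test points are made up for illustration):
using Distributions, LinearAlgebra
# Multiply two multivariate Gaussians: N(x|μa,Σa)·N(x|μb,Σb) ∝ N(x|μc,Σc)
μa = [0.0, 0.0]; Σa = [1.0 0.2; 0.2 1.0]
μb = [2.0, 1.0]; Σb = [0.5 0.0; 0.0 1.5]
Λc = inv(Σa) + inv(Σb)                  # precisions add
Σc = Hermitian(inv(Λc))
μc = Σc * (inv(Σa)*μa + inv(Σb)*μb)     # precision-weighted mean
pa = MvNormal(μa, Σa); pb = MvNormal(μb, Σb); pc = MvNormal(μc, Matrix(Σc))
# The ratio of the unnormalized product to N(x|μc,Σc) is the same constant Z for any x
Z(x) = pdf(pa,x) * pdf(pb,x) / pdf(pc,x)
println(Z([0.0, 0.0]) ≈ Z([1.0, 2.0]))  # true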
x_n &= \theta + \epsilon_n \ \epsilon_n &\sim \mathcal{N}(0,\sigma^2) \end{align}$$ and the same prior for $\theta$: $$\begin{align} \theta &\sim \mathcal{N}(\mu_0,\sigma_0^2) \ \end{align*}$$
The posterior $p(\theta|D) \propto p(\theta) \prod_{n=1}^N p(x_n|\theta)$ is then a product of $N+1$ Gaussians and is therefore itself Gaussian.
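As a numerical illustration (with made-up values for the prior, the noise variance, and the data), folding in the measurements one at a time with the precision-weighted product rule yields the posterior directly:
# Minimal sketch: recursively multiply the prior with N Gaussian likelihoods
function infer_theta(μ0, σ0_2, σ2, data)
    μ, s2 = μ0, σ0_2
    for xn in data             # fold in one measurement at a time
        prec = 1/s2 + 1/σ2     # precisions add
        μ    = (μ/s2 + xn/σ2) / prec
        s2   = 1/prec
    end
    return (μ, s2)
end
noisy_obs = 2.0 .+ 0.5 .* randn(10)             # ten noisy measurements of θ = 2 (σ² = 0.25)
println(infer_theta(0.0, 2.0, 0.25, noisy_obs)) # ≈ (2.0, small variance)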
using PyPlot, Distributions
μ = [1.0; 2.0]
Σ = [0.3 0.7;
     0.7 2.0]
joint = MvNormal(μ,Σ)
marginal_x = Normal(μ[1], sqrt(Σ[1,1]))
#Plot p(x,y)
subplot(221)
x_range = y_range = range(-2,stop=5,length=1000)
joint_pdf = [ pdf(joint, [x_range[i];y_range[j]]) for j=1:length(y_range), i=1:length(x_range)]
imshow(joint_pdf, origin="lower", extent=[x_range[1], x_range[end], y_range[1], y_range[end]])
grid(); xlabel("x"); ylabel("y"); title("p(x,y)"); tight_layout()
# Plot p(x)
subplot(222)
plot(x_range, pdf.(marginal_x, x_range))
grid(); xlabel("x"); ylabel("p(x)"); title("Marginal distribution p(x)"); tight_layout()
# Plot p(y|x)
x = 0.1
conditional_y_m = μ[2]+Σ[2,1]*inv(Σ[1,1])*(x-μ[1])
conditional_y_s2 = Σ[2,2] - Σ[2,1]*inv(Σ[1,1])*Σ[1,2]
conditional_y = Normal(conditional_y_m, sqrt(conditional_y_s2))
subplot(223)
plot(y_range, pdf.(conditional_y, y_range))
grid(); xlabel("y"); ylabel("p(y|x)"); title("Conditional distribution p(y|x)"); tight_layout()
As is clear from the plots, the conditional distribution is a renormalized slice from the joint distribution.
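This claim can be checked numerically with the variables defined above: evaluate the joint density along the slice $x=0.1$, renormalize it, and compare with conditional_y (a minimal sketch):
# Renormalized slice of the joint at x = 0.1 vs. the analytical conditional
slice = [pdf(joint, [0.1; y]) for y in y_range]
slice_normalized = slice ./ (sum(slice) * step(y_range))  # numerical renormalization
println(maximum(abs.(slice_normalized .- pdf.(conditional_y, y_range))))  # small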
with a Gaussian prior for $\theta$: $$ p(\theta) = \mathcal{N}(\theta\,|\,\mu_0,\sigma_0^2) $$
with $$\begin{align*} K &= \frac{\sigma_0^2}{\sigma_0^2+\sigma^2} \qquad \text{($K$ is called: Kalman gain)}\\ \mu_1 &= \mu_0 + K \cdot (x-\mu_0)\\ \sigma_1^2 &= \left( 1-K \right) \sigma_0^2 \end{align*}$$
Consider the signal $x_t=\theta+\epsilon_t$ with $\epsilon_t \sim \mathcal{N}(0,\sigma^2)$, where the observations $D_t= \left\{x_1,\ldots,x_t\right\}$ arrive sequentially (over time).
Problem: Derive a recursive algorithm for $p(\theta|D_t)$, i.e., an update rule for (posterior) $p(\theta|D_t)$ based on (prior) $p(\theta|D_{t-1})$ and (new observation) $x_t$.
Let's define the estimate after $t$ observations (i.e., our solution) as $p(\theta|D_t) = \mathcal{N}(\theta\,|\,\mu_t,\sigma_t^2)$.
We define the joint distribution for $\theta$ and $x_t$, given background $D_{t-1}$, by
with $$\begin{align*} K_t &= \frac{\sigma_{t-1}^2}{\sigma_{t-1}^2+\sigma^2} \qquad \text{(Kalman gain)}\\ \mu_t &= \mu_{t-1} + K_t \cdot (x_t-\mu_{t-1})\\ \sigma_t^2 &= \left( 1-K_t \right) \sigma_{t-1}^2 \end{align*}$$
using PyPlot
N = 50 # Number of observations
θ = 2.0 # True value of the variable we want to estimate
σ_ϵ2 = 0.25 # Observation noise variance
x = sqrt(σ_ϵ2) * randn(N) .+ θ # Generate N noisy observations of θ
t = 0
μ = fill!(Vector{Float64}(undef,N), NaN) # Means of p(θ|D) over time
σ_μ2 = fill!(Vector{Float64}(undef,N), NaN) # Variances of p(θ|D) over time
function performKalmanStep()
    # Perform a Kalman filter step: update t, μ, σ_μ2
    global t += 1
    if t>1 # Use posterior from prev. step as prior
        K = σ_μ2[t-1] / (σ_ϵ2 + σ_μ2[t-1]) # Kalman gain
        μ[t] = μ[t-1] + K*(x[t] - μ[t-1])  # Update mean using (1)
        σ_μ2[t] = σ_μ2[t-1] * (1.0-K)      # Update variance using (2)
    elseif t==1 # Use prior
        # Prior p(θ) = N(0,1000)
        K = 1000.0 / (σ_ϵ2 + 1000.0)       # Kalman gain
        μ[t] = 0 + K*(x[t] - 0)            # Update mean using (1)
        σ_μ2[t] = 1000 * (1.0-K)           # Update variance using (2)
    end
end
while t<N
performKalmanStep()
end
# Plot the 'true' value of θ, noisy observations x, and the recursively updated posterior p(θ|D)
t = collect(1:N)
plot(t, θ*ones(N), "k--")
plot(t, x, "rx")
plot(t, μ, "b-")
fill_between(t, μ-sqrt.(σ_μ2), μ+sqrt.(σ_μ2), color="b", alpha=0.3)
legend([L"\theta", L"x[t]", L"\mu[t]"])
xlim((1, N)); xlabel(L"t"); grid()
The shaded area covers two standard deviations of the posterior $p(\theta|D)$ (one on either side of the mean). For the standard Kalman filter the posterior variance is guaranteed to decrease monotonically, since $\sigma_t^2 = (1-K_t)\,\sigma_{t-1}^2$ with $0 < K_t < 1$.
where $\mathrm{K}_n(z)$ is a modified Bessel function of the second kind.
using PyPlot, Distributions, SpecialFunctions
X = Normal(0,1)
Y = Normal(0,1)
pdf_product_std_normals(z::Vector) = (besselk.(0, abs.(z))./π)
range1 = collect(range(-4,stop=4,length=100))
plot(range1, pdf.(X, range1))
plot(range1, pdf.(X,range1).*pdf.(Y,range1))
plot(range1, pdf_product_std_normals(range1))
legend([L"p(X)=p(Y)=\mathcal{N}(0,1)", L"p(X)*p(Y)",L"p(Z=X*Y)"]); grid()
using HCubature, LinearAlgebra # Numerical integration and linear algebra packages
# Maximum likelihood estimation of 2D Gaussian
N = size(D, 2) # Number of observations
μ = 1/N * sum(D,dims=2)[:,1]
D_min_μ = D - repeat(μ, 1, N)
Σ = Hermitian(1/N * D_min_μ*D_min_μ')
m = MvNormal(μ, convert(Matrix, Σ));
# Contour plot of estimated Gaussian density
A = Matrix{Float64}(undef,100,100); B = Matrix{Float64}(undef,100,100)
density = Matrix{Float64}(undef,100,100)
for i=1:100
    for j=1:100
        A[i,j] = a = (i-1)*6/100 - 2
        B[i,j] = b = (j-1)*6/100 - 3
        density[i,j] = pdf(m, [a,b])
    end
end
c = contour(A, B, density, 6, zorder=1)
PyPlot.set_cmap("cool")
clabel(c, inline=1, fontsize=10)
# Plot observations, x∙, and the contours of the estimated Gaussian density
plotObservations(D)
plot(x_dot[1], x_dot[2], "ro")
# Numerical integration of p(x|m) over S:
(val,err) = hcubature((x)->pdf(m,x), [0., 1.], [2., 2.])
println("p(x⋅∈S|m) ≈ $(val)")
p(x⋅∈S|m) ≈ 0.20937033913104705
A linear transformation of a Gaussian-distributed (potentially multivariate) variable remains Gaussian (see the numerical check further below).
Bayesian inference with Gaussian prior and likelihood leads to an analytically computable Gaussian posterior.
Here's a nice summary of Gaussian calculations by Sam Roweis.
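The first point above can be checked numerically; below is a minimal sketch with an arbitrary (made-up) transformation matrix A and offset b:
using Distributions, Statistics
# If x ~ N(μ,Σ), then y = A*x + b ~ N(A*μ + b, A*Σ*A')
μ_x = [1.0, 2.0]; Σ_x = [0.3 0.7; 0.7 2.0]
A = [2.0 0.0; 1.0 -1.0]; b = [0.5, 0.0]
samples = rand(MvNormal(μ_x, Σ_x), 100_000)  # one sample per column
y = A*samples .+ b
println("sample mean ", vec(mean(y, dims=2)), " vs. A*μ + b = ", A*μ_x + b)
println("sample cov  ", cov(y, dims=2),       " vs. A*Σ*A' = ", A*Σ_x*A')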
where $a>0$ and $b>0$ are known as the shape and rate parameters, respectively.
with $$\begin{align*} a_N &= a_0 + \frac{N}{2} \tag{B-2.150} \\ b_N &= b_0 + \frac{1}{2}\sum_n \left( x_n-\mu\right)^2 \tag{B-2.151} \end{align*}$$
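As a quick sanity check (with made-up prior parameters and simulated data, and assuming, as these update equations suggest, a Gamma prior on the precision of a Gaussian with known mean $\mu$), equations B-2.150 and B-2.151 can be applied directly. Note that Distributions.jl parameterizes the Gamma distribution by shape and scale, so the rate $b_N$ enters as $1/b_N$:
using Distributions
μ = 0.0; λ_true = 4.0                  # Gaussian with known mean μ and true precision λ = 4 (σ² = 0.25)
x = μ .+ randn(1000) ./ sqrt(λ_true)   # simulated observations
a_0, b_0 = 1.0, 1.0                    # illustrative prior shape and rate
a_N = a_0 + length(x)/2                # B-2.150
b_N = b_0 + 0.5*sum((x .- μ).^2)       # B-2.151
λ_posterior = Gamma(a_N, 1/b_N)        # shape-scale parameterization
println("posterior mean of λ ≈ ", mean(λ_posterior), " (true λ = ", λ_true, ")")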
open("../../styles/aipstyle.html") do f
display("text/html", read(f, String))
end