import numpy as np
import pylab as pb
import scipy.stats as sp
%pylab inline
def generate_data(m, c, n):
    # Draw n[i] samples from each Gaussian component (mean m[i], covariance c[i])
    # and stack them into a single data matrix X.
    global mus, covs, ns
    mus = m
    covs = c
    ns = n
    l1 = []
    for i in range(len(mus)):
        l1.append(np.random.multivariate_normal(mus[i], covs[i], ns[i]))
    X = np.vstack(l1)
    return X
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
def plot_cov_ellipse(cov, pos, nstd=2, ax=None, **kwargs):
    #REF: http://stackoverflow.com/a/12321306/1472461
    """
    Plots an `nstd` sigma error ellipse based on the specified covariance
    matrix (`cov`). Additional keyword arguments are passed on to the
    ellipse patch artist.

    Parameters
    ----------
    cov : The 2x2 covariance matrix to base the ellipse on
    pos : The location of the center of the ellipse. Expects a 2-element
        sequence of [x0, y0].
    nstd : The radius of the ellipse in numbers of standard deviations.
        Defaults to 2 standard deviations.
    ax : The axis that the ellipse will be plotted on. Defaults to the
        current axis.
    Additional keyword arguments are passed on to the ellipse patch.

    Returns
    -------
    A matplotlib ellipse artist
    """
    def eigsorted(cov):
        vals, vecs = np.linalg.eigh(cov)
        order = vals.argsort()[::-1]
        return vals[order], vecs[:, order]

    if ax is None:
        ax = pb.gca()

    vals, vecs = eigsorted(cov)
    theta = np.degrees(np.arctan2(*vecs[:, 0][::-1]))

    # Width and height are "full" widths, not radius
    width, height = 2 * nstd * np.sqrt(vals)

    ellip = Ellipse(xy=pos, width=width, height=height, angle=theta, alpha=0.2, **kwargs)
    ax.add_artist(ellip)
    return ellip
def plot_data(X):
    global mus, covs, ns
    for i in range(len(mus)):
        plot_cov_ellipse(covs[i], mus[i])
    pb.plot(X[:, 0], X[:, 1], 'o', ms=6)
def loglikelihood(X, parameters, Z):
    # Log-likelihood of the data under the hard assignments Z:
    # sum_i [ log tau_{z_i} + log N(x_i | mu_{z_i}, Sigma_{z_i}) ]
    ll = 0
    for i in range(X.shape[0]):
        lt = 0
        j = Z[i]
        lt += np.log(parameters["taus"][j])
        lt += -0.5 * np.linalg.slogdet(parameters["covs"][j])[1]
        inm = np.dot((X[i, :] - parameters["mus"][j]).reshape((1, X.shape[1])),
                     np.linalg.inv(parameters["covs"][j]))
        lt += -0.5 * np.dot(inm, (X[i, :] - parameters["mus"][j]).reshape((X.shape[1], 1)))
        lt += -(X.shape[1] / 2.0) * np.log(2 * np.pi)  # normalising constant: (d/2) * log(2*pi)
        ll += lt
    return ll
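# Optional cross-check: the per-point terms above should agree with scipy's own
# log-density, so the same quantity can be computed (a minimal sketch, assuming
# scipy.stats is available as sp, as imported above) via multivariate_normal.logpdf.
def loglikelihood_check(X, parameters, Z):
    ll = 0.0
    for i in range(X.shape[0]):
        j = Z[i]
        ll += np.log(parameters["taus"][j])
        ll += sp.multivariate_normal(mean=parameters["mus"][j],
                                     cov=parameters["covs"][j]).logpdf(X[i, :])
    return ll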
def estep(X, parameters):
    # E-step: return the N x K matrix of responsibilities
    # T[i, j] = tau_j * N(x_i | mu_j, Sigma_j) / sum_k tau_k * N(x_i | mu_k, Sigma_k)
    ts = []
    mns = []
    for i in range(len(parameters["mus"])):
        mns.append(sp.multivariate_normal(mean=parameters["mus"][i], cov=parameters["covs"][i]))
    for i in range(X.shape[0]):
        tt = []
        nsum = 0.0
        for j in range(len(parameters["mus"])):
            nsum += parameters["taus"][j] * mns[j].pdf(X[i, :])
        for j in range(len(parameters["mus"])):
            tt.append((parameters["taus"][j] * mns[j].pdf(X[i, :])) / nsum)
        ts.append(np.array(tt))
    return np.vstack(ts)
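# A small sanity check for the E-step (illustrative helper, not used elsewhere):
# each row of the responsibility matrix is a distribution over the K components
# and should sum to 1 up to floating-point error.
def check_responsibilities(T, tol=1e-8):
    # True when every row of T sums to 1 within `tol`.
    return np.allclose(T.sum(axis=1), 1.0, atol=tol)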
def mstep(X, parameters, T):
    # M-step: given responsibilities T, return the updated parameters theta_{t+1}
    newmus = []
    newcovs = []
    newtaus = []
    newparams = {}
    for j in range(len(parameters["mus"])):
        # Mixing proportion: average responsibility of component j
        newtaus.append(np.sum(T[:, j]) / float(X.shape[0]))
        # Mean: responsibility-weighted average of the data
        msum = np.zeros(parameters["mus"][0].shape)
        for i in range(X.shape[0]):
            msum += T[i, j] * X[i, :]
        newmus.append(msum / float(np.sum(T[:, j])))
        # Covariance: responsibility-weighted outer products around the new mean
        csum = np.zeros(parameters["covs"][0].shape)
        for i in range(X.shape[0]):
            diff = (X[i, :] - newmus[j]).reshape((X.shape[1], 1))
            csum += T[i, j] * np.dot(diff, diff.T)
        newcovs.append(csum / float(np.sum(T[:, j])))
    newparams["mus"] = newmus
    newparams["covs"] = newcovs
    newparams["taus"] = newtaus
    return newparams
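# Convenience wrappers (an illustrative sketch; the cells below run the loop inline):
# one EM sweep is an E-step followed by an M-step, and since EM does not decrease the
# marginal likelihood, the hard-assignment loglikelihood() above can serve as a rough
# convergence proxy.
def em_iteration(X, parameters):
    T = estep(X, parameters)
    Z = np.argmax(T, axis=1)
    return mstep(X, parameters, T), Z

def run_em(X, parameters, max_iter=50, tol=1e-6):
    # Iterate until the proxy log-likelihood improves by less than `tol`.
    prev = -np.inf
    for it in range(max_iter):
        parameters, Z = em_iteration(X, parameters)
        cur = float(loglikelihood(X, parameters, Z))
        if cur - prev < tol:
            break
        prev = cur
    return parameters, Z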
def plotpred(X, parameters, Z):
    global mus, covs, ns
    mycolors = ["red", "green", "blue", "yellow"]
    for i in range(len(mus)):
        plot_cov_ellipse(parameters["covs"][i], parameters["mus"][i], color=mycolors[i])
    for i in range(len(mus)):
        pb.plot(X[:, 0][np.where(Z == i)], X[:, 1][np.where(Z == i)], 'o', ms=6, color=mycolors[i])
def step(x):
    # Hard threshold at 0.5; note that the later %pylab call clobbers this name
    # (see the warning in the output below).
    if x >= 0.5:
        return 0
    else:
        return 1

vstep = np.vectorize(step)
%pylab inline
#print generate_data()
means=[np.array([-3,5]),np.array([3,3]),np.array([-5,-4]),np.array([0.5,-2])]
covs=[np.array([[1,0],[0,1]]),np.array([[1,0.5],[0.5,1]]),np.array([[2,0],[0,1.4]]),np.array([[2,-0.5],[-0.5,2]])]
ns=[100,50,50, 100]
X=generate_data(means, covs, ns)
print X.shape
plot_data(X)
# Output: X.shape is (300, 2); %pylab warns that its import clobbered the variable 'step'.
parameters={"mus":[np.array([0.5,-0.1]),np.array([5.3,1.2]),np.array([-.2,1.1]),np.array([6,-6])],\
"covs": [np.array([[1.3,0.4],[0.4,1.3]]),np.array([[1,0],[0,1]]),np.array([[1,0.5],[0.5,1]]),np.array([[1.3,-0.4],[-0.4,1.3]])],\
"taus": [0.3,0.2,0.2,0.3]}
T = estep(X, parameters)
Z = np.argmax(T, axis=1)
print loglikelihood(X, parameters, Z)
print T.shape
past = []
past.append((parameters, loglikelihood(X, parameters, Z)[0][0], Z))
for i in range(50):
    T = estep(X, parameters)
    Z = np.argmax(T, axis=1)
    newparams = mstep(X, parameters, T)
    parameters = newparams
    print loglikelihood(X, parameters, Z)
    past.append((parameters, loglikelihood(X, parameters, Z)[0][0], Z))
#print len(past)
#print past[0]
# Output (abridged): the initial log-likelihood and T.shape == (300, 4) are printed first;
# the per-iteration log-likelihood then rises steeply and levels off, converging after
# roughly 40-50 EM iterations.
plotpred(X,parameters,Z)
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import StaticInteract, RangeWidget, RadioWidget
#REF: https://jakevdp.github.io/blog/2013/12/05/static-interactive-widgets/
mycolors2=["red","green","blue","yellow"]
def plotpred2(iteration):
    global X
    global past, mycolors2
    global mus, covs, ns
    global parameters
    parameters = past[iteration][0]
    fig, ax = plt.subplots(figsize=(12, 9),
                           subplot_kw={'axisbg': '#EEEEEE',
                                       'axisbelow': True})
    ax.grid(color='w', linewidth=2, linestyle='solid')
    # Ellipses for the parameter estimates at this iteration
    for i in range(len(mus)):
        plot_cov_ellipse(parameters["covs"][i], parameters["mus"][i], color=mycolors2[i])
    # Text block comparing the current mean estimates with the true means
    st1 = " Current estimates:\n"
    for j in range(len(parameters["mus"])):
        st1 += "mu_%i: (%.2f, %.2f)\n" % (j, parameters["mus"][j][0], parameters["mus"][j][1])
    st2 = " Real values:\n"
    for j in range(len(parameters["mus"])):
        st2 += "mu_%i: (%.2f, %.2f)\n" % (j, mus[j][0], mus[j][1])
    st3 = " Log-likelihood: %.2f" % (past[iteration][1])
    # Data points coloured by their hard assignment at this iteration
    Z = past[iteration][2]
    for i in range(len(mus)):
        pb.plot(X[:, 0][np.where(Z == i)], X[:, 1][np.where(Z == i)], 'o', ms=6, color=mycolors2[i])
    #ax.set_xlim(-10, 10)
    #ax.set_ylim(-10, 10)
    ax.set_title("EM Algorithm: Iteration " + str(iteration))
    ax.text(-9.5, 2.9, st1 + "\n" + st2 + "\n" + st3)
    return fig
#for i in range(20):
#    fig = plotpred2(i)
#    plt.savefig('foo' + str(i) + '.png')
StaticInteract?
StaticInteract(plotpred2,
               iteration=RangeWidget(0, 50, 1))
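# Note: StaticInteract, RangeWidget and RadioWidget come from the helper module
# distributed with the blog post referenced above, not from the ipywidgets package on
# PyPI. If that module is unavailable, a similar slider can be built (assuming a live
# Jupyter kernel) with the standard ipywidgets API, e.g.:
# from ipywidgets import interact
# interact(plotpred2, iteration=(0, 50, 1))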