import numpy as np import pylab as pb import scipy.stats as sp %pylab inline def generate_data(m,c,n): global mus, covs, ns mus=m covs=c ns=n l1=[] for i in range(len(mus)): l1.append(np.random.multivariate_normal(mus[i], covs[i], ns[i])) X=np.vstack(l1) return X import matplotlib.pyplot as plt from matplotlib.patches import Ellipse def plot_cov_ellipse(cov, pos, nstd=2, ax=None, **kwargs): #REF: http://stackoverflow.com/a/12321306/1472461 """ Plots an `nstd` sigma error ellipse based on the specified covariance matrix (`cov`). Additional keyword arguments are passed on to the ellipse patch artist. Parameters ---------- cov : The 2x2 covariance matrix to base the ellipse on pos : The location of the center of the ellipse. Expects a 2-element sequence of [x0, y0]. nstd : The radius of the ellipse in numbers of standard deviations. Defaults to 2 standard deviations. ax : The axis that the ellipse will be plotted on. Defaults to the current axis. Additional keyword arguments are pass on to the ellipse patch. Returns ------- A matplotlib ellipse artist """ def eigsorted(cov): vals, vecs = np.linalg.eigh(cov) order = vals.argsort()[::-1] return vals[order], vecs[:,order] if ax is None: ax = pb.gca() vals, vecs = eigsorted(cov) theta = np.degrees(np.arctan2(*vecs[:,0][::-1])) # Width and height are "full" widths, not radius width, height = 2 * nstd * np.sqrt(vals) ellip = Ellipse(xy=pos, width=width, height=height, angle=theta,alpha=0.2, **kwargs) ax.add_artist(ellip) return ellip def plot_data(X): global mus, covs, ns for i in range(len(mus)): plot_cov_ellipse(covs[i], mus[i]) pb.plot(X[:,0],X[:,1],'o',ms=6) def loglikelihood(X,parameters,Z): ll=0 for i in range(X.shape[0]): lt=0 j=Z[i] lt+=np.log(parameters["taus"][j]) lt+=- 0.5*np.linalg.slogdet(parameters["covs"][j])[1] #print np.linalg.inv(parameters["covs"][j]).shape inm=np.dot((X[i,:]-parameters["mus"][j]).reshape((1,X.shape[1])),np.linalg.inv(parameters["covs"][j])) #print inm.shape lt+=- 0.5*np.dot(inm,(X[i,:]-parameters["mus"][j]).reshape((X.shape[1],1))) lt+= - (len(parameters["mus"])/2.0) * np.log(2*np.pi) ll+=lt return ll def estep(X,parameters): #return assignment probabilities T ts=[] mns=[] for i in range(len(parameters["mus"])): mns.append(sp.multivariate_normal(mean=parameters["mus"][i], cov=parameters["covs"][i])) #print mns for i in range(X.shape[0]): tt=[] nsum=0.0 for j in range(len(parameters["mus"])): nsum+=parameters["taus"][j]*mns[j].pdf(X[i,:]) for j in range(len(parameters["mus"])): tt.append((parameters["taus"][j]*mns[j].pdf(X[i,:]))/nsum) ts.append(np.array(tt)) return np.vstack(ts) def mstep(X,parameters, T): #return new parameters theta_t+1 newmus=[] newcovs=[] newtaus=[] newparams={} for j in range(len(parameters["mus"])): newtaus.append(1/(float(X.shape[0])) * np.sum(T[:,j])) msum=np.zeros(parameters["mus"][0].shape) for i in range(X.shape[0]): msum+=T[i,j]*X[i,:] newmus.append(msum/float(np.sum(T[:,j]))) csum=np.zeros(parameters["covs"][0].shape) for i in range(X.shape[0]): #print (X[i,:]-newmus[j]) #print np.dot((X[i,:]-newmus[j]).reshape((X.shape[1],1)),(X[i,:]-newmus[j]).reshape((1,X.shape[1]))) csum+=T[i,j]*np.dot((X[i,:]-newmus[j]).reshape((X.shape[1],1)),(X[i,:]-newmus[j]).reshape((1,X.shape[1]))) #print csum newcovs.append(csum/float(np.sum(T[:,j]))) newparams["mus"]=newmus newparams["covs"]=newcovs newparams["taus"]=newtaus return newparams def plotpred(X, parameters, Z): global mus, covs, ns mycolors=["red","green","blue","yellow"] for i in range(len(mus)): plot_cov_ellipse(parameters["covs"][i], parameters["mus"][i], color=mycolors[i]) for i in range(len(mus)): pb.plot(X[:,0][np.where(Z==i)],X[:,1][np.where(Z==i)],'o',ms=6, color=mycolors[i]) def step(x): if x>=0.5: return 0 else: return 1 vstep=np.vectorize(step) %pylab inline #print generate_data() means=[np.array([-3,5]),np.array([3,3]),np.array([-5,-4]),np.array([0.5,-2])] covs=[np.array([[1,0],[0,1]]),np.array([[1,0.5],[0.5,1]]),np.array([[2,0],[0,1.4]]),np.array([[2,-0.5],[-0.5,2]])] ns=[100,50,50, 100] X=generate_data(means, covs, ns) print X.shape plot_data(X) parameters={"mus":[np.array([0.5,-0.1]),np.array([5.3,1.2]),np.array([-.2,1.1]),np.array([6,-6])],\ "covs": [np.array([[1.3,0.4],[0.4,1.3]]),np.array([[1,0],[0,1]]),np.array([[1,0.5],[0.5,1]]),np.array([[1.3,-0.4],[-0.4,1.3]])],\ "taus": [0.3,0.2,0.2,0.3]} T=estep(X,parameters) Z=np.argmax(T,axis=1) print loglikelihood(X,parameters,Z) print T.shape past=[] past.append((parameters,loglikelihood(X,parameters,Z)[0][0],Z)) for i in range(50): T=estep(X,parameters) Z=np.argmax(T,axis=1) newparams=mstep(X,parameters,T) #print newparams parameters=newparams #print newparams print loglikelihood(X,parameters,Z) past.append((parameters,loglikelihood(X,parameters,Z)[0][0],Z)) #print len(past) #print past[0] plotpred(X,parameters,Z) %matplotlib inline import numpy as np import matplotlib.pyplot as plt from ipywidgets import StaticInteract, RangeWidget, RadioWidget #REF: https://jakevdp.github.io/blog/2013/12/05/static-interactive-widgets/ mycolors2=["red","green","blue","yellow"] def plotpred2(iteration): global X global past, mycolors2 global mus, covs, ns global parameters parameters=past[iteration][0] # the histogram of the data with histtype='step' fig, ax = plt.subplots(figsize=(12, 9), subplot_kw={'axisbg':'#EEEEEE', 'axisbelow':True}) ax.grid(color='w', linewidth=2, linestyle='solid') for i in range(len(mus)): plot_cov_ellipse(parameters["covs"][i], parameters["mus"][i], color=mycolors2[i]) # add a line showing the expected distribution st1=" Current estimates:\n" for j in range(len(parameters["mus"])): st1+="mu_%i: (%.2f, %.2f)\n" % (j,parameters["mus"][j][0],parameters["mus"][j][1]) st2=" Real values:\n" for j in range(len(parameters["mus"])): st2+="mu_%i: (%.2f, %.2f)\n" % (j,mus[j][0],mus[j][1]) st3=" Log-likelihood: %.2f" % (past[iteration][1]) #print iteration Z=past[iteration][2] for i in range(len(mus)): pb.plot(X[:,0][np.where(Z==i)],X[:,1][np.where(Z==i)],'o',ms=6, color=mycolors2[i]) #pb.plot(X[:,0],X[:,1],'o',ms=6, color=mycolors2[i]) #ax.set_xlim(-10, 10) #ax.set_ylim(-10, 10) ax.set_title("EM Algorithm: Iteration " + str(iteration)) ax.text(-9.5,2.9,st1+"\n"+st2+"\n"+st3) return fig #for i in range(20): # fig=plotpred2(i) # plt.savefig('foo'+str(i)+'.png') StaticInteract? StaticInteract(plotpred2, iteration=RangeWidget(0, 50, 1))