import numpy as np
import pylab as pb
import scipy.stats as sp
%pylab inline
def generate_data(m, c, n):
    # Draw n[i] samples from each Gaussian component (mean m[i], covariance c[i])
    # and stack them into a single data matrix X.
    global mus, covs, ns
    mus = m
    covs = c
    ns = n
    l1 = []
    for i in range(len(mus)):
        l1.append(np.random.multivariate_normal(mus[i], covs[i], ns[i]))
    X = np.vstack(l1)
    return X
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
def plot_cov_ellipse(cov, pos, nstd=2, ax=None, **kwargs):
    #REF: http://stackoverflow.com/a/12321306/1472461
    """
    Plots an `nstd` sigma error ellipse based on the specified covariance
    matrix (`cov`). Additional keyword arguments are passed on to the
    ellipse patch artist.

    Parameters
    ----------
    cov : The 2x2 covariance matrix to base the ellipse on
    pos : The location of the center of the ellipse. Expects a 2-element
        sequence of [x0, y0].
    nstd : The radius of the ellipse in numbers of standard deviations.
        Defaults to 2 standard deviations.
    ax : The axis that the ellipse will be plotted on. Defaults to the
        current axis.
    Additional keyword arguments are passed on to the ellipse patch.

    Returns
    -------
    A matplotlib ellipse artist
    """
    def eigsorted(cov):
        vals, vecs = np.linalg.eigh(cov)
        order = vals.argsort()[::-1]
        return vals[order], vecs[:, order]

    if ax is None:
        ax = pb.gca()

    vals, vecs = eigsorted(cov)
    theta = np.degrees(np.arctan2(*vecs[:, 0][::-1]))

    # Width and height are "full" widths, not radius
    width, height = 2 * nstd * np.sqrt(vals)

    ellip = Ellipse(xy=pos, width=width, height=height, angle=theta, alpha=0.2, **kwargs)
    ax.add_artist(ellip)
    return ellip
def plot_data(X):
    global mus, covs, ns
    for i in range(len(mus)):
        plot_cov_ellipse(covs[i], mus[i])
    pb.plot(X[:, 0], X[:, 1], 'o', ms=6)
def loglikelihood(X, parameters, Z):
    # Log-likelihood of the data under the hard assignments Z:
    # sum_i [ log tau_{z_i} + log N(x_i | mu_{z_i}, Sigma_{z_i}) ]
    ll = 0
    for i in range(X.shape[0]):
        lt = 0
        j = Z[i]
        lt += np.log(parameters["taus"][j])
        lt += -0.5 * np.linalg.slogdet(parameters["covs"][j])[1]
        inm = np.dot((X[i, :] - parameters["mus"][j]).reshape((1, X.shape[1])),
                     np.linalg.inv(parameters["covs"][j]))
        lt += -0.5 * np.dot(inm, (X[i, :] - parameters["mus"][j]).reshape((X.shape[1], 1)))
        lt += -(X.shape[1] / 2.0) * np.log(2 * np.pi)  # normalising constant: (d/2) * log(2*pi)
        ll += lt
    return ll
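# Optional cross-check: the per-point terms above should agree with scipy's own
# log-density, so the same quantity can be computed (a minimal sketch, assuming
# scipy.stats is available as sp, as imported above) via multivariate_normal.logpdf.
def loglikelihood_check(X, parameters, Z):
    ll = 0.0
    for i in range(X.shape[0]):
        j = Z[i]
        ll += np.log(parameters["taus"][j])
        ll += sp.multivariate_normal(mean=parameters["mus"][j],
                                     cov=parameters["covs"][j]).logpdf(X[i, :])
    return ll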
def estep(X, parameters):
    # E-step: return the N x K matrix of responsibilities
    # T[i, j] = tau_j * N(x_i | mu_j, Sigma_j) / sum_k tau_k * N(x_i | mu_k, Sigma_k)
    ts = []
    mns = []
    for i in range(len(parameters["mus"])):
        mns.append(sp.multivariate_normal(mean=parameters["mus"][i], cov=parameters["covs"][i]))
    for i in range(X.shape[0]):
        tt = []
        nsum = 0.0
        for j in range(len(parameters["mus"])):
            nsum += parameters["taus"][j] * mns[j].pdf(X[i, :])
        for j in range(len(parameters["mus"])):
            tt.append((parameters["taus"][j] * mns[j].pdf(X[i, :])) / nsum)
        ts.append(np.array(tt))
    return np.vstack(ts)
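# A small sanity check for the E-step (illustrative helper, not used elsewhere):
# each row of the responsibility matrix is a distribution over the K components
# and should sum to 1 up to floating-point error.
def check_responsibilities(T, tol=1e-8):
    # True when every row of T sums to 1 within `tol`.
    return np.allclose(T.sum(axis=1), 1.0, atol=tol)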
def mstep(X, parameters, T):
    # M-step: given responsibilities T, return the updated parameters theta_{t+1}
    newmus = []
    newcovs = []
    newtaus = []
    newparams = {}
    for j in range(len(parameters["mus"])):
        # Mixing proportion: average responsibility of component j
        newtaus.append(np.sum(T[:, j]) / float(X.shape[0]))
        # Mean: responsibility-weighted average of the data
        msum = np.zeros(parameters["mus"][0].shape)
        for i in range(X.shape[0]):
            msum += T[i, j] * X[i, :]
        newmus.append(msum / float(np.sum(T[:, j])))
        # Covariance: responsibility-weighted outer products around the new mean
        csum = np.zeros(parameters["covs"][0].shape)
        for i in range(X.shape[0]):
            diff = (X[i, :] - newmus[j]).reshape((X.shape[1], 1))
            csum += T[i, j] * np.dot(diff, diff.T)
        newcovs.append(csum / float(np.sum(T[:, j])))
    newparams["mus"] = newmus
    newparams["covs"] = newcovs
    newparams["taus"] = newtaus
    return newparams
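# Convenience wrappers (an illustrative sketch; the cells below run the loop inline):
# one EM sweep is an E-step followed by an M-step, and since EM does not decrease the
# marginal likelihood, the hard-assignment loglikelihood() above can serve as a rough
# convergence proxy.
def em_iteration(X, parameters):
    T = estep(X, parameters)
    Z = np.argmax(T, axis=1)
    return mstep(X, parameters, T), Z

def run_em(X, parameters, max_iter=50, tol=1e-6):
    # Iterate until the proxy log-likelihood improves by less than `tol`.
    prev = -np.inf
    for it in range(max_iter):
        parameters, Z = em_iteration(X, parameters)
        cur = float(loglikelihood(X, parameters, Z))
        if cur - prev < tol:
            break
        prev = cur
    return parameters, Z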
def plotpred(X, parameters, Z):
    global mus, covs, ns
    mycolors = ["red", "green", "blue", "yellow"]
    for i in range(len(mus)):
        plot_cov_ellipse(parameters["covs"][i], parameters["mus"][i], color=mycolors[i])
    for i in range(len(mus)):
        pb.plot(X[:, 0][np.where(Z == i)], X[:, 1][np.where(Z == i)], 'o', ms=6, color=mycolors[i])
def step(x):
    # Hard threshold at 0.5; note that the later %pylab call clobbers this name
    # (see the warning in the output below).
    if x >= 0.5:
        return 0
    else:
        return 1

vstep = np.vectorize(step)
%pylab inline
#print generate_data()
means=[np.array([-3,5]),np.array([3,3]),np.array([-5,-4]),np.array([0.5,-2])]
covs=[np.array([[1,0],[0,1]]),np.array([[1,0.5],[0.5,1]]),np.array([[2,0],[0,1.4]]),np.array([[2,-0.5],[-0.5,2]])]
ns=[100,50,50, 100]
X=generate_data(means, covs, ns)
print X.shape
plot_data(X)
# Output: X.shape is (300, 2); %pylab warns that its import clobbered the variable 'step'.
parameters={"mus":[np.array([0.5,-0.1]),np.array([5.3,1.2]),np.array([-.2,1.1]),np.array([6,-6])],\
"covs": [np.array([[1.3,0.4],[0.4,1.3]]),np.array([[1,0],[0,1]]),np.array([[1,0.5],[0.5,1]]),np.array([[1.3,-0.4],[-0.4,1.3]])],\
"taus": [0.3,0.2,0.2,0.3]}
T = estep(X, parameters)
Z = np.argmax(T, axis=1)
print loglikelihood(X, parameters, Z)
print T.shape
past = []
past.append((parameters, loglikelihood(X, parameters, Z)[0][0], Z))
for i in range(50):
    T = estep(X, parameters)
    Z = np.argmax(T, axis=1)
    newparams = mstep(X, parameters, T)
    parameters = newparams
    print loglikelihood(X, parameters, Z)
    past.append((parameters, loglikelihood(X, parameters, Z)[0][0], Z))
#print len(past)
#print past[0]
# Output (abridged): the initial log-likelihood and T.shape == (300, 4) are printed first;
# the per-iteration log-likelihood then rises steeply and levels off, converging after
# roughly 40-50 EM iterations.
plotpred(X,parameters,Z)
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import StaticInteract, RangeWidget, RadioWidget
#REF: https://jakevdp.github.io/blog/2013/12/05/static-interactive-widgets/
mycolors2=["red","green","blue","yellow"]
def plotpred2(iteration):
    global X
    global past, mycolors2
    global mus, covs, ns
    global parameters
    parameters = past[iteration][0]
    fig, ax = plt.subplots(figsize=(12, 9),
                           subplot_kw={'axisbg': '#EEEEEE',
                                       'axisbelow': True})
    ax.grid(color='w', linewidth=2, linestyle='solid')
    # Ellipses for the parameter estimates at this iteration
    for i in range(len(mus)):
        plot_cov_ellipse(parameters["covs"][i], parameters["mus"][i], color=mycolors2[i])
    # Text block comparing the current mean estimates with the true means
    st1 = " Current estimates:\n"
    for j in range(len(parameters["mus"])):
        st1 += "mu_%i: (%.2f, %.2f)\n" % (j, parameters["mus"][j][0], parameters["mus"][j][1])
    st2 = " Real values:\n"
    for j in range(len(parameters["mus"])):
        st2 += "mu_%i: (%.2f, %.2f)\n" % (j, mus[j][0], mus[j][1])
    st3 = " Log-likelihood: %.2f" % (past[iteration][1])
    # Data points coloured by their hard assignment at this iteration
    Z = past[iteration][2]
    for i in range(len(mus)):
        pb.plot(X[:, 0][np.where(Z == i)], X[:, 1][np.where(Z == i)], 'o', ms=6, color=mycolors2[i])
    #ax.set_xlim(-10, 10)
    #ax.set_ylim(-10, 10)
    ax.set_title("EM Algorithm: Iteration " + str(iteration))
    ax.text(-9.5, 2.9, st1 + "\n" + st2 + "\n" + st3)
    return fig
#for i in range(20):
#    fig = plotpred2(i)
#    plt.savefig('foo' + str(i) + '.png')
StaticInteract?
StaticInteract(plotpred2,
               iteration=RangeWidget(0, 50, 1))
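# Note: StaticInteract, RangeWidget and RadioWidget come from the helper module
# distributed with the blog post referenced above, not from the ipywidgets package on
# PyPI. If that module is unavailable, a similar slider can be built (assuming a live
# Jupyter kernel) with the standard ipywidgets API, e.g.:
# from ipywidgets import interact
# interact(plotpred2, iteration=(0, 50, 1))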