import numpy as np
import pylab as pb
import scipy.stats as sp
%pylab inline


def generate_data(m,c,n):
    global mus, covs, ns
    mus=m
    covs=c
    ns=n
    l1=[]
    for i in range(len(mus)):
        l1.append(np.random.multivariate_normal(mus[i], covs[i], ns[i]))
    X=np.vstack(l1)
    return X


import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse


def plot_cov_ellipse(cov, pos, nstd=2, ax=None, **kwargs):
    #REF: http://stackoverflow.com/a/12321306/1472461
    """
    Plots an `nstd` sigma error ellipse based on the specified covariance
    matrix (`cov`). Additional keyword arguments are passed on to the 
    ellipse patch artist.

    Parameters
    ----------
        cov : The 2x2 covariance matrix to base the ellipse on
        pos : The location of the center of the ellipse. Expects a 2-element
            sequence of [x0, y0].
        nstd : The radius of the ellipse in numbers of standard deviations.
            Defaults to 2 standard deviations.
        ax : The axis that the ellipse will be plotted on. Defaults to the 
            current axis.
        Additional keyword arguments are pass on to the ellipse patch.

    Returns
    -------
        A matplotlib ellipse artist
    """
    def eigsorted(cov):
        vals, vecs = np.linalg.eigh(cov)
        order = vals.argsort()[::-1]
        return vals[order], vecs[:,order]

    if ax is None:
        ax = pb.gca()

    vals, vecs = eigsorted(cov)
    theta = np.degrees(np.arctan2(*vecs[:,0][::-1]))

    # Width and height are "full" widths, not radius
    width, height = 2 * nstd * np.sqrt(vals)
    ellip = Ellipse(xy=pos, width=width, height=height, angle=theta,alpha=0.2, **kwargs)

    ax.add_artist(ellip)
    return ellip

def plot_data(X):
    global mus, covs, ns
    for i in range(len(mus)):
        plot_cov_ellipse(covs[i], mus[i])
    pb.plot(X[:,0],X[:,1],'o',ms=6)


def loglikelihood(X,parameters,Z):
    ll=0
    for i in range(X.shape[0]):
        lt=0
        j=Z[i]
        lt+=np.log(parameters["taus"][j])
        lt+=- 0.5*np.linalg.slogdet(parameters["covs"][j])[1]
        #print np.linalg.inv(parameters["covs"][j]).shape
        inm=np.dot((X[i,:]-parameters["mus"][j]).reshape((1,X.shape[1])),np.linalg.inv(parameters["covs"][j]))
        #print inm.shape
        
        lt+=- 0.5*np.dot(inm,(X[i,:]-parameters["mus"][j]).reshape((X.shape[1],1)))
        lt+= - (len(parameters["mus"])/2.0) * np.log(2*np.pi)
            
        ll+=lt
    return ll


def estep(X,parameters):
    #return assignment probabilities T
    ts=[]
    mns=[]
    for i in range(len(parameters["mus"])):
        mns.append(sp.multivariate_normal(mean=parameters["mus"][i], cov=parameters["covs"][i]))

    #print mns
    for i in range(X.shape[0]):
        tt=[]
        nsum=0.0
        for j in range(len(parameters["mus"])):
            nsum+=parameters["taus"][j]*mns[j].pdf(X[i,:])
        for j in range(len(parameters["mus"])):
            tt.append((parameters["taus"][j]*mns[j].pdf(X[i,:]))/nsum)
        
        ts.append(np.array(tt))
        
    return np.vstack(ts)

def mstep(X,parameters, T):
    #return new parameters theta_t+1
    newmus=[]
    newcovs=[]
    newtaus=[]
    
    
    newparams={}
    for j in range(len(parameters["mus"])):
        newtaus.append(1/(float(X.shape[0])) * np.sum(T[:,j]))

        msum=np.zeros(parameters["mus"][0].shape)
        for i in range(X.shape[0]):
            msum+=T[i,j]*X[i,:]
        newmus.append(msum/float(np.sum(T[:,j])))
        
        csum=np.zeros(parameters["covs"][0].shape)
        for i in range(X.shape[0]):
            #print (X[i,:]-newmus[j])
            #print np.dot((X[i,:]-newmus[j]).reshape((X.shape[1],1)),(X[i,:]-newmus[j]).reshape((1,X.shape[1])))
            csum+=T[i,j]*np.dot((X[i,:]-newmus[j]).reshape((X.shape[1],1)),(X[i,:]-newmus[j]).reshape((1,X.shape[1])))
            #print csum
        newcovs.append(csum/float(np.sum(T[:,j])))
        
    newparams["mus"]=newmus
    newparams["covs"]=newcovs
    newparams["taus"]=newtaus
    
    return newparams

def plotpred(X, parameters, Z):
    global mus, covs, ns
    mycolors=["red","green","blue","yellow"]
    for i in range(len(mus)):
        plot_cov_ellipse(parameters["covs"][i], parameters["mus"][i], color=mycolors[i])
    for i in range(len(mus)):
        pb.plot(X[:,0][np.where(Z==i)],X[:,1][np.where(Z==i)],'o',ms=6, color=mycolors[i])
    
    
def step(x):
    if x>=0.5:
        return 0
    else:
        return 1
    
vstep=np.vectorize(step)

%pylab inline
#print generate_data()
means=[np.array([-3,5]),np.array([3,3]),np.array([-5,-4]),np.array([0.5,-2])]

covs=[np.array([[1,0],[0,1]]),np.array([[1,0.5],[0.5,1]]),np.array([[2,0],[0,1.4]]),np.array([[2,-0.5],[-0.5,2]])]

ns=[100,50,50, 100]


X=generate_data(means, covs, ns)
print X.shape
plot_data(X)

parameters={"mus":[np.array([0.5,-0.1]),np.array([5.3,1.2]),np.array([-.2,1.1]),np.array([6,-6])],\
            "covs": [np.array([[1.3,0.4],[0.4,1.3]]),np.array([[1,0],[0,1]]),np.array([[1,0.5],[0.5,1]]),np.array([[1.3,-0.4],[-0.4,1.3]])],\
            "taus": [0.3,0.2,0.2,0.3]}


T=estep(X,parameters)
Z=np.argmax(T,axis=1)
print loglikelihood(X,parameters,Z)
print T.shape
past=[]
past.append((parameters,loglikelihood(X,parameters,Z)[0][0],Z))
for i in range(50):
    T=estep(X,parameters)
    Z=np.argmax(T,axis=1)
    newparams=mstep(X,parameters,T)
    #print newparams
    parameters=newparams
    #print newparams
    print loglikelihood(X,parameters,Z)
    past.append((parameters,loglikelihood(X,parameters,Z)[0][0],Z))

#print len(past)
#print past[0]

plotpred(X,parameters,Z)

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from ipywidgets import StaticInteract, RangeWidget, RadioWidget
#REF: https://jakevdp.github.io/blog/2013/12/05/static-interactive-widgets/
mycolors2=["red","green","blue","yellow"]

        
def plotpred2(iteration):
    global X
    global past, mycolors2
    global mus, covs, ns
    global parameters
    parameters=past[iteration][0]
    # the histogram of the data with histtype='step'
    fig, ax = plt.subplots(figsize=(12, 9),
                           subplot_kw={'axisbg':'#EEEEEE',
                                       'axisbelow':True})
    ax.grid(color='w', linewidth=2, linestyle='solid')

    
    for i in range(len(mus)):
        plot_cov_ellipse(parameters["covs"][i], parameters["mus"][i], color=mycolors2[i])
    

    # add a line showing the expected distribution

    
    st1=" Current estimates:\n"
    for j in range(len(parameters["mus"])):
        st1+="mu_%i: (%.2f, %.2f)\n" % (j,parameters["mus"][j][0],parameters["mus"][j][1])
        
    st2=" Real values:\n"
    for j in range(len(parameters["mus"])):
        st2+="mu_%i: (%.2f, %.2f)\n" % (j,mus[j][0],mus[j][1])    
        
    st3=" Log-likelihood: %.2f" % (past[iteration][1])
    
    #print iteration
    Z=past[iteration][2]

    for i in range(len(mus)):
        pb.plot(X[:,0][np.where(Z==i)],X[:,1][np.where(Z==i)],'o',ms=6, color=mycolors2[i])
    #pb.plot(X[:,0],X[:,1],'o',ms=6, color=mycolors2[i])
    #ax.set_xlim(-10, 10)
    #ax.set_ylim(-10, 10)
    ax.set_title("EM Algorithm: Iteration " + str(iteration))
    ax.text(-9.5,2.9,st1+"\n"+st2+"\n"+st3)
    return fig


#for i in range(20):
#    fig=plotpred2(i)
#    plt.savefig('foo'+str(i)+'.png')

StaticInteract?

StaticInteract(plotpred2,
               iteration=RangeWidget(0, 50, 1))