#!/usr/bin/env python
# coding: utf-8

# In[577]:

get_ipython().run_line_magic('pylab', 'inline')
import pandas as pd

rcParams['axes.spines.top'] = False
rcParams['axes.spines.right'] = False


# In[578]:

import theano
theano.config.floatX = 'float64'
import theano.tensor as tt
from theano.tensor.nlinalg import det
import pymc3 as pm
import patsy  # used below to build design matrices for the technical covariates


# In[ ]:


# In[579]:

# Simulate 1000 observations: a binary cluster label, a binary batch label,
# a uniform depth covariate, and Gaussian noise.
cluster = np.random.binomial(1, 0.33, (1000, 1))
batch = np.random.binomial(1, 0.5, (1000, 1))
depth = np.random.uniform(size=(1000, 1))
noise = np.random.normal(size=(1000, 1))

df = pd.DataFrame(data={'cluster': cluster[:, 0],
                        'batch': batch[:, 0],
                        'depth': depth[:, 0],
                        'noise': noise[:, 0]},
                  index=np.arange(cluster.shape[0]))


# In[580]:

df['y'] = df['cluster'] + df['batch'] * 0.5 + df['depth'] * 2 + df['noise'] * 0.25


# In[581]:

df['y'].hist(bins=20, grid=False, ec='k', fc='w');


# In[ ]:


# In[602]:

for v, g in df.groupby('cluster'):
    g['y'].hist(bins=20, grid=False, ec='k', fc=cm.Greys(v / 2));


# In[601]:

for c, gc in df.groupby('cluster'):
    plt.scatter(gc['depth'], gc['y'], c=cm.Greys(c / 2 + 1 / 2));


# In[603]:

for v, g in df.groupby('cluster'):
    (g['y'] - g['depth'] * 2).hist(bins=20, grid=False, ec='k', fc=cm.Greys(v / 2));


# In[604]:

for v, g in df.groupby('cluster'):
    (g['y'] - g['batch'] * 0.5 - g['depth'] * 2).hist(bins=20, grid=False, ec='k', fc=cm.Greys(v / 2));


# \\[
# y \vert (z_i = k) \sim \mathcal{N}(\mu_{k}, \sigma_k)
# \\]

# In[637]:

# Plain K-component Gaussian mixture on y: component means mu_cluster,
# component standard deviations sigma, mixing weights pi.
K = 2
with pm.Model() as model:
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1., shape=(K,))
    mu_cluster = pm.Normal('mu_cluster', mu=0., sd=1., testval=1., shape=(1, K))
    pi = pm.Dirichlet('pi', a=0.1 * np.ones(K), shape=(K,))
    likelihood = pm.NormalMixture('y', w=pi, mu=mu_cluster, sd=sigma, observed=df['y'])


# In[638]:

means, sds, elbos = pm.variational.advi(model=model, n=1000, learning_rate=1e-1)


# In[639]:

means
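# As a quick check of the ADVI fit, the ELBO trace can be plotted (the same
# `plt.plot(elbos)` idea is used for the two-gene model further down); it
# should level off once the approximation has converged.

# In[ ]:

plt.plot(elbos);
plt.xlabel('ADVI iteration')
plt.ylabel('ELBO');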
# In[652]:

# pi is fit on the unconstrained stick-breaking scale; map it back to the simplex.
weights = pm.distributions.transforms.stick_breaking.backward(means['pi_stickbreaking_']).eval()
plt.bar(np.arange(K), weights, color='k');


# In[590]:

from scipy import stats


# In[641]:

# sigma is fit on the log scale, hence the exponentiation.
RV = stats.norm(loc=means['mu_cluster'], scale=exp(means['sigma_log_']))


# In[642]:

inferred_cluster = df['y'].apply(RV.logpdf).apply(np.argmax)
df['inferred_cluster'] = inferred_cluster


# In[643]:

pd.crosstab(df['cluster'], df['inferred_cluster'])


# In[644]:

for i, g in df.groupby('inferred_cluster'):
    for c, gc in g.groupby('cluster'):
        noise = np.random.rand(gc.shape[0]) * 0.5
        plt.scatter(i + gc.y * 0 + noise, gc.y, c=cm.Greys(c / 2 + 1 / 2))


# In[ ]:


# In[ ]:


# In[ ]:


# In[650]:

df.head()


# \\[
# y \vert (z_i = k) \sim \mathcal{N}(\mu_k + \alpha \cdot x_\text{depth} + \beta_j \cdot x_{\text{batch} = j}, \sigma_k)
# \\]

# In[608]:

# Design matrix for the known technical covariates (batch and depth).
X = patsy.dmatrix('~ C(batch) + depth', df, return_type='dataframe')
XX = X.values
K = 10
P = X.shape[1]


# In[686]:

XX.shape


# In[687]:

(np.dot(XX, np.ones((P, 1))) + np.ones((1, K))).shape


# In[ ]:


# In[688]:

K = 2
P = X.shape[1]
with pm.Model() as model:
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1., shape=(K,))
    alpha = pm.Normal('alpha', mu=0., sd=1., testval=1., shape=(P, 1))
    mu_cluster = pm.Normal('mu_cluster', mu=0., sd=1., testval=1., shape=(1, K))
    pi = pm.Dirichlet('pi', a=0.1 * np.ones(K), shape=(K,))

    mu = pm.math.dot(XX, alpha) + mu_cluster

    likelihood = pm.NormalMixture('y', w=pi, mu=mu, sd=sigma, observed=df['y'])


# In[689]:

means, sds, elbos = pm.variational.advi(model=model, n=1000, learning_rate=1e-1)


# In[690]:

means


# In[691]:

weights = pm.distributions.transforms.stick_breaking.backward(means['pi_stickbreaking_']).eval()
plt.bar(np.arange(K), weights, color='k');


# In[692]:

# Assign each observation to the component with the highest log density,
# after accounting for its covariates.
inferred_cluster = []
for y, xx in zip(df.y[:, None], XX):
    mu = xx.dot(means['alpha'])
    RV = stats.norm(loc=mu + means['mu_cluster'], scale=np.exp(means['sigma_log_']))
    inferred_cluster.append(RV.logpdf(y).argmax())


# In[693]:

df['inferred_cluster'] = inferred_cluster


# In[694]:

pd.crosstab(df.cluster, df.inferred_cluster)


# In[714]:

for i, g in df.groupby('inferred_cluster'):
    for c, gc in g.groupby('cluster'):
        noise = np.random.rand(gc.shape[0]) * 0.5
        plt.scatter(i + gc.y * 0 + noise, gc.y, c=cm.Greys(c / 2 + 1 / 2),
                    label='Ground Truth: {}'.format(c))

plt.xticks([0.25, 1.25], [0, 1]);
plt.xlabel('Inferred cluster')
plt.ylabel('Expression');


# In[ ]:


# In[ ]:


# \\[
# y_g \vert (z_i = k) \sim \mathcal{N}(\mu_{g,k} + \alpha_g \cdot x_\text{depth} + \beta_{g, j} \cdot x_{\text{batch} = j}, \sigma_{g, k})
# \\]

# In[720]:

noise2 = np.random.normal(size=(1000, 1))
df['noise2'] = noise2
df['y2'] = 1. + 1.2 * df['cluster'] + df['batch'] * 0.5 + df['depth'] * 1.8 + df['noise2'] * 0.25


# In[726]:

plt.scatter(df['y'], df['y2'], c=df.cluster);


# In[727]:

plt.scatter(df['y'] - df.depth * 2. - df.batch * 0.5,
            df['y2'] - df.depth * 1.8 - df.batch * 0.5,
            c=df.cluster);


# In[ ]:


# In[731]:

from pymc3.math import logsumexp


# In[732]:

# Log likelihood of normal distribution
def logp_normal(mu, tau, value):
    # log probability of individual samples
    k = tau.shape[0]
    delta = lambda mu: value - mu
    return (-1 / 2.) * (k * tt.log(2 * np.pi) + tt.log(1. / det(tau)) +
                        (delta(mu).dot(tau) * delta(mu)).sum(axis=1))


# In[734]:

# Log likelihood of Gaussian mixture distribution
def logp_gmix(mus, pi, tau, n_samples):
    def logp_(value):
        logps = [tt.log(pi[i]) + logp_normal(mu, tau, value)
                 for i, mu in enumerate(mus)]

        return tt.sum(logsumexp(tt.stacklists(logps)[:, :n_samples], axis=0))

    return logp_


# In[800]:

# Two genes (y and y2): shared covariate effects alpha,
# plus a cluster-specific 2-D mean mu_k per component.
G = 2
K = 2
with pm.Model() as model:
    mus = [pm.MvNormal('mu_%d' % i, mu=np.zeros(2), tau=0.1 * np.eye(G), shape=(G,))
           for i in range(K)]
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1.)
    alpha = pm.Normal('alpha', mu=0., sd=1., testval=1., shape=(P, G))
    pi = pm.Dirichlet('pi', a=0.1 * np.ones(K), shape=(K,))

    mu_tech = [pm.math.dot(X, alpha) + mu for mu in mus]

    yg = pm.DensityDist('Y', logp_gmix(mu_tech, pi, sigma * np.eye(K), df.shape[0]),
                        observed=df[['y', 'y2']])


# In[805]:

means, sds, elbos = pm.variational.advi(model=model, n=1000, learning_rate=1e-1)


# In[806]:

means


# In[807]:

plt.scatter(df.y, df.y2)
plt.scatter(*means['mu_0'])
plt.scatter(*means['mu_1'])


# In[808]:

plt.plot(elbos);


# In[816]:

plt.scatter(df['y'] - df.depth * 2. - df.batch * 0.5,
            df['y2'] - df.depth * 1.8 - df.batch * 0.5,
            c=df.cluster);
plt.scatter(*means['mu_0'])
plt.scatter(*means['mu_1']);
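# A minimal sketch of recovering cluster assignments from this two-gene fit,
# analogous to the argmax step used for the one-dimensional model above: assign
# each observation to the nearest fitted component mean after removing the fitted
# covariate effect. This ignores the mixture weights and the shared isotropic
# variance, and the column name `inferred_cluster_2d` is just for illustration.

# In[ ]:

corrected = df[['y', 'y2']].values - XX.dot(means['alpha'])
centers = np.stack([means['mu_0'], means['mu_1']])
dists = ((corrected[:, None, :] - centers[None, :, :]) ** 2).sum(axis=-1)
df['inferred_cluster_2d'] = dists.argmin(axis=1)
pd.crosstab(df.cluster, df.inferred_cluster_2d)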
# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[730]:

K = 2
P = X.shape[1]
G = 2
with pm.Model() as model:
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1., shape=(G, K))
    alpha = pm.Normal('alpha', mu=0., sd=1., testval=1., shape=(P, G))
    mu_cluster = pm.Normal('mu_cluster', mu=0., sd=1., testval=1., shape=(G, K))
    pi = pm.Dirichlet('pi', a=0.1 * np.ones(K), shape=(K,))

    mu = pm.math.dot(XX, alpha) + mu_cluster

    likelihood = pm.NormalMixture('y', w=pi, mu=mu, sd=sigma, observed=df[['y', 'y2']])


# In[ ]:


# In[ ]:


# In[254]:


# \\[
# \mathbf{y}_i \vert (z_i = k) \sim \mathcal{N}(\alpha_i \cdot \mathbf{\mu}_k, \beta_i \cdot \Sigma_k)
# \\]

# In[256]:


# In[262]:


# In[ ]:


# In[ ]:


# \\[
# \begin{align}
# y_{g, j} | (z_j = k) &\sim \mathcal{N}(X \cdot \alpha_{g, k}, \sigma^2_{g, k} )
# \end{align}
# \\]

# In[86]:

import patsy


# In[107]:

X = patsy.dmatrix('np.log(total_counts) + 1', sample_info, return_type='dataframe')
Y = np.log1p(counts.sample(10, axis=0).T)

K = 3
G = Y.shape[1]
P = X.shape[1]


# In[ ]:

with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0., sd=1., testval=1., shape=(G, K, P))


# In[118]:

alpha = np.ones((P, G, K))


# In[122]:

a_0 = alpha[:, :, 0]


# In[132]:

X.dot(a_0).shape


# In[130]:

Y.shape


# In[ ]:
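# The model cell above only declares `alpha`. A minimal sketch of how the per-gene
# mixture in the formula could be completed, following the DensityDist/logsumexp
# pattern used for the two-gene model and the (P, G, K) layout for `alpha` explored
# with numpy above. It assumes the `X`, `Y`, `K`, `G`, `P` built from `sample_info`
# and `counts` in In[107], with each observation j sharing one component z_j
# across all G genes.

# In[ ]:

XX_ = X.values       # (N, P) design matrix
YY = Y.values        # (N, G) log-transformed expression

with pm.Model() as model:
    alpha = pm.Normal('alpha', mu=0., sd=1., testval=1., shape=(P, G, K))
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1., shape=(G, K))
    pi = pm.Dirichlet('pi', a=0.1 * np.ones(K), shape=(K,))

    # (N, P) x (P, G, K) -> (N, G, K): per-gene, per-component means X . alpha_{g,k}
    mu = tt.tensordot(XX_, alpha, axes=[1, 0])

    def logp_(value):
        # value has shape (N, G); add a component axis, sum the per-gene
        # normal log densities, then mix over components.
        delta = value[:, :, None] - mu
        logp_k = -0.5 * (tt.log(2 * np.pi * sigma ** 2) + (delta / sigma) ** 2)
        return tt.sum(logsumexp(tt.log(pi) + logp_k.sum(axis=1), axis=1))

    yg = pm.DensityDist('Y_obs', logp_, observed=YY)

# The fit would then proceed with the same pm.variational.advi call as above.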