#!/usr/bin/env python
# coding: utf-8

# *First compiled on: December 3, 2016.*

# # Simulating myeloid progenitors

# In[1]:


import numpy as np
import scanpy.api as sc

sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=80)  # low dpi (dots per inch) yields small inline figures
sc.logging.print_versions()


# Here, we simulate data using a literature-curated boolean gene
# regulatory network, which is believed to describe myeloid differentiation
# ([Krumsiek *et al.*, 2011](https://doi.org/10.1371/journal.pone.0022649)). Using [sim](https://github.com/theislab/scanpy/tree/master/scanpy/tools/sim.py), the
# [boolean model](https://github.com/theislab/scanpy/tree/master/scanpy/sim_models/krumsiek11.txt)
# ```
# Gata2 = Gata2 and not (Gata1 and Fog1) and not Pu.1
# Gata1 = (Gata1 or Gata2 or Fli1) and not Pu.1
# Fog1 = Gata1
# EKLF = Gata1 and not Fli1
# Fli1 = Gata1 and not EKLF
# SCL = Gata1 and not Pu.1
# Cebpa = Cebpa and not (Gata1 and Fog1 and SCL)
# Pu.1 = (Cebpa or Pu.1) and not (Gata1 or Gata2)
# cJun = Pu.1 and not Gfi1
# EgrNab = (Pu.1 and cJun) and not Gfi1
# Gfi1 = Cebpa and not EgrNab
# ```
# is translated into a stochastic differential equation ([Wittmann *et al.*, 2009](#ref_wittmann09)). Simulations result
# in branching time series of gene expression, where each branch corresponds to a
# certain cell fate of common myeloid progenitors (megakaryocytes, erythrocytes,
# granulocytes and monocytes).

# Instead of simulating the data as in the next line, you can also retrieve it from the builtin examples
# ```
# adata = sc.datasets.krumsiek11()
# ```

# This uses the default parameters from [here](https://github.com/theislab/scanpy/tree/master/scanpy/sim_models/krumsiek11_params.txt). To change parameters, read in a different parameter file or simply pass them to `sc.tl.sim`.

# In[2]:


adata = sc.tl.sim('krumsiek11')


# Plot the four realizations of time series.

# In[3]:


sc.pl.sim(adata)


# Compute further visualizations.

# In[4]:


sc.tl.tsne(adata)


# In[7]:


sc.pp.neighbors(adata, n_neighbors=30)
sc.tl.draw_graph(adata)


# Inspecting the genes of the fixed poitns, we can make the following annotation.

# In[8]:


fate_labels = {0: 'progenitor', 159: 'monocyte', 319: 'erythrocyte', 
               459: 'megacaryocyte', 619: 'neutrophil'}
adata.uns['highlights'] = fate_labels
cell_type = np.array(['progenitor' for i in range(adata.n_obs)])                                                                                                               
cell_type[80:160] = 'monocyte'                                                                                                                                                  
cell_type[240:320] = 'erythrocyte'                                                                                                                                              
cell_type[400:480] = 'megakaryocyte'                                                                                                                                            
cell_type[560:640] = 'neutrophil'
adata.obs['cell_type'] = cell_type


# In[9]:


sc.pl.tsne(adata)
sc.pl.draw_graph(adata)


# In[10]:


sc.pl.tsne(adata, color='cell_type')
sc.pl.draw_graph(adata, color='cell_type')


# ### Reconstructing progression and branching using DPT

# In[13]:


adata.uns['iroot'] = 0
sc.tl.dpt(adata, n_branchings=2)


# In[14]:


sc.pl.tsne(adata, color='dpt_pseudotime')
sc.pl.draw_graph(adata, color='dpt_pseudotime')
sc.pl.diffmap(adata, color='dpt_pseudotime', projection='3d')


# The "cuts" into branches are quite arbitrary. Use PAGA instead.

# In[15]:


sc.pl.tsne(adata, color='dpt_groups')
sc.pl.draw_graph(adata, color='dpt_groups', title='simulated data: DPT groups')
sc.pl.diffmap(adata, color='dpt_groups', projection='3d')