This is a detailed tutorial on drawing dot plots of regulation marker TFs. For a brief tutorial covering every step, see main.ipynb.
import numpy as np
from dictys.net import network
from dictys.plot.static import compute_spec,fig_heatmap_reg_spec
from dictys.plot import dotplot,heatmap
#Load the network object used by every compute_spec call below
#NOTE(review): the .h5 extension suggests an HDF5 file; path is relative to the notebook location - confirm
d0=network.from_file('../../data/static.h5')
Dot plot of regulatory activity for top regulation marker TFs
#Color map name, passed to the dotplot calls below
cmap='spring_r'
#Compute regulatory activity, expression, and their specificity for TFs, using default parameters
#n: regulatory activity (target count); v: regulation cell-type specificity
#cpm: expression (CPM); cpm_v: expression cell-type specificity
#reg: regulation marker TFs discovered; reg_s: cell cluster/type/state name of each marker
n,v,cpm,cpm_v,reg,reg_s=compute_spec(d0)
#Keep only the marker TF rows, in reversed order, and draw the regulation dot plot
#NOTE(review): first argument presumably sets dot size and second dot color - confirm against dictys.plot.dotplot
fig,ax=dotplot(n.loc[reg].iloc[::-1],v.loc[reg].iloc[::-1],cmap=cmap)
Dot plot of expression for top regulation marker TFs
#Same marker TFs and row order as the regulation plot, drawn from expression (CPM) and its specificity
fig2,ax2=dotplot(cpm.loc[reg].iloc[::-1],cpm_v.loc[reg].iloc[::-1],cmap=cmap)
#Node sizes to draw legend in dot plot for regulation
node_size_target=[20,100,200,500]
def node_size_map_target(x):
    """Transformation from target count to node size for regulation.

    Counts up to 10 are returned unchanged. Above 10 the size grows
    linearly from 10 with slope 500/10/490, so a count of 500 maps to 60.
    Only arithmetic and comparisons are used, so it works elementwise on
    numpy arrays as well as on plain numbers.
    """
    return x*(x<=10)+(10+(x-10)*500/10/490)*(x>10)
#Node sizes to draw legend in dot plot for expression (CPM)
node_size_cpm=[20,100,500,1000]
def node_size_map_cpm(x):
    """Transformation from CPM to node size for expression.

    CPM values up to 10 are returned unchanged. Above 10 the size grows
    linearly from 10 with slope 1000/20/990, so a CPM of 1000 maps to 60.
    Only arithmetic and comparisons are used, so it works elementwise on
    numpy arrays as well as on plain numbers.
    """
    return x*(x<=10)+(10+(x-10)*1000/20/990)*(x>10)
#Draw the regulation dot plot with the target count size transformation and legend sizes
fig,ax=dotplot(n.loc[reg].iloc[::-1],v.loc[reg].iloc[::-1],size_transform=node_size_map_target,sizes=np.array(node_size_target),cmap=cmap)
#Also show ticks and tick labels along the top of the axes
ax.tick_params(top=True,labeltop=True)
#Draw the expression dot plot with the CPM size transformation and legend sizes
fig2,ax2=dotplot(cpm.loc[reg].iloc[::-1],cpm_v.loc[reg].iloc[::-1],size_transform=node_size_map_cpm,sizes=np.array(node_size_cpm),cmap=cmap)
#Also show ticks and tick labels along the top of the axes
ax2.tick_params(top=True,labeltop=True)
Cell cluster/type/state names selection & ordering
#Cell cluster/type/state names to select, written in the desired order
select_state='Progenitor,Erythroid,GMP,Mono,pDC,CLP,PreB,B,CD4M,CD8CM,CD8N,NK'.split(',')
#Recompute regulatory activity, expression, specificity, and marker TFs for the selected states
n,v,cpm,cpm_v,reg,reg_s=compute_spec(d0,select_state=select_state)
#Regulation dot plot for the marker TFs (rows in reversed order), with tick labels also on top
fig,ax=dotplot(n.loc[reg].iloc[::-1],v.loc[reg].iloc[::-1],size_transform=node_size_map_target,sizes=np.array(node_size_target),cmap=cmap)
ax.tick_params(top=True,labeltop=True)
#Expression dot plot for the same marker TFs, with tick labels also on top
fig2,ax2=dotplot(cpm.loc[reg].iloc[::-1],cpm_v.loc[reg].iloc[::-1],size_transform=node_size_map_cpm,sizes=np.array(node_size_cpm),cmap=cmap)
ax2.tick_params(top=True,labeltop=True)
Parameters for regulation marker gene selection: stronger criteria as an example
#Specificity entropy level required (relative to random assignment) to select regulator. Lower means more specific.
min_entropy=0.5
#Minimum probability required to be selected
#NOTE(review): presumably a cutoff on the cell-type specificity values - confirm against compute_spec
ncut=0.5
#Minimum number of targets required
vmin=25
#Number of top regulation marker TFs to discover for each state
nmax=8
#Recompute everything for the selected states with the marker selection parameters above
n,v,cpm,cpm_v,reg,reg_s=compute_spec(d0,select_state=select_state,min_entropy=min_entropy,ncut=ncut,vmin=vmin,nmax=nmax)
#Regulation dot plot for the newly selected marker TFs (rows in reversed order), with tick labels also on top
fig,ax=dotplot(n.loc[reg].iloc[::-1],v.loc[reg].iloc[::-1],size_transform=node_size_map_target,sizes=np.array(node_size_target),cmap=cmap)
ax.tick_params(top=True,labeltop=True)
#Expression dot plot for the same marker TFs, with tick labels also on top
fig2,ax2=dotplot(cpm.loc[reg].iloc[::-1],cpm_v.loc[reg].iloc[::-1],size_transform=node_size_map_cpm,sizes=np.array(node_size_cpm),cmap=cmap)
ax2.tick_params(top=True,labeltop=True)
#Regulatory activity (target count)
n.head()
Progenitor | Erythroid | GMP | Mono | pDC | CLP | PreB | B | CD4M | CD8CM | CD8N | NK | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
AHR | 245 | 126 | 138 | 134 | 0 | 223 | 0 | 0 | 72 | 0 | 0 | 0 |
AR | 102 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ARID3A | 0 | 9 | 12 | 1 | 11 | 3 | 2 | 0 | 0 | 0 | 0 | 0 |
ARID5B | 0 | 11 | 6 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 1 |
ARNT | 135 | 169 | 67 | 148 | 0 | 47 | 146 | 0 | 0 | 0 | 0 | 0 |
#Regulation cell-type specificity (normalized proportion of targets from each cell type)
v.head()
Progenitor | Erythroid | GMP | Mono | pDC | CLP | PreB | B | CD4M | CD8CM | CD8N | NK | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
AHR | 0.223546 | 0.126423 | 0.132682 | 0.152066 | 0.000000 | 0.261088 | 0.000000 | 0.00000 | 0.104196 | 0.0 | 0.0 | 0.000000 |
AR | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.0 | 0.0 | 0.000000 |
ARID3A | 0.000000 | 0.183605 | 0.234584 | 0.023073 | 0.425682 | 0.071415 | 0.061640 | 0.00000 | 0.000000 | 0.0 | 0.0 | 0.000000 |
ARID5B | 0.000000 | 0.425021 | 0.222149 | 0.000000 | 0.000000 | 0.090172 | 0.058373 | 0.08974 | 0.000000 | 0.0 | 0.0 | 0.114545 |
ARNT | 0.153693 | 0.211575 | 0.080376 | 0.209561 | 0.000000 | 0.068659 | 0.276136 | 0.00000 | 0.000000 | 0.0 | 0.0 | 0.000000 |
#Expression (CPM)
cpm.head()
Progenitor | Erythroid | GMP | Mono | pDC | CLP | PreB | B | CD4M | CD8CM | CD8N | NK | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
A1BG | 53.948733 | 19.833701 | 65.911924 | 70.086891 | 291.099448 | 96.716101 | 105.474197 | 89.578922 | 85.796162 | 117.408482 | 189.475253 | 0.0 |
A1BG-AS1 | 5.475999 | 0.000000 | 7.294939 | 6.371536 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 |
A2M-AS1 | 10.546369 | 7.024436 | 4.977723 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 37.580137 | 77.809172 | 0.000000 | 0.0 |
AAAS | 26.501132 | 32.229764 | 26.175959 | 14.703543 | 0.000000 | 30.320608 | 40.303148 | 0.000000 | 30.135016 | 0.000000 | 0.000000 | 0.0 |
AACS | 12.371702 | 11.259757 | 11.757726 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 |
#Expression cell-type specificity (proportion of CPM from each cell type)
cpm_v.head()
Progenitor | Erythroid | GMP | Mono | pDC | CLP | PreB | B | CD4M | CD8CM | CD8N | NK | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
A1BG | 0.045514 | 0.016733 | 0.055606 | 0.059129 | 0.245585 | 0.081594 | 0.088983 | 0.075573 | 0.072382 | 0.099051 | 0.15985 | 0.0 |
A1BG-AS1 | 0.286065 | 0.000000 | 0.381087 | 0.332848 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.0 |
A2M-AS1 | 0.076457 | 0.050925 | 0.036087 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.272443 | 0.564089 | 0.00000 | 0.0 |
AAAS | 0.132262 | 0.160852 | 0.130639 | 0.073382 | 0.000000 | 0.151324 | 0.201144 | 0.000000 | 0.150397 | 0.000000 | 0.00000 | 0.0 |
AACS | 0.349590 | 0.318169 | 0.332241 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.0 |
#Regulation markers discovered
reg
array(['AR', 'HLF', 'ZNF418', 'THRB', 'YY2', 'ETV2', 'STAT4', 'HESX1', 'GATA1', 'GATA2', 'ZNF713', 'NR1H3', 'TAL1', 'KLF1', 'HOXB4', 'GFI1B', 'ZBTB7B', 'CREB5', 'KLF5', 'VDR', 'MAFB', 'SNAI1', 'CEBPE', 'PRDM1', 'BATF3', 'ZNF467', 'TCF7L2', 'PPARA', 'IRF4', 'SP4', 'ZBTB17', 'SMAD1', 'E2F7', 'SPIB', 'MAF', 'MAFF', 'ASCL2', 'GATA3'], dtype='<U12')
#Corresponding cell cluster/type/state name of each regulation marker
reg_s
array(['Progenitor', 'Progenitor', 'Progenitor', 'Progenitor', 'Progenitor', 'Progenitor', 'Progenitor', 'Progenitor', 'Erythroid', 'Erythroid', 'Erythroid', 'Erythroid', 'Erythroid', 'Erythroid', 'Erythroid', 'Erythroid', 'GMP', 'GMP', 'GMP', 'GMP', 'Mono', 'Mono', 'Mono', 'Mono', 'Mono', 'Mono', 'Mono', 'pDC', 'pDC', 'CLP', 'CLP', 'CLP', 'PreB', 'B', 'CD8CM', 'NK', 'NK', 'NK'], dtype='<U10')