import os
import numpy as np
import pandas as pd
import anndata
import scanpy as sc
import matplotlib.pyplot as plt
import scgen
import seaborn as sns
from scipy import stats
import matplotlib
import scipy.sparse as sparse
# Global matplotlib defaults: Arial, 14 pt text and 14 pt tick labels.
font = dict(family='Arial', size=14)
matplotlib.rc('font', **font)
matplotlib.rc('ytick', labelsize=14)
matplotlib.rc('xtick', labelsize=14)
# Figures saved through scanpy use 300 dpi.
sc.set_figure_params(dpi_save=300)
/Users/Future/Library/Python/3.6/lib/python/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`. from ._conv import register_converters as _register_converters Using TensorFlow backend.
# Every figure of this script is written below this directory.
path_to_save = "../results/Figures/Figure 2/"
os.makedirs(path_to_save, exist_ok=True)
sc.settings.figdir = path_to_save

pbmc = sc.read("../data/train_pbmc.h5ad")
# Shorten the cell-type labels used in the figures. The relabelled column is
# assigned back to obs instead of calling replace(..., inplace=True) on the
# Series returned by obs[...]: that chained in-place call acts on a temporary
# object and no longer updates obs under pandas Copy-on-Write.
pbmc.obs["cell_type"] = pbmc.obs["cell_type"].replace({
    "CD4T": "CD4-T",
    "CD8T": "CD8-T",
    "Dendritic": "DC",
    "FCGR3A+Mono": "F-Mono",
    "CD14+Mono": "CD14-Mono",
})
pbmc
AnnData object with n_obs × n_vars = 16893 × 6998 obs: 'condition', 'n_counts', 'n_genes', 'mt_frac', 'cell_type' var: 'gene_symbol', 'n_cells' uns: 'cell_type_colors', 'condition_colors', 'neighbors' obsm: 'X_pca', 'X_tsne', 'X_umap'
# Number of observations per condition label.
pbmc.obs['condition'].value_counts()
stimulated 8886 control 8007 Name: condition, dtype: int64
# Number of observations per (relabelled) cell type.
pbmc.obs['cell_type'].value_counts()
CD4-T 5564 F-Mono 3601 CD14-Mono 2561 B 1811 NK 1163 CD8-T 1115 DC 1078 Name: cell_type, dtype: int64
# Cross-tabulated counts for every (condition, cell type) pair.
pbmc.obs.groupby(['condition', 'cell_type']).size()
condition cell_type control B 818 CD4-T 2437 CD8-T 574 CD14-Mono 1946 DC 615 F-Mono 1100 NK 517 stimulated B 993 CD4-T 3127 CD8-T 541 CD14-Mono 615 DC 463 F-Mono 2501 NK 646 dtype: int64
# Keep only the observations labelled "CD4-T".
cd4t = pbmc[pbmc.obs["cell_type"] == "CD4-T"]
cd4t
View of AnnData object with n_obs × n_vars = 5564 × 6998 obs: 'condition', 'n_counts', 'n_genes', 'mt_frac', 'cell_type' var: 'gene_symbol', 'n_cells' uns: 'cell_type_colors', 'condition_colors', 'neighbors' obsm: 'X_pca', 'X_tsne', 'X_umap'
sc.set_figure_params(fontsize=14)
# Neighbourhood graph + UMAP embedding of the training data.
sc.pp.neighbors(pbmc)
sc.tl.umap(pbmc)
sc.pl.umap(
    pbmc,
    color=["condition"],
    legend_fontsize=14,
    palette=["#96a1a3", "#A4E804"],
    save="_conditions.pdf",
    show=True,
    frameon=False,
)
# scanpy writes "umap_conditions.pdf" into figdir; move it to the panel name.
# os.replace overwrites an existing target on every platform, so re-running
# this cell does not fail where os.rename refuses to overwrite (Windows).
os.replace(
    os.path.join(path_to_save, "umap_conditions.pdf"),
    os.path.join(path_to_save, "Fig2a_umap_conditions.pdf"),
)
... storing 'cell_type' as categorical
saving figure to file ../results/Figures/Figure 2/umap_conditions.pdf
# NOTE(review): neighbors/UMAP were already computed for the condition panel;
# this recomputation looks redundant -- confirm before removing it.
sc.pp.neighbors(pbmc)
sc.tl.umap(pbmc)
sc.pl.umap(
    pbmc,
    color=["cell_type"],
    legend_fontsize=14,
    save="_celltypes.pdf",
    show=True,
    frameon=False,
)
# scanpy writes "umap_celltypes.pdf" into figdir; move it to the panel name.
# os.replace overwrites an existing target on every platform, so re-running
# this cell does not fail where os.rename refuses to overwrite (Windows).
os.replace(
    os.path.join(path_to_save, "umap_celltypes.pdf"),
    os.path.join(path_to_save, "Fig2a_umap_celltypes.pdf"),
)
saving figure to file ../results/Figures/Figure 2/umap_celltypes.pdf
# Load the stored scGen reconstruction; its obs["condition"] values are read
# further down as "<cell type>_ctrl", "<cell type>_pred_stim" and
# "<cell type>_real_stim".
pbmc_reconstructed = sc.read("../data/reconstructed/scGen/pbmc.h5ad")
pbmc_reconstructed
AnnData object with n_obs × n_vars = 24900 × 6998 obs: 'batch', 'cell_type', 'condition'
# Re-create the scGen VAE (100 latent dimensions, one input per pbmc variable)
# and restore its weights from model_path.
# NOTE(review): `network` is not referenced again in the visible part of this
# script -- confirm whether this cell is still needed.
network = scgen.VAEArith(z_dimension=100,
x_dimension=pbmc.shape[1],
model_path="../models/scGen/pbmc/CD4T/scgen")
network.restore_model()
INFO:tensorflow:Restoring parameters from ../models/scGen/pbmc/CD4T/scgen
# Condition labels of the CD4T reconstruction as stored in the file (before
# the hyphenated relabelling applied later in this script).
conditions = {"ctrl": "CD4T_ctrl", "pred_stim": "CD4T_pred_stim", "real_stim": "CD4T_real_stim"}
# Keep only the observations whose condition is one of those three labels.
cd4t_reconstructed = pbmc_reconstructed[pbmc_reconstructed.obs["condition"].isin(conditions.values())]
cd4t_reconstructed
View of AnnData object with n_obs × n_vars = 8001 × 6998 obs: 'batch', 'cell_type', 'condition'
# Rank genes between conditions within CD4-T cells (Wilcoxon, top 100) and
# keep the names ranked for the "stimulated" group.
cd4t = pbmc[pbmc.obs["cell_type"] == "CD4-T"]
sc.tl.rank_genes_groups(cd4t, groupby="condition", n_genes=100, method="wilcoxon")
diff_genes_cd4t = cd4t.uns["rank_genes_groups"]["names"]["stimulated"]
# Original cell-type label -> short label used in the figures.
_CELL_TYPE_LABELS = {
    "CD4T": "CD4-T",
    "CD8T": "CD8-T",
    "Dendritic": "DC",
    "FCGR3A+Mono": "F-Mono",
    "CD14+Mono": "CD14-Mono",
}

# Condition labels are "<cell type>_<suffix>"; derive their renames from the
# cell-type table so the two mappings cannot drift apart.
_CONDITION_SUFFIXES = ("ctrl", "real_stim", "pred_stim")
_CONDITION_LABELS = {
    f"{old}_{suffix}": f"{new}_{suffix}"
    for old, new in _CELL_TYPE_LABELS.items()
    for suffix in _CONDITION_SUFFIXES
}


def _rename_obs_labels(adata, key, mapping):
    """Rename the labels stored in ``adata.obs[key]`` according to ``mapping``.

    The relabelled column is assigned back to ``adata.obs`` instead of using
    ``Series.replace(..., inplace=True)`` on ``adata.obs[key]``: that chained
    in-place call modifies a temporary object and does not update ``obs``
    under pandas Copy-on-Write. Nothing is assigned when no label matches.
    """
    column = adata.obs[key]
    if isinstance(column.dtype, pd.api.types.CategoricalDtype):
        current = list(column.cat.categories)
        renamed = [mapping.get(label, label) for label in current]
        if renamed == current:
            return
        if len(set(renamed)) == len(renamed):
            # Renaming the categories keeps the dtype and the codes intact.
            adata.obs[key] = column.cat.rename_categories(renamed)
        else:
            # An old label and its new name coexist: merge them via objects.
            merged = column.astype(object).replace(mapping)
            adata.obs[key] = merged.astype("category")
        return
    if column.isin(list(mapping)).any():
        adata.obs[key] = column.replace(mapping)


def replace_conditions(adata):
    """Shorten the cell-type prefix of every label in ``adata.obs["condition"]``.

    For example ``"CD4T_ctrl"`` becomes ``"CD4-T_ctrl"`` and
    ``"Dendritic_pred_stim"`` becomes ``"DC_pred_stim"``. Labels without a
    known prefix are left unchanged. ``adata`` is modified in place and
    nothing is returned.
    """
    _rename_obs_labels(adata, "condition", _CONDITION_LABELS)


def replace_cell_types(adata):
    """Shorten the labels in ``adata.obs["cell_type"]`` (e.g. ``"CD4T"`` -> ``"CD4-T"``).

    Unknown labels are left unchanged. ``adata`` is modified in place and
    nothing is returned.
    """
    _rename_obs_labels(adata, "cell_type", _CELL_TYPE_LABELS)
# Hyphenated CD4-T condition labels, as produced by replace_conditions().
conditions = {
    "ctrl": "CD4-T_ctrl",
    "pred_stim": "CD4-T_pred_stim",
    "real_stim": "CD4-T_real_stim",
}
replace_conditions(pbmc_reconstructed)
replace_conditions(cd4t_reconstructed)
replace_cell_types(pbmc_reconstructed)
replace_cell_types(cd4t_reconstructed)
sns.set()
# Predicted vs. real stimulated CD4-T cells; the first five ranked genes are
# passed as gene_list and the full ranked list as top_100_genes.
scgen.plotting.reg_mean_plot(
    cd4t_reconstructed,
    condition_key="condition",
    axis_keys={"x": conditions["pred_stim"], "y": conditions["real_stim"]},
    gene_list=diff_genes_cd4t[:5],
    top_100_genes=diff_genes_cd4t,
    path_to_save=os.path.join(path_to_save, "Fig2b_reg_mean.pdf"),
    legend=False,
    labels={"x": "pred stim", "y": "real stim"},
    show=True,
    fontsize=20,
    textsize=14,
    title="CD4-T",
    x_coeff=0.55,
    range=[0, 5, 1],
)
0.9725512453489008 0.9713736215099469
# Observation counts per condition after relabelling.
print(cd4t_reconstructed.obs['condition'].value_counts())
CD4-T_real_stim 3127 CD4-T_pred_stim 2437 CD4-T_ctrl 2437 Name: condition, dtype: int64
def _as_dense(matrix):
    """Return ``matrix`` as a dense numpy array, densifying sparse input.

    ``toarray()`` is used because it works on both SciPy sparse matrices and
    sparse arrays; the ``.A`` shorthand is not available on sparse arrays.
    """
    if sparse.issparse(matrix):
        return matrix.toarray()
    return np.asarray(matrix)


def _resampled_r2(pred_matrix, real_matrix, n_rounds=100, fraction=0.8):
    """Mean and standard deviation of R^2 over repeated row resampling.

    In each round ``int(fraction * n_rows)`` rows are drawn with replacement
    (``np.random.choice`` default) from each matrix, the per-column means of
    the two samples are regressed against each other and the squared
    correlation coefficient is recorded.

    Raises:
        ValueError: if either matrix has too few rows to draw a sample.
    """
    # Densify once, and each matrix independently, instead of re-checking
    # inside the loop and writing back onto AnnData views.
    pred = _as_dense(pred_matrix)
    real = _as_dense(real_matrix)
    n_pred = int(fraction * pred.shape[0])
    n_real = int(fraction * real.shape[0])
    if n_pred < 1 or n_real < 1:
        raise ValueError(
            "need at least one sampled row per group, got "
            f"{pred.shape[0]} predicted and {real.shape[0]} real rows"
        )
    r_values = np.zeros((1, n_rounds))
    for i in range(n_rounds):
        # Draw order (pred first, then real) matches the original loop.
        pred_idx = np.random.choice(pred.shape[0], n_pred)
        real_idx = np.random.choice(real.shape[0], n_real)
        x = np.average(pred[pred_idx], axis=0)
        y = np.average(real[real_idx], axis=0)
        r_values[0, i] = stats.linregress(x, y).rvalue ** 2
    return r_values.mean(), r_values.std()


def calc_R2(adata, cell_type, n_genes=6998, conditions=None):
    """R^2 between mean predicted and mean real stimulated expression.

    Args:
        adata: annotated data with ``obs["condition"]`` and ``obs["cell_type"]``.
        cell_type: label selecting the cells used for gene ranking.
        n_genes: when different from ``adata.shape[1]``, restrict to the top
            ``n_genes`` genes ranked (Wilcoxon) for the real-stimulated group.
        conditions: dict with keys ``"real_stim"`` and ``"pred_stim"`` naming
            the two condition labels; defaults to ``"<cell_type>_real_stim"``
            and ``"<cell_type>_pred_stim"``.

    Returns:
        ``(mean, std)`` of R^2 over the resampling rounds.
    """
    if conditions is None:
        conditions = {
            "real_stim": f"{cell_type}_real_stim",
            "pred_stim": f"{cell_type}_pred_stim",
        }
    if n_genes != adata.shape[1]:
        # Subset first, then copy: only the selected cells are duplicated.
        celldata = adata[adata.obs["cell_type"] == cell_type].copy()
        print(celldata.obs["condition"].unique().tolist())
        sc.tl.rank_genes_groups(celldata, groupby="condition", n_genes=n_genes, method="wilcoxon")
        diff_genes = celldata.uns["rank_genes_groups"]["names"][conditions["real_stim"]]
        adata = adata[:, diff_genes.tolist()]
    real_stim = adata[adata.obs["condition"] == conditions["real_stim"]]
    pred_stim = adata[adata.obs["condition"] == conditions["pred_stim"]]
    return _resampled_r2(pred_stim.X, real_stim.X)


def calc_R2_mean_var(adata, cell_types, n_genes):
    """Run :func:`calc_R2` for every entry of ``cell_types``.

    Returns:
        Two lists aligned with ``cell_types``: the R^2 means and the R^2
        standard deviations (named ``vars`` for historical reasons).
    """
    r2_means, r2_vars = [], []
    for cell_type in cell_types:
        conditions = {"real_stim": cell_type + "_real_stim", "pred_stim": cell_type + "_pred_stim"}
        r2_mean, r2_var = calc_R2(adata, cell_type, n_genes=n_genes, conditions=conditions)
        r2_means.append(r2_mean)
        r2_vars.append(r2_var)
    return r2_means, r2_vars


def calc_R2_specific_model(adata, n_genes, conditions):
    """R^2 (mean, std) for a single-model reconstruction; also prints the pair.

    Args:
        adata: annotated data with ``obs["condition"]``.
        n_genes: when different from ``adata.shape[1]``, genes are ranked on
            ``adata`` itself (the ranking is stored in ``adata.uns``) and only
            the top ``n_genes`` for the real-stimulated group are used.
        conditions: dict with keys ``"real_stim"`` and ``"pred_stim"``.
    """
    if n_genes != adata.shape[1]:
        sc.tl.rank_genes_groups(adata, groupby="condition", n_genes=n_genes, method="wilcoxon")
        diff_genes = adata.uns["rank_genes_groups"]["names"][conditions["real_stim"]]
        adata = adata[:, diff_genes.tolist()]
    real_stim = adata[adata.obs["condition"] == conditions["real_stim"]]
    pred_stim = adata[adata.obs["condition"] == conditions["pred_stim"]]
    r2_mean, r2_std = _resampled_r2(pred_stim.X, real_stim.X)
    print(r2_mean, r2_std)
    return r2_mean, r2_std
# Cell types evaluated, in the order they appear in the bar plot.
cell_types = ["B", "CD14-Mono", "CD4-T", "CD8-T", "DC", "F-Mono", "NK"]
cell_types_colnames = list(cell_types)

# R^2 restricted to the top 100 ranked genes ...
r2_top100_means, r2_top100_vars = calc_R2_mean_var(
    pbmc_reconstructed,
    cell_types=cell_types_colnames,
    n_genes=100,
)
# ... and over all genes (n_genes equal to the number of variables).
r2_all_means, r2_all_vars = calc_R2_mean_var(
    pbmc_reconstructed,
    cell_types=cell_types_colnames,
    n_genes=pbmc_reconstructed.shape[1],
)
... storing 'cell_type' as categorical ... storing 'condition' as categorical
['B_ctrl', 'B_pred_stim', 'B_real_stim']
... storing 'cell_type' as categorical ... storing 'condition' as categorical
['CD14-Mono_ctrl', 'CD14-Mono_pred_stim', 'CD14-Mono_real_stim']
... storing 'cell_type' as categorical ... storing 'condition' as categorical
['CD4-T_ctrl', 'CD4-T_pred_stim', 'CD4-T_real_stim']
... storing 'cell_type' as categorical ... storing 'condition' as categorical
['CD8-T_ctrl', 'CD8-T_pred_stim', 'CD8-T_real_stim']
... storing 'cell_type' as categorical ... storing 'condition' as categorical
['DC_ctrl', 'DC_pred_stim', 'DC_real_stim']
... storing 'cell_type' as categorical ... storing 'condition' as categorical
['F-Mono_ctrl', 'F-Mono_pred_stim', 'F-Mono_real_stim']
... storing 'cell_type' as categorical ... storing 'condition' as categorical
['NK_ctrl', 'NK_pred_stim', 'NK_real_stim']
# Stack the "top 100 DEGs" results on top of the "all genes" results.
all_means = np.concatenate((r2_top100_means, r2_all_means), axis=0)
all_vars = np.concatenate((r2_top100_vars, r2_all_vars), axis=0)
# Derive the label counts from the results instead of hard-coding 7.
all_types = ["top 100 DEGs"] * len(r2_top100_means) + ["all genes"] * len(r2_all_means)
cell_types_x = 2 * cell_types
df = pd.DataFrame({"R^2 Means": all_means, "R^2 Stddevs": all_vars, "Type": all_types, "Cell Types": cell_types_x})
# numeric_only=True: the frame also holds the string columns "Type" and
# "Cell Types", and pandas >= 2.0 raises instead of silently skipping them.
print(df[df["Type"] == "top 100 DEGs"].mean(numeric_only=True))
print(df[df["Type"] == "all genes"].mean(numeric_only=True))
df
R^2 Means 0.936241 R^2 Stddevs 0.002833 dtype: float64 R^2 Means 0.948563 R^2 Stddevs 0.001827 dtype: float64
R^2 Means | R^2 Stddevs | Type | Cell Types | |
---|---|---|---|---|
0 | 0.965182 | 0.002780 | top 100 DEGs | B |
1 | 0.853464 | 0.005439 | top 100 DEGs | CD14-Mono |
2 | 0.987577 | 0.000560 | top 100 DEGs | CD4-T |
3 | 0.985395 | 0.001464 | top 100 DEGs | CD8-T |
4 | 0.929247 | 0.003594 | top 100 DEGs | DC |
5 | 0.860960 | 0.004115 | top 100 DEGs | F-Mono |
6 | 0.971860 | 0.001881 | top 100 DEGs | NK |
7 | 0.961733 | 0.001258 | all genes | B |
8 | 0.915830 | 0.003233 | all genes | CD14-Mono |
9 | 0.970868 | 0.000890 | all genes | CD4-T |
10 | 0.971850 | 0.000942 | all genes | CD8-T |
11 | 0.949802 | 0.002273 | all genes | DC |
12 | 0.907202 | 0.002770 | all genes | F-Mono |
13 | 0.962656 | 0.001423 | all genes | NK |
def autolabel(rects):
    """Write each bar's height, to two decimals, just above the bar.

    ``rects`` is an iterable of objects exposing ``get_height()``, ``get_x()``
    and ``get_width()``; the text is placed at 1.01 times the bar height,
    horizontally centred on the bar.
    """
    for bar in rects:
        bar_height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2.
        plt.text(x_center, 1.01 * bar_height, '%.2f' % float(bar_height),
                 ha='center', va='bottom', fontsize=18)
def grouped_barplot(df, cat, subcat, val, err, filename, put_label=False, legend=False, offset=0.375):
    """Draw a grouped bar chart with error bars and save it under ``path_to_save``.

    Args:
        df: table holding the columns named by ``cat``, ``subcat``, ``val``, ``err``.
        cat: column whose unique values label the x axis.
        subcat: column whose unique values define the bar series.
        val: column with the bar heights.
        err: column with the error-bar sizes.
        filename: file name joined onto the module-level ``path_to_save``.
        put_label: annotate every bar with its height via ``autolabel``.
        legend: draw a legend to the right of the axes.
        offset: horizontal shift of the x tick labels.

    Closes all open figures first and changes the global tick label sizes.
    On top of every bar, 10 points sampled from a normal distribution with
    mean ``val`` and standard deviation ``err`` are plotted as black dots.
    """
    plt.close("all")
    matplotlib.rc('ytick', labelsize=25)
    matplotlib.rc('xtick', labelsize=30)

    categories = df[cat].unique()
    # Groups are spaced two units apart; series are shifted within a group.
    group_origin = np.arange(0, 2 * len(categories), 2)
    series_names = df[subcat].unique()
    plt.figure(figsize=(12, 10))

    for position, series in enumerate(series_names):
        rows = df[df[subcat] == series]
        heights = rows[val].values
        spreads = rows[err].values
        bar_x = group_origin + position / 1.25
        bars = plt.bar(bar_x, heights, capsize=10, alpha=0.95, label=f"{series}", yerr=spreads)
        samples = np.random.normal(heights, spreads, (10, len(categories)))
        plt.plot(bar_x, samples.T, '.', color='black', alpha=0.5)
        if put_label:
            autolabel(bars)

    plt.ylabel(r"$\mathrm{R^2}$", fontsize=25)
    plt.xticks(group_origin + offset, categories, rotation=90)
    if legend:
        plt.legend(bbox_to_anchor=(1.05, 0.5), loc="center left", borderaxespad=0, prop={'size': 18})
    plt.tight_layout()
    plt.savefig(os.path.join(path_to_save, filename), dpi=300)
    plt.show()
# Fig. 2c: R^2 per cell type, one bar series per value of "Type".
sns.set()
grouped_barplot(df, "Cell Types", "Type", "R^2 Means", "R^2 Stddevs", legend=True, filename="Fig2c_celltypes_barplots.pdf")
# CD4T predictions of the models compared against scGen, one file per model.
stgan_reconstructed = sc.read("../data/reconstructed/CGAN/CGAN_CD4T.h5ad")
pca_reconstructed = sc.read("../data/reconstructed/PCAVecArithm/PCA_CD4T.h5ad")
ges_reconstructed = sc.read("../data/reconstructed/VecArithm/VecArithm_CD4T.h5ad")
cvae_reconstructed = sc.read("../data/reconstructed/CVAE/CVAE_CD4T.h5ad")
replace_conditions(cvae_reconstructed)

# Control, real stimulation and the scGen prediction come from the scGen
# reconstruction; every other model contributes only its prediction.
ctrl = cd4t_reconstructed[cd4t_reconstructed.obs["condition"] == "CD4-T_ctrl"]
real_stim = cd4t_reconstructed[cd4t_reconstructed.obs["condition"] == "CD4-T_real_stim"]
scgen_pred_stim = cd4t_reconstructed[cd4t_reconstructed.obs["condition"] == "CD4-T_pred_stim"]
stgan_pred_stim = stgan_reconstructed[stgan_reconstructed.obs["condition"] == "pred_stim"]
ges_pred_stim = ges_reconstructed[ges_reconstructed.obs["condition"] == "pred_stim"]
pca_pred_stim = pca_reconstructed[pca_reconstructed.obs["condition"] == "pred_stim"]
cvae_pred_stim = cvae_reconstructed[cvae_reconstructed.obs["condition"] == "CD4-T_pred_stim"]
if sparse.issparse(cvae_pred_stim.X):
    # toarray() works on SciPy sparse matrices and sparse arrays alike; the
    # `.A` shorthand is not available on sparse arrays.
    cvae_pred_stim.X = cvae_pred_stim.X.toarray()

# Relabel every subset with the name shown on the violin plot's x axis.
ctrl.obs["condition"] = "control"
real_stim.obs["condition"] = "stimulation real"
scgen_pred_stim.obs["condition"] = "scGen"
cvae_pred_stim.obs["condition"] = "CVAE"
stgan_pred_stim.obs["condition"] = "style transfer GAN"
ges_pred_stim.obs["condition"] = "Vec. Arithm."
pca_pred_stim.obs["condition"] = "PCA + Vec. Arithm."
models = ctrl.concatenate(real_stim, scgen_pred_stim, cvae_pred_stim, stgan_pred_stim, ges_pred_stim, pca_pred_stim)
models.obs["condition"].unique()
array(['control', 'stimulation real', 'scGen', 'CVAE', 'style transfer GAN', 'Vec. Arithm.', 'PCA + Vec. Arithm.'], dtype=object)
# Rename the ISG15 variable so the violin plot's axis label names the cell type.
var_temp = models.var_names.tolist()
var_temp[var_temp.index("ISG15")] = "ISG15 in CD4-T"
models.var_names = var_temp
# NOTE(review): this dict is not passed to matplotlib.rc in this cell.
font = {'family': 'Arial',
        'size': 30}
sc.set_figure_params(fontsize=25)
sns.set()
matplotlib.rc('ytick', labelsize=14)
matplotlib.rc('xtick', labelsize=14)
sc.pl.violin(
    models,
    keys="ISG15 in CD4-T",
    groupby="condition",
    rotation=90,
    show=True,
    order=models.obs["condition"].unique().tolist(),
    color="#ee0ef0",
    save="_model_cmpr.pdf",
)
# scanpy writes "violin_model_cmpr.pdf" into figdir; move it to the panel name.
# os.replace overwrites an existing target on every platform, so re-running
# this cell does not fail where os.rename refuses to overwrite (Windows).
os.replace(
    os.path.join(path_to_save, "violin_model_cmpr.pdf"),
    os.path.join(path_to_save, "Fig2d_model_comparison.pdf"),
)
... storing 'cell_type' as categorical ... storing 'condition' as categorical
saving figure to file ../results/Figures/Figure 2/violin_model_cmpr.pdf
# Observation counts per model label in the concatenated object.
models.obs['condition'].value_counts()
stimulation real 3127 style transfer GAN 2437 scGen 2437 control 2437 Vec. Arithm. 2437 PCA + Vec. Arithm. 2437 CVAE 2437 Name: condition, dtype: int64
# scGen and CVAE label their conditions with the "CD4-T_" prefix.
conditions = {"real_stim": "CD4-T_real_stim", "pred_stim": "CD4-T_pred_stim"}
# n_genes=6998 is compared against adata.shape[1] inside the helper; when the
# two are equal the gene-ranking step is skipped and all genes are used.
scgen_r2_mean, scgen_r2_var = calc_R2_specific_model(cd4t_reconstructed, n_genes=6998, conditions=conditions)
cvae_r2_mean, cvae_r2_var = calc_R2_specific_model(cvae_reconstructed, n_genes=6998, conditions=conditions)
0.9707041000432634 0.0007938444616574701 0.9125767915273829 0.0007355002324178982
if sparse.issparse(cvae_reconstructed.X):
    # toarray() works on SciPy sparse matrices and sparse arrays alike; the
    # `.A` shorthand is not available on sparse arrays.
    cvae_reconstructed.X = cvae_reconstructed.X.toarray()
# The remaining models use unprefixed condition labels.
conditions = {"real_stim": "real_stim", "pred_stim": "pred_stim"}
ges_r2_mean, ges_r2_var = calc_R2_specific_model(ges_reconstructed, n_genes=6998, conditions=conditions)
pca_r2_mean, pca_r2_var = calc_R2_specific_model(pca_reconstructed, n_genes=6998, conditions=conditions)
stgan_r2_mean, stgan_r2_var = calc_R2_specific_model(stgan_reconstructed, n_genes=6998, conditions=conditions)
0.8939591026286667 0.001230348250776631 0.8963193518092095 0.0012488944098179996 0.13091020539366757 0.0005310207637818714
# One row per model, in the order the bars are drawn.
all_means = np.array([
    scgen_r2_mean,
    cvae_r2_mean,
    stgan_r2_mean,
    ges_r2_mean,
    pca_r2_mean,
])
all_vars = np.array([
    scgen_r2_var,
    cvae_r2_var,
    stgan_r2_var,
    ges_r2_var,
    pca_r2_var,
])
all_types = 5 * ["All genes"]
models = [
    "scGen",
    "CVAE",
    "style transfer GAN",
    "Vec. Arithm.",
    "PCA + Vec. Arithm.",
]
df = pd.DataFrame({
    "R^2 Means": all_means,
    "R^2 Stddevs": all_vars,
    "Type": all_types,
    "Models": models,
})
df
R^2 Means | R^2 Stddevs | Type | Models | |
---|---|---|---|---|
0 | 0.970814 | 0.000820 | All genes | scGen |
1 | 0.912753 | 0.000748 | All genes | CVAE |
2 | 0.130864 | 0.000506 | All genes | style transfer GAN |
3 | 0.893962 | 0.001219 | All genes | Vec. Arithm. |
4 | 0.896281 | 0.001219 | All genes | PCA + Vec. Arithm. |
# NOTE(review): a base font size of 2 looks unusually small -- confirm it is
# intended.
font = dict(family='Arial', size=2)
matplotlib.rc('font', **font)
matplotlib.rc('ytick', labelsize=20)
matplotlib.rc('xtick', labelsize=20)
# Fig. 2e: one labelled bar per model, no legend, tick labels not shifted.
grouped_barplot(
    df,
    "Models",
    "Type",
    "R^2 Means",
    "R^2 Stddevs",
    put_label=True,
    legend=False,
    filename="Fig2e_models_barplots.pdf",
    offset=0.0,
)
# Genes shown as columns of the dot plot below.
gene_list = ["CD3D", "CCL5", "GNLY", "CD79A", "FCGR3A", "S100A9", "HLA-DQA1",
"ISG15", "IFI6", "IFIT1", "CXCL10", "CXCL11", "APOBEC3A", "DEFB1",
"CCL8", "TARBP1"]
# Shorten the condition labels through the shared helper rather than nine
# inline replace(..., inplace=True) calls; the helper covers the same
# CD14+Mono / Dendritic / FCGR3A+Mono renames (plus the CD4T / CD8T ones).
replace_conditions(pbmc_reconstructed)
sc.set_figure_params(fontsize=14)
sc.pl.dotplot(pbmc_reconstructed, var_names=gene_list, groupby="condition", save=".pdf", show=True)
# scanpy writes "dotplot.pdf" into figdir; move it to the panel name.
# os.replace overwrites an existing target on every platform, so re-running
# this cell does not fail where os.rename refuses to overwrite (Windows).
os.replace(
    os.path.join(path_to_save, "dotplot.pdf"),
    os.path.join(path_to_save, "Fig2f_dotplot.pdf"),
)
... storing 'cell_type' as categorical ... storing 'condition' as categorical
saving figure to file ../results/Figures/Figure 2/dotplot.pdf
# Observation counts per condition label in the full scGen reconstruction.
pbmc_reconstructed.obs['condition'].value_counts()
CD4-T_real_stim 3127 F-Mono_real_stim 2501 CD4-T_ctrl 2437 CD4-T_pred_stim 2437 CD14-Mono_pred_stim 1946 CD14-Mono_ctrl 1946 F-Mono_pred_stim 1100 F-Mono_ctrl 1100 B_real_stim 993 B_pred_stim 818 B_ctrl 818 NK_real_stim 646 CD14-Mono_real_stim 615 DC_ctrl 615 DC_pred_stim 615 CD8-T_pred_stim 574 CD8-T_ctrl 574 CD8-T_real_stim 541 NK_pred_stim 517 NK_ctrl 517 DC_real_stim 463 Name: condition, dtype: int64