import os,sys
import scanpy as sc
import anndata
import numpy as np
import pandas as pd
import re
%load_ext rpy2.ipython
%%R
library(tidyverse)
library(ggplot2)
R[write to console]: ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ── R[write to console]: ✔ ggplot2 3.3.2 ✔ purrr 0.3.4 ✔ tibble 3.0.4 ✔ dplyr 1.0.2 ✔ tidyr 1.1.2 ✔ stringr 1.4.0 ✔ readr 1.4.0 ✔ forcats 0.5.0 R[write to console]: ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ✖ dplyr::filter() masks stats::filter() ✖ dplyr::lag() masks stats::lag()
Annotations were saved on the GDrive in `Pan_fetal/annotations`.
Original files are in `original_files`. These come from different sources:
- `fetal_gut_anno.csv` was provided by Rasa
- `fetal_thymus_anno.csv` was saved from the `obs` slot in the anndata object saved as `/nfs/team205/ig7/Projects/Pan_fetal/JP_archive/18_Pan_fetal/HTA08.v01.A05.Science_human_fig1.h5ad`. I use as labels the most detailed annotation level here (`Anno_level_5`)
- `fetal_kidney_anno.csv` was saved from the `obs` slot in the anndata downloaded from https://cellgeni.cog.sanger.ac.uk/BenKidney_v2.1/Fetal_full.h5ad. I use as labels the column `celltype`
- `fetal_skin.annotation.20200829.for_Issac.tsv` was provided by Ni

The original files are restricted to annotations only in `Pan_fetal/annotations/*_annot.csv` files.
As started by Chenqu, we are translating the detailed cell labels for each organ into a uniform labelling that is used to guide data integration. Translations from old to new labels are saved in `Pan_fetal/annotations/*_annot_lvls.csv` files.
# Directory holding the per-organ annotation CSVs; every read and write below is
# built by string concatenation onto this path, so the trailing slash matters.
annot_dir = '/home/jovyan/mount/gdrive/Pan_fetal/annotations/'
Making uniform labels for thymus cells
# Uniform (upper-case) label -> list of detailed thymus annotations it replaces.
th_annot_lvls = {
    "SP T CELL": [
        'CD8+T', 'CD4+T', 'CD8αα(I)', 'αβT(entry)', 'CD8αα(II)', 'Th17',
        'γδT', 'CD8+Tmem', 'CD4+Tmem', 'CD4+CTL', 'Tfh', 'CD4+PD1',
    ],
    "DP T CELL": ['DP(P)', 'DP(Q)'],
    "DN T CELL": ['DN(Q)', 'DN(P)', "DN(early)"],
    "TREG": ['Treg(diff)', 'T(agonist)', "Treg"],
    "MONOCYTE": ["Mono"],
    "B CELL": ["B_naive", 'B_memory', 'B_plasma'],
    "PRE PRO B CELL": ["B_pro/pre"],
    "DC": ['DC2', 'DC1', 'aDC1', 'aDC3', 'aDC2'],
    "PDC": ['pDC'],
    "NK T": ['NKT'],
    "MACROPHAGE": ["Mac"],
    "NEUTROPHIL-MYELOID PROGENITOR": ['NMP'],
    "FIBROBLAST": ["Fb_1", "Fb_2", "Fb_cycling"],
    "NK": ["NK"],
    "MAST CELL": ["Mast"],
    "LYMPHOID PROGENITOR": ["ETP"],
    "ILC": ["ILC3"],
    'ERYTHROCYTE': ["Ery"],
    'SMOOTH MUSCLE/PERICYTE': ["VSMC"],
    'EPITHELIUM': [
        'mTEC(I)', 'TEC(neuro)', 'mTEC(II)', 'cTEC', 'mcTEC', 'TEC(myo)',
        'mTEC(III)', 'mTEC(IV)', 'Epi_GCM2',
    ],
    'ENDOTHELIAL CELL': ["Endo"],
    'MEGAKARYOCYTE': ['Mgk'],
    'LYMPHATIC': ["Lymph"],
}

# Flatten the mapping into one row per original label, indexed by the original
# label, with the uniform label as the only column. Rows follow the dict order.
# (The previous `drop("original_cell_type", 1)` passed `axis` positionally,
# which pandas >= 2.0 no longer accepts; `set_index` gives the same table.)
th_lvls_df = pd.DataFrame(
    [
        (original, updated)
        for updated, originals in th_annot_lvls.items()
        for original in originals
    ],
    columns=['original_cell_type', "updated_cell_type"],
).set_index("original_cell_type")
th_lvls_df.to_csv(annot_dir + 'th_annot_lvls.csv')
Making uniform labels for gut cells
# Build the gut translation table: one row per distinct original label in
# gu_annot.csv, mapped to its uniform label.
gu_annot = pd.read_csv(annot_dir + "gu_annot.csv")
gut_original = gu_annot["cell.labels"].unique()

# Ordered rewrite rules as (predicate, replacement). ORDER MATTERS: each rule is
# applied to the output of the previous one (e.g. 'NK T CELL' must be handled
# before 'NK CELL', and the DC rule must not swallow 'PDC').
gut_rules = [
    (lambda x: "ENDOTHELIAL" in x, "ENDOTHELIAL CELL"),
    (lambda x: "ERYTHROBLAST" in x, "ERYTHROCYTE"),
    (lambda x: "NEUTROPHIL" in x, "NEUTROPHIL"),
    (lambda x: "DC" in x and x != "PDC", "DC"),
    (lambda x: "STROMA" in x, "FIBROBLAST"),
    (lambda x: x == "CLP", 'LYMPHOID PROGENITOR'),
    (lambda x: "EEC" in x, "ENTEROENDOCRINE CELL"),
    (lambda x: "GLIA" in x, "GLIA"),
    (lambda x: "NEURON" in x, "NEURON"),
    (lambda x: "ENDOTHELIUM" in x, "ENDOTHELIAL CELL"),
    (lambda x: "ENTEROCYTE" in x, "ENTEROCYTE"),
    (lambda x: "MYELOID" in x, "CD16 MYELOID"),
    (lambda x: x in ["CYCLING T", 'SELL+ CD4 T', 'SELL+ CD8 T'], "SP T CELL"),
    (lambda x: "ENCC" in x, "NEURAL CREST"),
    (lambda x: "ILCP" in x, "ILC PRECURSOR"),
    (lambda x: 'LTI-LIKE ILC3' in x, 'LTI/ILC3'),
    (lambda x: 'NK/T' in x, 'NK T'),
    (lambda x: 'NK T CELL' in x, 'NK T'),
    (lambda x: 'NK CELL' in x, 'NK'),
    (lambda x: 'MACROPHAGE' in x, 'MACROPHAGE'),
    (lambda x: 'MONOCYTE' in x, 'MONOCYTE'),
    (lambda x: 'MAST' in x, 'MAST CELL'),
    (lambda x: 'SCHWANN' in x, 'SCHWANN CELLS'),
    (lambda x: 'SCP' in x, 'SCHWANN CELLS'),
    (lambda x: 'IMMATURE B' in x, 'B CELL'),
    (lambda x: 'PRE-B' in x, 'PRE B CELL'),
    (lambda x: 'PRO-B' in x, 'PRO B CELL'),
    (lambda x: 'SMC' in x, 'SMOOTH MUSCLE/PERICYTE'),
]

# Start from the upper-cased original labels and apply every rule in turn.
gut_labels = [x.upper() for x in gut_original]
for is_match, new_label in gut_rules:
    gut_labels = [new_label if is_match(x) else x for x in gut_labels]

# Index by the original label. (`drop(..., 1)` with a positional axis is
# rejected by pandas >= 2.0, so the index is set directly instead.)
gu_lvls_df = pd.DataFrame(
    {"original_cell_type": gut_original, "updated_cell_type": gut_labels}
).set_index("original_cell_type")
gu_lvls_df.to_csv(annot_dir + 'gu_annot_lvls.csv')
Making uniform labels for kidney cells: I use the compartment annotation from the original file to group nephron annotations
N.B. in the original file there was a grand total of 12 cells with duplicated barcodes: I decided to exclude these
# Build the kidney translation table: one row per distinct original label in
# ki_annot.csv, mapped to its uniform label.
ki_annot = pd.read_csv(annot_dir + "ki_annot.csv")
ki_original = ki_annot["cell.labels"].unique()

## Pick kidney specific labels
# Upper-cased cell types that the original kidney file assigns to the
# "fetal_nephron" compartment; these are collapsed into NEPHRON below.
ki_anno_df = pd.read_csv(annot_dir + "original_files/fetal_kidney_anno.csv", index_col=0)
ki_anno_df = ki_anno_df[["celltype", "compartment"]].drop_duplicates()
ki_anno_df["celltype"] = [x.upper() for x in ki_anno_df["celltype"]]
fetal_nephron_labels = ki_anno_df[ki_anno_df.compartment == "fetal_nephron"]["celltype"].values

# Ordered rewrite rules as (predicate, replacement); each rule sees the output
# of the previous one, so the order must be preserved.
ki_rules = [
    (lambda x: 'MACROPHAGE' in x, 'MACROPHAGE'),
    (lambda x: " T CELL" in x, "SP T CELL"),
    (lambda x: "NK" in x, "NK"),
    (lambda x: "B CELL" in x, "B CELL"),
    (lambda x: x in ['PROLIFERATING FIBROBLAST', 'FIBROBLAST 1', 'FIBROBLAST 2'], 'FIBROBLAST'),
    (lambda x: x in ['PROLIFERATING MYOFIBROBLAST', 'MYOFIBROBLAST 1', 'MYOFIBROBLAST 2'], 'MYOFIBROBLAST'),
    (lambda x: "CDC" in x, 'DC'),
    (lambda x: "PDC" in x, 'PDC'),
    (lambda x: "MONOCYTE" in x, 'MONOCYTE'),
    (lambda x: "STROMA PROGENITOR" in x, 'STROMA PROGENITOR'),
    (lambda x: "MAST" in x, 'MAST CELL'),
    # Was the chained comparison `x=="INNATE LIKE LYMPHOCYTE" in x`, which
    # only ever matched on exact equality; written as plain equality now.
    (lambda x: x == "INNATE LIKE LYMPHOCYTE", 'ILC'),
    (lambda x: "ENDOTHELIUM" in x, 'ENDOTHELIAL CELL'),
    # Applied last, against whatever the label has become after the rules above.
    (lambda x: x in fetal_nephron_labels, 'NEPHRON'),
]

ki_labels = [x.upper() for x in ki_original]
for is_match, new_label in ki_rules:
    ki_labels = [new_label if is_match(x) else x for x in ki_labels]

# Index by the original label. (`drop(..., 1)` with a positional axis is
# rejected by pandas >= 2.0, so the index is set directly instead.)
ki_lvls_df = pd.DataFrame(
    {"original_cell_type": ki_original, "updated_cell_type": ki_labels}
).set_index("original_cell_type")
ki_lvls_df.to_csv(annot_dir + 'ki_annot_lvls.csv')
Updated from skin
# Build the skin translation table from the second column of the headerless
# skin annotation TSV: one row per distinct original label.
sk_annot = pd.read_table(annot_dir + "original_files/fetal_skin.annotation.20200829.for_Issac.tsv", header=None)
sk_original = sk_annot.iloc[:, 1].unique()
sk_labels = [x.upper() for x in sk_original]

# Previously saved skin translations, with both columns upper-cased.
sk_lvls_df = pd.read_csv(annot_dir + "original_files/sk_annot_lvls.csv")
sk_lvls_df.iloc[:, 1] = [x.upper() for x in sk_lvls_df.iloc[:, 1]]
sk_lvls_df.iloc[:, 0] = [x.upper() for x in sk_lvls_df.iloc[:, 0]]
# NOTE(review): `x in <Series>` tests the Series *index*, not its values. With
# the default integer index from read_csv this never matches a string label, so
# the saved translations are effectively not applied here. Left unchanged so the
# labels written below stay as they are; if the table is meant to apply, compare
# against the column values and take the scalar match - confirm intent.
sk_labels = [sk_lvls_df.updated_cell_type[sk_lvls_df.original_cell_type==x] if x in sk_lvls_df.original_cell_type else x for x in sk_labels]

# Ordered rewrite rules as (predicate, replacement); each rule sees the output
# of the previous one. 'MONOCYTE PRECUSOR' is spelled as in the rule it replaces.
sk_rules = [
    (lambda x: 'GRANULOCYTE PROGENITOR' in x, 'GMP'),
    (lambda x: "GLIA" in x, "GLIA"),
    (lambda x: "FIBROBLAST" in x, "FIBROBLAST"),
    (lambda x: "ENDOTHELIUM" in x, "ENDOTHELIAL CELL"),
    (lambda x: "MONOCYTE PRECUSOR" in x, "MONOCYTE/DC PRECURSOR"),
    (lambda x: x == "NK CELL", "NK"),
    (lambda x: x == "NK T CELL", "NK T"),
    (lambda x: x == "SMOOTH MUSCLE", "SMOOTH MUSCLE/PERICYTE"),
    (lambda x: x == "SKELETAL MUSCLE", "MUSCLE"),
    (lambda x: x in ["LTI", "ILC3"], "LTI/ILC3"),
    (lambda x: x in ["MAC-MONO"], "MONOCYTE/MACROPHAGE"),
    (lambda x: x in ['LANGERHANS CELL'], "LANGERHAN CELLS"),
    (lambda x: x in ['NEURONAL CELLS'], "NEURON"),
    (lambda x: x in ['SCHWANN CELL'], "SCHWANN CELLS"),
    (lambda x: x in ['ASDC'], "DC"),
    (lambda x: x in ['SUPRABASAL KC', "HF KC"], "KERATINOCYTES"),
]
for is_match, new_label in sk_rules:
    sk_labels = [new_label if is_match(x) else x for x in sk_labels]

# From here on sk_lvls_df is the NEW translation table, indexed by original
# label. (`drop(..., 1)` with a positional axis is rejected by pandas >= 2.0,
# so the index is set directly instead.)
sk_lvls_df = pd.DataFrame(
    {"original_cell_type": sk_original, "updated_cell_type": sk_labels}
).set_index("original_cell_type")
sk_lvls_df.to_csv(annot_dir + 'sk_annot_lvls.csv')
sk_lvls_df
updated_cell_type | |
---|---|
original_cell_type | |
Macrophage | MACROPHAGE |
Monocyte | MONOCYTE |
Granulocyte progenitor | GMP |
DC2 | DC2 |
Microglia | GLIA |
Monocyte precusor | MONOCYTE/DC PRECURSOR |
Fibroblast WNT2+ | FIBROBLAST |
NK cell | NK |
B cell | B CELL |
Lti | LTI/ILC3 |
ILC3 | LTI/ILC3 |
MEMP | MEMP |
CD4 T cell | CD4 T CELL |
HSC | HSC |
Mac-mono | MONOCYTE/MACROPHAGE |
Lymphatic endothelium | ENDOTHELIAL CELL |
NK T cell | NK T |
CD8 T cell | CD8 T CELL |
Skeletal muscle | MUSCLE |
pDC | PDC |
Mast cell | MAST CELL |
DC1 | DC1 |
Vascular endothelium | ENDOTHELIAL CELL |
Neutrophil | NEUTROPHIL |
Langerhans cell | LANGERHAN CELLS |
Megakaryocyte | MEGAKARYOCYTE |
Erythroid | ERYTHROID |
Myofibroblast | FIBROBLAST |
Fibroblast HF | FIBROBLAST |
Fibroblast HOX5+ | FIBROBLAST |
Fibroblast unknown | FIBROBLAST |
Pericyte | PERICYTE |
Fibroblast FRZB+ | FIBROBLAST |
Early erythroid | EARLY ERYTHROID |
Neuronal cells | NEURON |
Schwann cell | SCHWANN CELLS |
Smooth muscle | SMOOTH MUSCLE/PERICYTE |
ILC2 | ILC2 |
Lymphoid progenitor | LYMPHOID PROGENITOR |
Adipocyte | ADIPOCYTE |
Treg | TREG |
ASDC | DC |
Periderm | PERIDERM |
Melanocyte | MELANOCYTE |
Melanoblast | MELANOBLAST |
Basal KC | BASAL KC |
Suprabasal KC | KERATINOCYTES |
Fibroblast dermal papillia | FIBROBLAST |
HF KC | KERATINOCYTES |
# Per-organ annotation files ("*annot.csv") and translation files
# ("*annot_lvls*") found in annot_dir. os.listdir returns entries in arbitrary
# order, so both listings are sorted to keep the row order of the concatenated
# tables (and of the CSVs saved from them) reproducible between runs.
annot_files = sorted(x for x in os.listdir(annot_dir) if "annot.csv" in x)
lvls_annot_files = sorted(x for x in os.listdir(annot_dir) if "annot_lvls" in x)
# Organ code = file name up to "_annot"; stays aligned with annot_files.
organs = [x.split("_annot")[0] for x in annot_files]
Load all translations to fix errors from manual annotation
# Stack every per-organ translation table into one frame, tagging each row
# with its organ code (the file-name prefix before the first underscore).
# Frames are collected and concatenated once: concatenating inside the loop
# re-copies the accumulated rows each time, and `pd.concat(objs, 0)` with a
# positional axis is rejected by pandas >= 2.0.
lvls_frames = []
for file in lvls_annot_files:
    ## Read labels file
    organ = file.split("_")[0]
    lvls_labels_df = pd.read_csv(annot_dir + file)
    lvls_labels_df["organ"] = organ
    lvls_frames.append(lvls_labels_df)
# Keep the previous result (an empty frame) when no translation file is found.
lvls_labels_full = pd.concat(lvls_frames, axis=0) if lvls_frames else pd.DataFrame()
Some notes:
# Derive `uniform_label` from the upper-cased `updated_cell_type`, fixing typos
# and merging synonymous labels across organs.

## Fix some typos
# Exact-match corrections and merges. No value here is itself a key, and none
# interacts with the substring rules below, so one lookup per label is enough.
# NOTE: 'PRE TO PRO B CELL' is left untouched; the rule merging it into
# 'PRE PRO B CELL' was disabled in the previous version of this cell.
uniform_label_fixes = {
    'KUPFFER ELL': 'KUPFFER CELL',
    'ENDOTHELIUM': 'ENDOTHELIAL CELL',
    'EO/BASE/MAST PRECURSOR': 'EO/BASO/MAST PRECURSOR',
    'MAST CEL': 'MAST CELL',
    'NEUTROPHILE': 'NEUTROPHIL',
    'NEUT-MYELOID PRECURSOR': 'NEUTROPHIL-MYELOID PROGENITOR',
    'NEUTROPHIL MYELOID PROGENITOR': 'NEUTROPHIL-MYELOID PROGENITOR',
    'PRE PRO B': 'PRE PRO B CELL',
    'PRO B': 'PRO B CELL',
    'PRE B': 'PRE B CELL',
    "CD8 T CELL": 'SP T CELL',
    "CD4 T CELL": 'SP T CELL',
    "T CELL": 'SP T CELL',
    'YS ERYTHROID': 'ERYTHROID',
    'DC1': 'DC',
    'DC2': 'DC',
    'ILC2': 'ILC',
}
uniform_labels = [
    uniform_label_fixes.get(x, x)
    for x in (y.upper() for y in lvls_labels_full.updated_cell_type)
]
# Any label merely containing one of these tokens collapses to the token itself.
for token in ('NEURAL CREST', 'GLIA'):
    uniform_labels = [token if token in x else x for x in uniform_labels]
lvls_labels_full["uniform_label"] = uniform_labels

# The per-file row index is not unique after concatenation; move it into an
# "index" column and renumber the rows.
lvls_labels_full.reset_index(inplace=True)
## Save uniform labelling
lvls_labels_full[['original_cell_type', "organ", "uniform_label"]].to_csv(annot_dir + 'uniform_lvls.csv')
# Reload the saved translation table and show the rows whose expanded/merged
# label is ERYTHROID. No index_col is given, so the saved row index comes back
# as an extra unnamed column.
# NOTE(review): `uniform_label_expanded_merged` is only written to
# uniform_lvls.csv by the later save that includes the lvl0 columns; the save
# immediately preceding this cell does not write it. Run top to bottom, the
# filter below would fail on a missing column - confirm intended cell order.
lvls_labels_full = pd.read_csv(annot_dir + 'uniform_lvls.csv')
lvls_labels_full[lvls_labels_full.uniform_label_expanded_merged=="ERYTHROID"]
Unnamed: 0 | original_cell_type | organ | uniform_label | uniform_label_lvl0 | uniform_label_expanded_merged | |
---|---|---|---|---|---|---|
25 | 25 | immature EC | bm | ERYTHROCYTE | ERYTHROID | ERYTHROID |
59 | 59 | sinusoidal EC | bm | ERYTHROCYTE | ERYTHROID | ERYTHROID |
62 | 62 | tip EC | bm | ERYTHROCYTE | ERYTHROID | ERYTHROID |
79 | 79 | Enterocyte | gu | ENTEROCYTE | EPI | ERYTHROID |
83 | 83 | Erythroblast | gu | ERYTHROCYTE | ERYTHROID | ERYTHROID |
96 | 96 | BEST4 enterocyte | gu | ENTEROCYTE | EPI | ERYTHROID |
165 | 165 | Erythroid | ki | ERYTHROID | ERYTHROID | ERYTHROID |
241 | 241 | Erythroid | sk | ERYTHROID | ERYTHROID | ERYTHROID |
270 | 270 | Erythroid | sp | ERYTHROID | ERYTHROID | ERYTHROID |
351 | 351 | Ery | th | ERYTHROCYTE | ERYTHROID | ERYTHROID |
372 | 372 | Erythroid | ys | ERYTHROID | ERYTHROID | ERYTHROID |
%%R -i lvls_labels_full -w 1200 -h 900
# Tile plot of original cell types (x) against uniform labels (y), filled by organ.
# Rows are sorted by uniform_label first and the factor levels follow that order,
# so original cell types sharing a uniform label sit next to each other on the x axis.
lvls_labels_full %>%
arrange(uniform_label) %>%
mutate(original_cell_type=factor(original_cell_type, levels=unique(original_cell_type))) %>%
ggplot(aes(original_cell_type, uniform_label, fill=organ)) +
geom_tile() +
theme_bw(base_size=12) +
# Rotate the x tick labels so the cell-type names are readable.
theme(axis.text.x=element_text(angle=90, hjust=1))
Add uniform labels to full cell annotation
# Attach the uniform label to every cell of every organ and stack the
# per-organ tables into one frame, saved as uniform_labels_full.csv.
# Frames are collected and concatenated once (`pd.concat(objs, 0)` with a
# positional axis is rejected by pandas >= 2.0).
# NOTE(review): loop nesting was reconstructed from a flattened export; this
# assumes every organ is kept and only the label lookup is conditional on a
# translation file existing - confirm against the original notebook.
organ_frames = []
for organ, file in zip(organs, annot_files):
    ## Read labels file
    orig_labels_df = pd.read_csv(annot_dir + file, index_col=0)
    orig_labels_df["organ"] = organ
    if os.path.exists(annot_dir + organ + "_annot_lvls.csv"):
        ## Read labels translations
        new_labels = lvls_labels_full[lvls_labels_full.organ == organ].set_index(
            "original_cell_type", drop=False
        )
        ## Add updated labels
        # Positional assignment: one uniform label per cell, in cell order.
        orig_labels_df['uniform_label'] = new_labels.loc[
            orig_labels_df["cell.labels"], 'uniform_label'
        ].values
    organ_frames.append(orig_labels_df)
# Keep the previous result (an empty frame) when there is nothing to stack.
cell_labels_full = pd.concat(organ_frames, axis=0) if organ_frames else pd.DataFrame()
cell_labels_full.to_csv(annot_dir + "uniform_labels_full.csv")
# Number of cells per (uniform_label, organ) pair, as a flat three-column table.
# `size()` counts rows directly; the previous version counted a column named
# "index" produced by reset_index(), which only exists when the frame's index
# is unnamed.
labels_count = (
    cell_labels_full
    .groupby(["uniform_label", "organ"])
    .size()
    .reset_index(name="n_cells")
)
%%R -i labels_count -h 1000 -w 500
# Bar chart of cell counts per uniform label, filled by organ, with the count
# axis on a log10 scale and the axes flipped so labels run down the side.
library(ggplot2)
library(dplyr)
labels_count %>%
ggplot(aes(uniform_label, n_cells, fill=organ)) +
geom_col() +
scale_y_log10() +
coord_flip()
# Load the level-0 lookup table and key it by uniform label (the column itself
# is kept as well).
lvl0 = pd.read_csv(
    "/home/jovyan/mount/gdrive/Pan_fetal/annotations/lvl_0_unified_labels/uniform_labels_lvl0_20210118.csv",
    index_col=0,
).set_index("uniform_label", drop=False)

# One lookup row per translation row, in the same order; the two coarser label
# columns are then copied across positionally.
lvl0_rows = lvl0.loc[lvls_labels_full.uniform_label]
for lvl0_column in ("uniform_label_lvl0", "uniform_label_expanded_merged"):
    lvls_labels_full[lvl0_column] = lvl0_rows[lvl0_column].values
lvls_labels_full
original_cell_type | updated_cell_type | organ | uniform_label | uniform_label_lvl0 | uniform_label_expanded_merged | |
---|---|---|---|---|---|---|
0 | adipo-CAR | Adipocyte | bm | ADIPOCYTE | ADIPOSE | ADIPOCYTE |
1 | arteriolar fibroblast | Fibroblast | bm | FIBROBLAST | STROMAL | FIBROBLAST |
2 | basophil | Basophil | bm | BASOPHIL | EO_BASO_MAST | BASOPHIL |
3 | CD14 monocyte | Monocyte | bm | MONOCYTE | MYELOID | MONOCYTE |
4 | CD4 T cell | CD4 T cell | bm | SP T CELL | LYMPHOID | SP T CELL |
... | ... | ... | ... | ... | ... | ... |
390 | MPP2 | MPP | ys | MPP | HSC_MPP | MPP |
391 | Neutrophil_precursor | Neutrophil precursor | ys | NEUTROPHIL PRECURSOR | MYELOID | NEUTROPHIL PRECURSOR |
392 | NK | NK | ys | NK | LYMPHOID | NK |
393 | Promonocyte | Promonocyte | ys | PROMONOCYTE | MYELOID | PROMONOCYTE |
394 | Smooth_Muscle | Muscle | ys | MUSCLE | MUSCLE | MUSCLE |
395 rows × 6 columns
## Save uniform labelling
# Translation table, now including the two lvl0 columns.
lvls_columns = ['original_cell_type', "organ", "uniform_label", "uniform_label_lvl0", "uniform_label_expanded_merged"]
lvls_labels_full[lvls_columns].to_csv(annot_dir + 'uniform_lvls.csv')

# Distinct (uniform_label, lvl0, expanded_merged) combinations, keyed by
# uniform label while keeping that column in place.
lvl0_matching = (
    lvls_labels_full[['uniform_label', "uniform_label_lvl0", "uniform_label_expanded_merged"]]
    .drop_duplicates()
    .set_index("uniform_label", drop=False)
)

# Look up the lvl0 columns for every cell, in cell order, and copy them across
# positionally before saving the per-cell table again.
lvl0_full = lvl0_matching.loc[cell_labels_full["uniform_label"]][["uniform_label_lvl0", "uniform_label_expanded_merged"]]
for lvl0_column in ("uniform_label_lvl0", "uniform_label_expanded_merged"):
    cell_labels_full[lvl0_column] = lvl0_full[lvl0_column].values
cell_labels_full.to_csv(annot_dir + "uniform_labels_full.csv")