import celltypist
from celltypist import models
import scanpy as sc
import pandas as pd
import numpy as np
import anndata
import re
import h5py
import scipy.sparse as scs
import concurrent.futures
import scanpy.external as sce
pip install scrublet
Requirement already satisfied: scrublet in /opt/conda/lib/python3.10/site-packages (0.2.3) Requirement already satisfied: cython in /opt/conda/lib/python3.10/site-packages (from scrublet) (0.29.36) Requirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from scrublet) (1.24.4) Requirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from scrublet) (1.11.1) Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from scrublet) (1.3.0) Requirement already satisfied: scikit-image in /opt/conda/lib/python3.10/site-packages (from scrublet) (0.21.0) Requirement already satisfied: matplotlib in /opt/conda/lib/python3.10/site-packages (from scrublet) (3.7.2) Requirement already satisfied: annoy in /opt/conda/lib/python3.10/site-packages (from scrublet) (1.17.3) Requirement already satisfied: numba in /opt/conda/lib/python3.10/site-packages (from scrublet) (0.57.1) Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from scrublet) (2.0.3) Requirement already satisfied: umap-learn in /opt/conda/lib/python3.10/site-packages (from scrublet) (0.5.4) Requirement already satisfied: contourpy>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib->scrublet) (1.1.0) Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.10/site-packages (from matplotlib->scrublet) (0.11.0) Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/lib/python3.10/site-packages (from matplotlib->scrublet) (4.42.0) Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib->scrublet) (1.4.4) Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from matplotlib->scrublet) (23.1) Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.10/site-packages (from matplotlib->scrublet) (10.0.0) Requirement already satisfied: pyparsing<3.1,>=2.3.1 in 
/opt/conda/lib/python3.10/site-packages (from matplotlib->scrublet) (3.0.9) Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.10/site-packages (from matplotlib->scrublet) (2.8.2) Requirement already satisfied: llvmlite<0.41,>=0.40.0dev0 in /opt/conda/lib/python3.10/site-packages (from numba->scrublet) (0.40.1) Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->scrublet) (2023.3.post1) Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->scrublet) (2023.3) Requirement already satisfied: networkx>=2.8 in /opt/conda/lib/python3.10/site-packages (from scikit-image->scrublet) (3.1) Requirement already satisfied: imageio>=2.27 in /opt/conda/lib/python3.10/site-packages (from scikit-image->scrublet) (2.31.1) Requirement already satisfied: tifffile>=2022.8.12 in /opt/conda/lib/python3.10/site-packages (from scikit-image->scrublet) (2023.7.18) Requirement already satisfied: PyWavelets>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-image->scrublet) (1.4.1) Requirement already satisfied: lazy_loader>=0.2 in /opt/conda/lib/python3.10/site-packages (from scikit-image->scrublet) (0.3) Requirement already satisfied: joblib>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->scrublet) (1.3.1) Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->scrublet) (3.2.0) Requirement already satisfied: pynndescent>=0.5 in /opt/conda/lib/python3.10/site-packages (from umap-learn->scrublet) (0.5.10) Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from umap-learn->scrublet) (4.66.1) Requirement already satisfied: tbb>=2019.0 in /opt/conda/lib/python3.10/site-packages (from umap-learn->scrublet) (2021.10.0) Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib->scrublet) 
(1.16.0) WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv Note: you may need to restart the kernel to use updated packages.
def read_mat(h5_con):
    """Build a SciPy CSC sparse matrix from the 'matrix' group of an HDF5 handle.

    Parameters
    ----------
    h5_con
        Open HDF5 handle (or any nested mapping) exposing
        ``['matrix']['data']``, ``['indices']``, ``['indptr']`` and
        ``['shape']``.

    Returns
    -------
    scipy.sparse.csc_matrix
        Matrix assembled from the stored (data, indices, indptr) triplet with
        the stored shape.
    """
    group = h5_con['matrix']
    counts = group['data'][:]        # count values
    row_idx = group['indices'][:]    # row indices
    col_ptr = group['indptr'][:]     # pointers for column positions
    dims = tuple(group['shape'][:])  # matrix dimensions
    return scs.csc_matrix((counts, row_idx, col_ptr), shape=dims)
def read_obs(h5con):
    """Assemble per-cell metadata from an HDF5 handle into a DataFrame.

    Reads the cell barcodes from ``h5con['matrix']['barcodes']`` and every
    dataset under ``h5con['matrix']['observations']``. Columns stored as
    byte strings are decoded as UTF-8.

    Parameters
    ----------
    h5con
        Open HDF5 handle (or any nested mapping) with the layout above.

    Returns
    -------
    pandas.DataFrame
        A 'barcodes' column plus one column per observations dataset.
    """
    matrix_group = h5con['matrix']

    # Initialize the DataFrame with the decoded cell barcodes.
    bc = [x.decode('UTF-8') for x in matrix_group['barcodes'][:]]
    obs_df = pd.DataFrame({'barcodes': bc})

    observations = matrix_group['observations']
    for col in observations:
        values = observations[col][:]
        # Byte storage is detected from the first element; guard the peek so
        # a file with zero cells does not raise IndexError on values[0].
        if len(values) > 0 and isinstance(values[0], (bytes, bytearray)):
            values = [x.decode('UTF-8') for x in values]
        obs_df[col] = values
    return obs_df
# Construct an AnnData object from an HDF5 file.
def read_h5_anndata(h5_file):
    """Read an HDF5 file and return it as an AnnData object.

    Parameters
    ----------
    h5_file
        Path (or anything ``h5py.File`` accepts) of the file to read.

    Returns
    -------
    anndata.AnnData
        The transposed expression matrix with the metadata table from
        ``read_obs`` as ``obs`` and the de-duplicated feature names as
        ``var_names``.
    """
    # The context manager closes the file even if a read fails. Everything
    # needed is copied into memory (via [:]) before the handle is released.
    with h5py.File(h5_file, mode='r') as h5_con:
        # Expression matrix.
        mat = read_mat(h5_con)
        # Gene names, stored as byte strings.
        genes = [x.decode('UTF-8')
                 for x in h5_con['matrix']['features']['name'][:]]
        # Per-cell metadata.
        obs_df = read_obs(h5_con)

    # The matrix is transposed before being paired with the per-cell table.
    adata = anndata.AnnData(mat.T, obs=obs_df)
    # Make sure the gene names are aligned and unique.
    adata.var_names = genes
    adata.var_names_make_unique()
    return adata
def get_last_pattern(inputstr):
    """Return the text after the last '/' in *inputstr*.

    Parameters
    ----------
    inputstr : str
        A slash-separated string such as a file path.

    Returns
    -------
    str
        The final segment; the whole string when it contains no '/', and an
        empty string when the input is empty or ends with '/'.
    """
    # A plain string split gives the same result as the former regex
    # r"[^/]+(?=$)" without the regex machinery.
    return inputstr.rsplit("/", 1)[-1]
# Load the sample metadata table from the CSV export.
meta_data = pd.read_csv("hise_meta_data_2023-11-19.csv")
def process_file(file_name):
    """Run Scrublet on one HDF5 file and return its per-cell doublet columns.

    Parameters
    ----------
    file_name
        Path of the HDF5 file, passed straight to ``read_h5_anndata``.

    Returns
    -------
    pandas.DataFrame
        The 'barcodes', 'predicted_doublet' and 'doublet_score' columns of
        the resulting ``obs`` table.
    """
    adata = read_h5_anndata(file_name)
    # The call's return value is unused; the doublet columns are read from
    # adata.obs afterwards.
    sc.external.pp.scrublet(adata)
    wanted_columns = ['barcodes', 'predicted_doublet', 'doublet_score']
    return adata.obs[wanted_columns]
from concurrent.futures import ThreadPoolExecutor

# Score every file listed in the metadata table using a pool of 20 worker
# threads. executor.map yields results in the order of the input paths, so
# the list can be built directly instead of appending in a loop.
with ThreadPoolExecutor(max_workers=20) as executor:
    results = list(executor.map(process_file, meta_data['file.path']))
Automatically set threshold at doublet score = 0.35 Detected doublet rate = 1.7% Estimated detectable doublet fraction = 29.1% Overall doublet rate: Expected = 5.0% Estimated = 6.0% Automatically set threshold at doublet score = 0.68 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.7% Overall doublet rate: Expected = 5.0% Estimated = 3.6% Warning: failed to automatically identify doublet score threshold. Run `call_doublets` with user-specified threshold. Automatically set threshold at doublet score = 0.36 Detected doublet rate = 1.2% Estimated detectable doublet fraction = 36.3% Overall doublet rate: Expected = 5.0% Estimated = 3.3% Automatically set threshold at doublet score = 0.72 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 0.3% Overall doublet rate: Expected = 5.0% Estimated = 3.2% Automatically set threshold at doublet score = 0.38 Detected doublet rate = 1.1% Estimated detectable doublet fraction = 27.6% Overall doublet rate: Expected = 5.0% Estimated = 3.9% Automatically set threshold at doublet score = 0.70 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 4.1% Overall doublet rate: Expected = 5.0% Estimated = 1.4% Automatically set threshold at doublet score = 0.70 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 2.1% Overall doublet rate: Expected = 5.0% Estimated = 2.4% Automatically set threshold at doublet score = 0.70 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 2.4% Overall doublet rate: Expected = 5.0% Estimated = 2.0% Automatically set threshold at doublet score = 0.30 Detected doublet rate = 1.9% Estimated detectable doublet fraction = 40.4% Overall doublet rate: Expected = 5.0% Estimated = 4.7% Automatically set threshold at doublet score = 0.33 Detected doublet rate = 1.7% Estimated detectable doublet fraction = 35.3% Overall doublet rate: Expected = 5.0% Estimated = 4.7% Automatically set threshold at doublet score = 0.70 Detected 
doublet rate = 0.1% Estimated detectable doublet fraction = 3.8% Overall doublet rate: Expected = 5.0% Estimated = 3.0% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 2.0% Estimated detectable doublet fraction = 33.2% Overall doublet rate: Expected = 5.0% Estimated = 5.9% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 0.7% Overall doublet rate: Expected = 5.0% Estimated = 2.0% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.6% Estimated detectable doublet fraction = 42.8% Overall doublet rate: Expected = 5.0% Estimated = 3.8% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 1.8% Overall doublet rate: Expected = 5.0% Estimated = 3.7% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 2.4% Overall doublet rate: Expected = 5.0% Estimated = 2.7% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.9% Estimated detectable doublet fraction = 44.7% Overall doublet rate: Expected = 5.0% Estimated = 4.3% Automatically set threshold at doublet score = 0.30 Detected doublet rate = 2.9% Estimated detectable doublet fraction = 44.4% Overall doublet rate: Expected = 5.0% Estimated = 6.5% Automatically set threshold at doublet score = 0.48 Detected doublet rate = 0.5% Estimated detectable doublet fraction = 19.0% Overall doublet rate: Expected = 5.0% Estimated = 2.9% Automatically set threshold at doublet score = 0.39 Detected doublet rate = 1.0% Estimated detectable doublet fraction = 33.9% Overall doublet rate: Expected = 5.0% Estimated = 2.9% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.8% Estimated detectable doublet fraction = 40.1% Overall doublet rate: Expected = 5.0% Estimated = 4.4% Automatically set threshold at doublet score = 0.71 Detected 
doublet rate = 0.1% Estimated detectable doublet fraction = 1.3% Overall doublet rate: Expected = 5.0% Estimated = 4.0% Automatically set threshold at doublet score = 0.16 Detected doublet rate = 7.6% Estimated detectable doublet fraction = 52.4% Overall doublet rate: Expected = 5.0% Estimated = 14.6% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 0.0% Overall doublet rate: Expected = 5.0% Estimated = 10.0% Automatically set threshold at doublet score = 0.36 Detected doublet rate = 1.3% Estimated detectable doublet fraction = 31.4% Overall doublet rate: Expected = 5.0% Estimated = 4.2% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.6% Estimated detectable doublet fraction = 44.4% Overall doublet rate: Expected = 5.0% Estimated = 3.7% Automatically set threshold at doublet score = 0.43 Detected doublet rate = 0.9% Estimated detectable doublet fraction = 27.1% Overall doublet rate: Expected = 5.0% Estimated = 3.5% Automatically set threshold at doublet score = 0.33 Detected doublet rate = 1.5% Estimated detectable doublet fraction = 43.1% Overall doublet rate: Expected = 5.0% Estimated = 3.4% Automatically set threshold at doublet score = 0.47 Detected doublet rate = 0.7% Estimated detectable doublet fraction = 25.2% Overall doublet rate: Expected = 5.0% Estimated = 2.7% Automatically set threshold at doublet score = 0.44 Detected doublet rate = 0.7% Estimated detectable doublet fraction = 25.8% Overall doublet rate: Expected = 5.0% Estimated = 2.8% Automatically set threshold at doublet score = 0.15 Detected doublet rate = 8.6% Estimated detectable doublet fraction = 58.6% Overall doublet rate: Expected = 5.0% Estimated = 14.7% Automatically set threshold at doublet score = 0.16 Detected doublet rate = 8.6% Estimated detectable doublet fraction = 55.5% Overall doublet rate: Expected = 5.0% Estimated = 15.5% Automatically set threshold at doublet score = 0.68 
Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.9% Overall doublet rate: Expected = 5.0% Estimated = 1.9% Automatically set threshold at doublet score = 0.25 Detected doublet rate = 2.9% Estimated detectable doublet fraction = 47.5% Overall doublet rate: Expected = 5.0% Estimated = 6.0% Automatically set threshold at doublet score = 0.29 Detected doublet rate = 1.8% Estimated detectable doublet fraction = 44.9% Overall doublet rate: Expected = 5.0% Estimated = 4.0% Automatically set threshold at doublet score = 0.15 Detected doublet rate = 9.1% Estimated detectable doublet fraction = 58.6% Overall doublet rate: Expected = 5.0% Estimated = 15.5% Automatically set threshold at doublet score = 0.36 Detected doublet rate = 1.2% Estimated detectable doublet fraction = 33.3% Overall doublet rate: Expected = 5.0% Estimated = 3.6% Automatically set threshold at doublet score = 0.33 Detected doublet rate = 1.2% Estimated detectable doublet fraction = 40.1% Overall doublet rate: Expected = 5.0% Estimated = 3.0% Automatically set threshold at doublet score = 0.26 Detected doublet rate = 1.9% Estimated detectable doublet fraction = 54.0% Overall doublet rate: Expected = 5.0% Estimated = 3.5% Automatically set threshold at doublet score = 0.28 Detected doublet rate = 2.2% Estimated detectable doublet fraction = 43.0% Overall doublet rate: Expected = 5.0% Estimated = 5.1% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 2.3% Overall doublet rate: Expected = 5.0% Estimated = 3.8% Automatically set threshold at doublet score = 0.70 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 0.8% Overall doublet rate: Expected = 5.0% Estimated = 5.5% Automatically set threshold at doublet score = 0.36 Detected doublet rate = 1.0% Estimated detectable doublet fraction = 32.3% Overall doublet rate: Expected = 5.0% Estimated = 3.2% Automatically set threshold at doublet score = 0.25 
Detected doublet rate = 2.3% Estimated detectable doublet fraction = 51.7% Overall doublet rate: Expected = 5.0% Estimated = 4.5% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.9% Estimated detectable doublet fraction = 46.4% Overall doublet rate: Expected = 5.0% Estimated = 4.0% Automatically set threshold at doublet score = 0.32 Detected doublet rate = 1.4% Estimated detectable doublet fraction = 39.5% Overall doublet rate: Expected = 5.0% Estimated = 3.5% Automatically set threshold at doublet score = 0.69 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 8.5% Overall doublet rate: Expected = 5.0% Estimated = 1.7% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 1.5% Overall doublet rate: Expected = 5.0% Estimated = 3.1% Automatically set threshold at doublet score = 0.33 Detected doublet rate = 1.2% Estimated detectable doublet fraction = 41.0% Overall doublet rate: Expected = 5.0% Estimated = 2.9% Automatically set threshold at doublet score = 0.32 Detected doublet rate = 1.5% Estimated detectable doublet fraction = 49.0% Overall doublet rate: Expected = 5.0% Estimated = 3.1% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.8% Estimated detectable doublet fraction = 45.4% Overall doublet rate: Expected = 5.0% Estimated = 3.9% Automatically set threshold at doublet score = 0.32 Detected doublet rate = 1.7% Estimated detectable doublet fraction = 42.5% Overall doublet rate: Expected = 5.0% Estimated = 4.0% Automatically set threshold at doublet score = 0.38 Detected doublet rate = 1.1% Estimated detectable doublet fraction = 32.5% Overall doublet rate: Expected = 5.0% Estimated = 3.2% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.6% Estimated detectable doublet fraction = 38.2% Overall doublet rate: Expected = 5.0% Estimated = 4.2% Automatically set threshold at doublet score = 0.47 
Detected doublet rate = 0.6% Estimated detectable doublet fraction = 21.1% Overall doublet rate: Expected = 5.0% Estimated = 2.9% Automatically set threshold at doublet score = 0.36 Detected doublet rate = 1.3% Estimated detectable doublet fraction = 34.7% Overall doublet rate: Expected = 5.0% Estimated = 3.9% Automatically set threshold at doublet score = 0.38 Detected doublet rate = 1.0% Estimated detectable doublet fraction = 28.1% Overall doublet rate: Expected = 5.0% Estimated = 3.7% Automatically set threshold at doublet score = 0.52 Detected doublet rate = 0.3% Estimated detectable doublet fraction = 20.4% Overall doublet rate: Expected = 5.0% Estimated = 1.6% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.8% Estimated detectable doublet fraction = 44.0% Overall doublet rate: Expected = 5.0% Estimated = 4.2% Automatically set threshold at doublet score = 0.72 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.5% Overall doublet rate: Expected = 5.0% Estimated = 1.7% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.7% Estimated detectable doublet fraction = 41.0% Overall doublet rate: Expected = 5.0% Estimated = 4.3% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.4% Overall doublet rate: Expected = 5.0% Estimated = 3.0% Automatically set threshold at doublet score = 0.34 Detected doublet rate = 1.2% Estimated detectable doublet fraction = 42.3% Overall doublet rate: Expected = 5.0% Estimated = 2.9% Automatically set threshold at doublet score = 0.67 Detected doublet rate = 0.2% Estimated detectable doublet fraction = 8.0% Overall doublet rate: Expected = 5.0% Estimated = 2.2% Automatically set threshold at doublet score = 0.66 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 7.7% Overall doublet rate: Expected = 5.0% Estimated = 1.8% Automatically set threshold at doublet score = 0.45 
Detected doublet rate = 0.7% Estimated detectable doublet fraction = 20.6% Overall doublet rate: Expected = 5.0% Estimated = 3.4% Automatically set threshold at doublet score = 0.21 Detected doublet rate = 3.7% Estimated detectable doublet fraction = 50.2% Overall doublet rate: Expected = 5.0% Estimated = 7.4% Automatically set threshold at doublet score = 0.32 Detected doublet rate = 1.7% Estimated detectable doublet fraction = 36.6% Overall doublet rate: Expected = 5.0% Estimated = 4.6% Automatically set threshold at doublet score = 0.36 Detected doublet rate = 1.2% Estimated detectable doublet fraction = 36.0% Overall doublet rate: Expected = 5.0% Estimated = 3.3% Automatically set threshold at doublet score = 0.29 Detected doublet rate = 2.0% Estimated detectable doublet fraction = 44.3% Overall doublet rate: Expected = 5.0% Estimated = 4.5% Automatically set threshold at doublet score = 0.25 Detected doublet rate = 2.5% Estimated detectable doublet fraction = 45.9% Overall doublet rate: Expected = 5.0% Estimated = 5.5% Automatically set threshold at doublet score = 0.28 Detected doublet rate = 2.3% Estimated detectable doublet fraction = 45.5% Overall doublet rate: Expected = 5.0% Estimated = 5.0% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 2.9% Overall doublet rate: Expected = 5.0% Estimated = 1.8% Automatically set threshold at doublet score = 0.68 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.9% Overall doublet rate: Expected = 5.0% Estimated = 1.7% Automatically set threshold at doublet score = 0.38 Detected doublet rate = 1.0% Estimated detectable doublet fraction = 31.3% Overall doublet rate: Expected = 5.0% Estimated = 3.3% Automatically set threshold at doublet score = 0.34 Detected doublet rate = 1.2% Estimated detectable doublet fraction = 34.9% Overall doublet rate: Expected = 5.0% Estimated = 3.4% Automatically set threshold at doublet score = 0.31 
Detected doublet rate = 1.6% Estimated detectable doublet fraction = 33.5% Overall doublet rate: Expected = 5.0% Estimated = 4.7% Automatically set threshold at doublet score = 0.27 Detected doublet rate = 2.3% Estimated detectable doublet fraction = 43.9% Overall doublet rate: Expected = 5.0% Estimated = 5.3% Automatically set threshold at doublet score = 0.40 Detected doublet rate = 1.0% Estimated detectable doublet fraction = 32.5% Overall doublet rate: Expected = 5.0% Estimated = 3.1% Automatically set threshold at doublet score = 0.70 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.8% Overall doublet rate: Expected = 5.0% Estimated = 2.3% Automatically set threshold at doublet score = 0.34 Detected doublet rate = 1.4% Estimated detectable doublet fraction = 36.1% Overall doublet rate: Expected = 5.0% Estimated = 3.8% Automatically set threshold at doublet score = 0.33 Detected doublet rate = 2.0% Estimated detectable doublet fraction = 40.5% Overall doublet rate: Expected = 5.0% Estimated = 5.0% Automatically set threshold at doublet score = 0.67 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 6.7% Overall doublet rate: Expected = 5.0% Estimated = 2.0% Automatically set threshold at doublet score = 0.42 Detected doublet rate = 0.9% Estimated detectable doublet fraction = 31.5% Overall doublet rate: Expected = 5.0% Estimated = 2.8% Automatically set threshold at doublet score = 0.26 Detected doublet rate = 2.7% Estimated detectable doublet fraction = 50.8% Overall doublet rate: Expected = 5.0% Estimated = 5.2% Automatically set threshold at doublet score = 0.47 Detected doublet rate = 0.6% Estimated detectable doublet fraction = 19.4% Overall doublet rate: Expected = 5.0% Estimated = 3.0% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 2.0% Estimated detectable doublet fraction = 41.8% Overall doublet rate: Expected = 5.0% Estimated = 4.8% Automatically set threshold at doublet score = 0.71 
Detected doublet rate = 0.0% Estimated detectable doublet fraction = 1.5% Overall doublet rate: Expected = 5.0% Estimated = 3.4% Automatically set threshold at doublet score = 0.45 Detected doublet rate = 0.6% Estimated detectable doublet fraction = 18.9% Overall doublet rate: Expected = 5.0% Estimated = 3.2% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.5% Estimated detectable doublet fraction = 46.6% Overall doublet rate: Expected = 5.0% Estimated = 3.3% Automatically set threshold at doublet score = 0.63 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 4.2% Overall doublet rate: Expected = 5.0% Estimated = 2.0% Automatically set threshold at doublet score = 0.35 Detected doublet rate = 1.2% Estimated detectable doublet fraction = 41.7% Overall doublet rate: Expected = 5.0% Estimated = 2.9% Automatically set threshold at doublet score = 0.27 Detected doublet rate = 2.3% Estimated detectable doublet fraction = 42.9% Overall doublet rate: Expected = 5.0% Estimated = 5.4% Automatically set threshold at doublet score = 0.34 Detected doublet rate = 1.0% Estimated detectable doublet fraction = 34.6% Overall doublet rate: Expected = 5.0% Estimated = 3.0% Automatically set threshold at doublet score = 0.72 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 1.9% Overall doublet rate: Expected = 5.0% Estimated = 2.7% Automatically set threshold at doublet score = 0.40 Detected doublet rate = 0.7% Estimated detectable doublet fraction = 25.3% Overall doublet rate: Expected = 5.0% Estimated = 2.8% Automatically set threshold at doublet score = 0.37 Detected doublet rate = 1.3% Estimated detectable doublet fraction = 31.8% Overall doublet rate: Expected = 5.0% Estimated = 4.2% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.6% Overall doublet rate: Expected = 5.0% Estimated = 2.1% Automatically set threshold at doublet score = 0.70 
Detected doublet rate = 0.1% Estimated detectable doublet fraction = 2.3% Overall doublet rate: Expected = 5.0% Estimated = 3.2% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 2.1% Overall doublet rate: Expected = 5.0% Estimated = 1.9% Automatically set threshold at doublet score = 0.71 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 1.4% Overall doublet rate: Expected = 5.0% Estimated = 4.2% Automatically set threshold at doublet score = 0.73 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 2.7% Overall doublet rate: Expected = 5.0% Estimated = 3.0% Automatically set threshold at doublet score = 0.69 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.7% Overall doublet rate: Expected = 5.0% Estimated = 1.9% Automatically set threshold at doublet score = 0.31 Detected doublet rate = 1.8% Estimated detectable doublet fraction = 40.8% Overall doublet rate: Expected = 5.0% Estimated = 4.3% Automatically set threshold at doublet score = 0.68 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 3.3% Overall doublet rate: Expected = 5.0% Estimated = 2.6% Automatically set threshold at doublet score = 0.70 Detected doublet rate = 0.1% Estimated detectable doublet fraction = 4.3% Overall doublet rate: Expected = 5.0% Estimated = 1.8% Automatically set threshold at doublet score = 0.73 Detected doublet rate = 0.0% Estimated detectable doublet fraction = 1.5% Overall doublet rate: Expected = 5.0% Estimated = 2.6%
# Stack the per-file tables into one frame, discarding the per-file indices.
final_result = pd.concat(results, ignore_index=True)

# Persist the combined table.
final_result.to_parquet("doublet_score.parquet")

# Tally the doublet calls.
final_result["predicted_doublet"].value_counts()
predicted_doublet False 2065817 True 27261 Name: count, dtype: int64
# Tally the doublet calls in the combined table.
final_result["predicted_doublet"].value_counts()
predicted_doublet False 2066388 True 27399 Name: count, dtype: int64
# Fraction of non-missing doublet calls that are True, computed from the
# data instead of from counts copied by hand out of a printed tally (which
# go stale whenever the scoring is re-run).
final_result['predicted_doublet'].eq(True).sum() / final_result['predicted_doublet'].count()
0.013085858303638336