import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from astropy.cosmology import FlatLambdaCDM
from astropy.coordinates import SkyCoord
from astropy.stats import bootstrap
from astropy import units as u
import cmasher as cmr
from easyquery import Query
from functools import partial
from pathlib import Path
from scipy.ndimage import gaussian_filter1d
from tqdm.notebook import tqdm
cosmo = FlatLambdaCDM(H0=70, Om0=0.3)
ROOT = Path('.').resolve()
data_dir = ROOT / "data"
results_dir = ROOT / "results/xSAGA"
figures_dir = results_dir / "paper-figures"
from astropy.utils import NumpyRNGContext
rng_context = NumpyRNGContext(42)
rng = np.random.default_rng()
import sys
sys.path.append(f'{ROOT}/xsaga')
from utils import mass2color, gap2color
from colossus.halo import mass_so
from colossus.cosmology import cosmology
cosmo_params = {'flat': True, 'H0': 70.0, 'Om0': 0.3, 'Ob0': 0.049, 'sigma8': 0.81, 'ns': 0.95}
colossus_cosmo = cosmology.setCosmology('myCosmo', cosmo_params)
M_r_lim = -15
HOST_QUERY = Query("mass_GSE > 9.5", "mass_GSE < 11", "z_NSA >= 0.01", "z_NSA <= 0.03")
SAT_QUERY = Query(f"M_r < {M_r_lim}")
plt.rcParams['font.size'] = 14
plt.rcParams['figure.dpi'] = 300
plt.rcParams['axes.labelsize'] = 'large'
plt.rcParams['legend.frameon'] = False
plt.rcParams['legend.framealpha'] = 1
plt.rcParams['lines.markersize'] = 3
plt.rcParams['xtick.minor.visible'] = False
plt.rcParams['ytick.minor.visible'] = False
plt.rcParams['xtick.major.size'] = 5
plt.rcParams['ytick.major.size'] = 5
from PIL import Image
import requests
from io import BytesIO
import os
params_dict = dict(pixscale=0.262, size=144, layer="ls-dr9")
base_url = "http://legacysurvey.org/viewer-dev/jpeg-cutout/"
cache_dir = f"{ROOT}/.cache/"
def load_image(*args, **kwargs):
response = requests.get(*args, **kwargs)
return response.content
def get_satellite_image(objid, ra, dec):
fname = f"{objid}.jpg"
fpath = os.path.join(cache_dir, fname)
try:
img = open(fpath, "rb").read()
except FileNotFoundError:
params_dict["ra"] = ra
params_dict["dec"] = dec
img = load_image(base_url, params_dict)
open(fpath, "wb").write(img)
return Image.open(BytesIO(img))
saga = pd.read_csv(ROOT / 'data/saga_redshifts_2021-02-19.csv', index_col='OBJID', dtype={'OBJID': np.int64})
xsaga = pd.read_parquet(ROOT / 'results/xSAGA/lowz-p0_5.parquet')
xsaga.set_index('objID', inplace=True)
preds = pd.read_csv(
ROOT / 'results/predictions-dr9.csv',
index_col='objID',
dtype={'objID': str},
usecols=['objID', 'ra', 'dec', 'p_CNN']
)
A few spectroscopically confirmed examples of $z < 0.03$ and $z > 0.03$ galaxies from the SAGA survey.
fig, axes = plt.subplots(2, 3, figsize=(9, 6), dpi=300)
# top row of figures
for ax, [objid, (ra, dec)] in zip(
axes[0].flat,
saga.loc[[901716660000005714, 903470840000004935, 902830140000001924]][['RA', 'DEC']].iterrows()
):
ax.imshow(get_satellite_image(objid, ra, dec))
ax.set_axis_off()
ax.text(0.5, 0.1, r'high-$z$', ha='center', transform=ax.transAxes, color='white', fontsize=16)
# bottom row of figures
for ax, [objid, (ra, dec)] in zip(
axes[1].flat,
saga.loc[[901678900000005438, 903613300000002892, 903440480000002696]][['RA', 'DEC']].iterrows()
):
ax.imshow(get_satellite_image(objid, ra, dec))
ax.set_axis_off()
ax.text(0.5, 0.1, r'low-$z$', ha='center', transform=ax.transAxes, color='white', fontsize=16)
fig.subplots_adjust(
left=0.05,
right=0.95,
bottom=0.025,
top=0.925,
wspace=0.025,
hspace=0.025
)
fig.suptitle("SAGA (spectroscopic)", fontsize=20);
fig.savefig(figures_dir / "examples-saga.pdf")
A few examples of CNN-identified low-$z$ and high-$z$ candidates.
fig, axes = plt.subplots(2, 3, figsize=(9, 6), dpi=300)
# top row of figures
for ax, [objid, (ra, dec, p_CNN)] in zip(
axes[0].flat,
preds.loc[[1237667225546391759, 1237668315384709716, 1237674285925270037]][['ra', 'dec', 'p_CNN']].iterrows()
):
ax.imshow(get_satellite_image(objid, ra, dec))
ax.set_axis_off()
ax.text(0.5, 0.1, r'$p_{\rm CNN}$' f' = {p_CNN:0.2f}', ha='center', transform=ax.transAxes, color='white', fontsize=16)
# bottom row of figures
for ax, [objid, (ra, dec, p_CNN)] in zip(
axes[1].flat,
xsaga.loc[[1237655107834151579, 1237667781733844044, 1237661949719019863]][['ra', 'dec', 'p_CNN']].iterrows()
):
ax.imshow(get_satellite_image(objid, ra, dec))
ax.set_axis_off()
ax.text(0.5, 0.1, r'$p_{\rm CNN}$' f' = {p_CNN:0.2f}', ha='center', transform=ax.transAxes, color='white', fontsize=16)
fig.subplots_adjust(
left=0.05,
right=0.95,
bottom=0.025,
top=0.925,
wspace=0.025,
hspace=0.025
)
fig.suptitle("xSAGA (CNN-identified)", fontsize=20);
fig.savefig(figures_dir / "examples-xsaga.pdf")
The Receiving Operating Characteristic curve, used to show how a model trades off true positives and false positives. We'd like to maximize the area under the curve, or AUC.
from cnn_evaluation import *
cv = load_saga_crossvalidation()
cv["ranking"] = cv.p_CNN.rank(ascending=False)
cv["M_r"] = cv.r_mag - cosmo.distmod(z=cv.Z)
def plot_roc_curve(
df,
K=4,
color="#003f5c",
label="FL-hdxresnet34",
fig=None,
ax=None,
figname="roc-curve.png",
):
"""Plot one or more ROC curves.
"""
if (fig is None) or (ax is None):
fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=300)
for k in range(K):
k = k + 1
q = Query(f"kfold == {k}")
fpr, tpr, thresholds = roc_curve(q.filter(df).low_z, q.filter(df).p_CNN)
ax.plot(fpr, tpr, c=color, lw=1, alpha=0.5, zorder=1)
# plot mean trend and mean scores
score = roc_auc_score(df.low_z, df.p_CNN)
fpr, tpr, thresholds = roc_curve(df.low_z, df.p_CNN)
ax.plot(fpr, tpr, c=color, lw=3, zorder=5, label=f"{label}\n(AUC = {score:.3f})")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.grid(alpha=0.15)
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc="lower right", fontsize=14)
ax.plot([0, 1], [0, 1], ls="--", lw=1, c="k")
return fig, ax
# start with p_sat from SAGA II
saga_psat = cv.copy()
saga_psat["p_CNN"] = saga_psat.p_sat_approx
fig, ax = plot_roc_curve(
saga_psat, color="#7a5195", label=r"SAGA $\mathcal{R}_{\rm sat}$", figname=None
)
# get resnet cross-validation and add to ROC curve
saga_resnet_cv = load_saga_crossvalidation(
catalogname="saga-validation_resnet34.csv"
)
fig, ax = plot_roc_curve(
saga_resnet_cv, color="#ef5675", label="resnet34", fig=fig, ax=ax, figname=None
)
# finally include highly optimized CNN
plot_roc_curve(
cv,
color="#ffa600",
label="FL-hdxresnet34",
fig=fig,
ax=ax,
figname="roc-curve.png",
)
plt.tight_layout()
plt.savefig(figures_dir / "roc-curve.pdf")
The completeness, purity, and geometric mean of the two, for CNN predictions cross-validated from the SAGA survey. These metrics are plotted in bins of various galaxy photometric properties, such as $r$, $g-r$, $\mu_{r,\rm eff}$, and $M_r$.
def plot_comparison_by_X(
df,
X,
X_min,
X_max,
delta_X,
X_label,
figname,
z_thresh=0.03,
p_cnn_thresh=0.5,
N_boot=100,
label="fraction",
legend=True,
):
"""Compare metrics as a function of column X.
If N_boot is an integer, then it will bootstrap resample each metric `N_boot` times.
The param `label` can be either "count" or "fraction", depending on whether the
total number or fraction of low-z galaxies should be labeled at the top of the
figure.
"""
q_true = Query(f"Z < {z_thresh}")
q_pred = Query(f"p_CNN > {p_cnn_thresh}")
X_bins = np.arange(X_min, X_max, delta_X)
X_queries = [
Query(f"{X} >= {x1}", f"{X} < {x2}") for x1, x2 in zip(X_bins, X_bins + delta_X)
]
fig, ax = plt.subplots(1, 1, figsize=(5, 3), dpi=300)
# get the True/False mask for labels and predictions for each q (magnitude bin)
def completeness_score_bootfunc(x):
return recall_score(q_true.mask(x), q_pred.mask(x))
def purity_score_bootfunc(x):
return precision_score(q_true.mask(x), q_pred.mask(x))
boot_completeness = np.array(
[
[
completeness_score_bootfunc(q.filter(df).sample(frac=1, replace=True))
for _ in range(N_boot)
]
for q in X_queries
]
)
boot_purity = np.array(
[
[
purity_score_bootfunc(q.filter(df).sample(frac=1, replace=True))
for _ in range(N_boot)
]
for q in X_queries
]
)
boot_geometric_mean = np.sqrt(boot_completeness * boot_purity)
ax.plot(
X_bins + delta_X / 2,
boot_completeness.mean(1),
c="#ff6361",
lw=2,
label="Completeness",
zorder=1,
)
ax.fill_between(
X_bins + delta_X / 2,
*np.quantile(boot_completeness, [0.16, 0.84], axis=1),
color="#ff6361",
lw=0,
zorder=1,
alpha=0.3,
)
ax.plot(
X_bins + delta_X / 2,
boot_purity.mean(1),
c="#ffa600",
lw=2,
label="Purity",
zorder=2,
)
ax.fill_between(
X_bins + delta_X / 2,
*np.quantile(boot_purity, [0.16, 0.84], axis=1),
color="#ffa600",
lw=0,
zorder=2,
alpha=0.3,
)
ax.plot(
X_bins + delta_X / 2,
boot_geometric_mean.mean(1),
c="#003f5c",
lw=2,
label="Geometric mean",
zorder=3,
)
ax.fill_between(
X_bins + delta_X / 2,
*np.quantile(boot_geometric_mean, [0.16, 0.84], axis=1),
color="#003f5c",
lw=0,
zorder=3,
alpha=0.3,
)
if label == "fraction":
fracs = np.array([q_true.count(q.filter(df)) / q.count(df) for q in X_queries])
for x, l in zip(X_bins + delta_X / 2, fracs):
ax.text(x, 1.08, f"{l:.3f}", rotation=60, fontsize=8, color="k")
elif label == "count":
counts = np.array([q_true.count(q.filter(df)) for q in X_queries])
for x, l in zip(X_bins + delta_X / 2, counts):
ax.text(x, 1.08, l, rotation=60, fontsize=8, color="k")
else:
raise ValueError("Please specify a valid `label`.")
ax.set_xlabel(X_label, fontsize=12)
ax.set_ylim(-0.06, 1.06)
# ax.set_ylabel("Evaluation metric", fontsize=14)
ax.tick_params(which="both", labelsize=12)
ax.grid(alpha=0.15)
if legend:
ax.legend(
framealpha=1,
loc="lower left" if X not in ("sb_r", "DEC") else "lower right",
fontsize=12,
)
fig.tight_layout()
plot_comparison_by_X(
cv,
X="r_mag",
X_min=14,
X_max=21,
delta_X=0.5,
X_label=r"$r$ [mag]",
figname="magnitude-comparison.png",
)
plt.savefig(figures_dir / "magnitude-comparison.pdf")
plot_comparison_by_X(
cv,
X="sb_r",
X_min=20,
X_max=26,
delta_X=0.5,
legend=False,
X_label=r"$\mu_{r,\rm eff}$ [mag arcsec$^{-2}$]",
figname="surface_brightness-comparison.png",
)
plt.savefig(figures_dir / "surface_brightness-comparison.pdf")
plot_comparison_by_X(
cv,
X="gr",
X_min=-0.05,
X_max=0.9,
delta_X=0.1,
legend=False,
X_label=r"$g-r$ [mag]",
figname="gmr-comparison.png",
)
plt.savefig(figures_dir / "gmr-comparison.pdf")
plot_comparison_by_X(
cv,
X="M_r",
X_min=-21,
X_max=-12,
delta_X=0.5,
legend=False,
X_label=r"$M_{r}$ [mag]",
figname="M_r-comparison.png",
)
plt.savefig(figures_dir / "M_r-comparison.pdf")
from uncertainties import *
best_fit_params = fit_logistic_model(cv)
def plot_purity_completeness_radial_trends(
cv, saga_sats, saga_hosts, params, figname="purity_completeness_radial_trends"
):
"""Given the cross-validated data, SAGA satellite and host catalogs,
and best-fit model parameters from above, plots the radial and magnitude dependence
for candidates colored by the photometric model predictions for low-z rate,
purity, and completeness.
"""
cv_coords = SkyCoord(cv.RA, cv.DEC, unit="deg")
s2_coords = SkyCoord(saga_sats.RA, saga_sats.DEC, unit="deg")
idx, sep, _ = s2_coords.match_to_catalog_sky(cv_coords)
df = pd.concat(
[
saga_sats.reset_index(),
cv.iloc[idx][["Z", "low_z", "p_CNN", "r_mag", "sb_r", "gr"]].reset_index(),
],
axis=1,
)
df = df[sep < 10 * u.arcsec].copy()
predictions = dict()
for hostname, host in saga_hosts.set_index("INTERNAL_HOSTID", drop=True).iterrows():
host_coord = SkyCoord(host.RA, host.DEC, unit="deg")
sep = host_coord.separation(cv_coords)
max_angsep = ((0.3 / host.DIST) * u.rad).to(u.arcsec)
sub_cv = cv[sep < max_angsep].copy()
sub_cv["D_PROJ"] = sep[sep < max_angsep].to(u.rad).value * host.DIST * 1e3
predictions[hostname] = sub_cv
pred_df = pd.concat(predictions)
# include scaled photometric quantities and compute model predictions
pred_df["r_mag_scaled"] = (pred_df.r_mag - 19.30038351) / 1.63920353
pred_df["sb_r_scaled"] = (pred_df.sb_r - 22.23829513) / 1.60764197
pred_df["gr_scaled"] = (pred_df.gr - 0.97987122) / 1.16822956
pred_df["p_model_rate"] = lowz_rate_model(
params["rate"], pred_df.r_mag_scaled, pred_df.sb_r_scaled, pred_df.gr_scaled
)
pred_df["p_model_completeness"] = lowz_rate_model(
params["completeness"],
pred_df.r_mag_scaled,
pred_df.sb_r_scaled,
pred_df.gr_scaled,
)
pred_df["p_model_purity"] = lowz_rate_model(
params["purity"], pred_df.r_mag_scaled, pred_df.sb_r_scaled, pred_df.gr_scaled
)
# make plots
fig, axes = plt.subplots(
3, 1, sharex=True, constrained_layout=True, figsize=(10, 8), dpi=300
)
is_lowz = Query("low_z == True")
cnn_selected = Query("p_CNN > 0.5")
TP = (is_lowz & cnn_selected).filter(pred_df)
FP = (~is_lowz & cnn_selected).filter(pred_df)
FN = (is_lowz & ~cnn_selected).filter(pred_df)
for ax, metric, (df1, df2) in zip(
axes.flat, ["rate", "completeness", "purity"], [(TP, FP), (TP, FN), (TP, FP)]
):
ax.scatter(
df2.D_PROJ,
df2.r_mag,
c=df2[f"p_model_{metric}"],
marker="x",
edgecolor="none",
s=30,
vmin=0,
vmax=1,
cmap="viridis_r",
)
sc = ax.scatter(
df1.D_PROJ,
df1.r_mag,
c=df1[f"p_model_{metric}"],
vmin=0,
vmax=1,
s=30,
cmap="viridis_r",
edgecolor="k",
marker="o",
)
ax.set_xlim(15, 300)
ax.set_ylim(12, 22)
ax.set_ylabel("$r$ [mag]")
ax.grid(alpha=0.15)
ax.text(18, 20.7, f"Modeled {metric}", fontsize=16)
axes.flat[-1].set_xlabel("$r$ [projected kpc]")
fig.colorbar(sc, ax=axes.flat, aspect=50, pad=-0.01)
plot_purity_completeness_radial_trends(cv, saga2_sats, saga2_hosts, best_fit_params)
plt.savefig(figures_dir / "purity_completeness_radial_trends.pdf")
def plot_model_fit_statistic(cv, params, statistic, figname="model_fit_lowz-stat.png"):
"""Given data, fit parameters, and some desired statistic (`rate`, `purity`,
`completeness`, makes a plot)
"""
# scale photometric parameters and corresponding bins
X = np.array([cv.r_mag, cv.sb_r, cv.gr]).T
X_scaled = (X - X.mean(0)) / X.std(0)
y_true = cv.low_z
y_pred = cv.p_CNN > 0.5
bins_scaled = [
(r0_range - X.mean(0)[0]) / X.std(0)[0],
(sb_range - X.mean(0)[1]) / X.std(0)[1],
(gmr_range - X.mean(0)[2]) / X.std(0)[2],
]
# prepare plots
fig, axes = plt.subplots(1, 3, figsize=(12, 4), dpi=300, sharey=True)
if statistic == "rate":
X_ = X_scaled
y_ = y_true
color = "#58508d"
label = "SAGA"
elif statistic == "completeness":
X_ = X_scaled[y_true]
y_ = y_pred[y_true]
color = "#ff6361"
label = "CNN"
elif statistic == "purity":
X_ = X_scaled[y_pred]
y_ = y_true[y_pred]
color = "#ffa600"
label = "CNN"
else:
raise ValueError("Statistic must be one of 'purity', 'completeness', or 'rate'")
binned_statistic_result = binned_statistic_dd(X_, y_, bins=bins_scaled)
lowz_rate = binned_statistic_result.statistic
model_preds = lowz_rate_model(params, *X_.T)
binned_statistic_result = binned_statistic_dd(X_, model_preds, bins=bins_scaled)
model_rate = binned_statistic_result.statistic
xbins = [r0_bins, sb_bins, gmr_bins]
xlabels = [
r"$r$ [mag]",
r"$\mu_{r, \rm eff}$ [mag arcsec$^{-2}$]",
r"$g-r$ [mag]",
]
for i, (ax, xbins, xlabel) in enumerate(zip(axes.flat, xbins, xlabels)):
axis = tuple(j for j in [0, 1, 2] if j != i)
p_data = np.nanmean(lowz_rate, axis=axis)
p_model = np.nanmean(model_rate, axis=axis)
N = np.isfinite(model_rate).sum(axis=axis)
def _wald_interval(p, N):
return 2 * [np.sqrt(p * (1 - p) / N)]
yerr = _wald_interval(p_data, N)
width = 0.95 * (xbins[1] - xbins[0])
ax.bar(xbins, p_data, width=width, alpha=0.5, color=color, label=label)
ax.errorbar(xbins, p_data, yerr=yerr, ls="none", color=color)
ax.plot(xbins, p_model, c="k", label="Model")
ax.set_ylim(1e-2, 1)
ax.set_yscale("log")
ax.set_xlabel(xlabel)
ax.grid(color="white", lw=0.5, alpha=0.5)
axes.flat[0].set_ylabel(f"Low-$z$ {statistic}")
axes.flat[0].legend(fontsize=16, loc='lower left')
fig.tight_layout()
for statistic in ["rate", "completeness", "purity"]:
plot_model_fit_statistic(
cv,
best_fit_params[statistic],
statistic,
figname=f"model_fit_lowz-{statistic}.png",
)
plt.savefig(figures_dir / f"model_fit_lowz-{statistic}.pdf")
from scipy.stats import binned_statistic
pred_df["r_mag_scaled"] = (pred_df.r_mag - 19.30038351) / 1.63920353
pred_df["sb_r_scaled"] = (pred_df.sb_r - 22.23829513) / 1.60764197
pred_df["gr_scaled"] = (pred_df.gr - 0.97987122) / 1.16822956
pred_df['p_model_rate'] = lowz_rate_model(
best_fit_params['rate'],
pred_df.r_mag_scaled ,
pred_df.sb_r_scaled,
pred_df.gr_scaled
)
pred_df['p_model_completeness'] = lowz_rate_model(
best_fit_params['completeness'],
pred_df.r_mag_scaled ,
pred_df.sb_r_scaled,
pred_df.gr_scaled
)
pred_df['p_model_purity'] = lowz_rate_model(
best_fit_params['purity'],
pred_df.r_mag_scaled ,
pred_df.sb_r_scaled,
pred_df.gr_scaled
)
N_bins = 12
q = Query("p_CNN > 0.5", "D_PROJ > 36")
## compute corrections
correction_factor_with_radius = binned_statistic(
q.filter(pred_df).D_PROJ,
(q.filter(pred_df).p_model_purity / q.filter(pred_df).p_model_completeness),
bins=N_bins,
statistic='median'
).statistic
correction_factor_with_radius_q16 = binned_statistic(
q.filter(pred_df).D_PROJ,
(q.filter(pred_df).p_model_purity / q.filter(pred_df).p_model_completeness),
statistic=partial(np.quantile, q=0.16), bins=N_bins,
).statistic
correction_factor_with_radius_q84 = binned_statistic(
q.filter(pred_df).D_PROJ,
(q.filter(pred_df).p_model_purity / q.filter(pred_df).p_model_completeness),
statistic=partial(np.quantile, q=0.84), bins=N_bins,
).statistic
## PLOT
plt.figure(figsize=(4,4))
plt.plot(
np.linspace(36, 300, N_bins),
correction_factor_with_radius,
color='#003f5c',
)
plt.fill_between(
np.linspace(36, 300, N_bins),
correction_factor_with_radius_q16,
correction_factor_with_radius_q84,
color='#003f5c',
lw=0,
alpha=0.2
)
plt.grid(alpha=0.15)
plt.ylim(0.6, 2.);
plt.ylabel(r'$P_{\rm model}/C_{\rm model}$')
plt.xlabel('$r$ [projected kpc]');
plt.text(33, 1.85, 'SAGA cross-validation data')
plt.tight_layout()
plt.savefig(figures_dir / "model-purity-completeness_host-separation.pdf")
xsaga = pd.read_parquet(ROOT / 'results/xSAGA/sats-nsa_p0_5.parquet')
xsaga["r_mag_scaled"] = (xsaga.r0 - 19.30038351) / 1.63920353
xsaga["sb_r_scaled"] = (xsaga.mu_eff - 22.23829513) / 1.60764197
xsaga["gr_scaled"] = (xsaga.gmr - 0.97987122) / 1.16822956
xsaga['p_model_rate'] = lowz_rate_model(
best_fit_params['rate'],
xsaga.r_mag_scaled ,
xsaga.sb_r_scaled,
xsaga.gr_scaled
)
xsaga['p_model_completeness'] = lowz_rate_model(
best_fit_params['completeness'],
xsaga.r_mag_scaled ,
xsaga.sb_r_scaled,
xsaga.gr_scaled
)
xsaga['p_model_purity'] = lowz_rate_model(
best_fit_params['purity'],
xsaga.r_mag_scaled ,
xsaga.sb_r_scaled,
xsaga.gr_scaled
)
plt.figure(figsize=(5, 5))
mass_bins = np.arange(9.5, 11.5, 0.25)
for m1, m2 in zip(mass_bins, mass_bins+0.25):
q = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
plt.hist(
q.filter(xsaga).p_model_purity / q.filter(xsaga).p_model_completeness,
bins=24, range=[0.5, 3.5], histtype='step',
density=True,
color=mass2color((m1 + m2) / 2, mass_max=11.25),
lw=3 if m2 <= 11 else 2,
ls='-' if m2 <= 11 else '--',
label=f'{m1} - {m2}'
)
# print((q.filter(xsaga).p_model_purity / q.filter(xsaga).p_model_completeness).quantile([0.16, 0.5, 0.84]).values)
plt.grid(alpha=0.15)
plt.xlim(0.8, 3.0)
plt.ylim(0, 2.0)
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1], fontsize=13, title=r'$\log(M_★/M_\odot)$')
plt.xlabel(r'$P_{\rm model} / C_{\rm model}$')
plt.tight_layout()
plt.savefig(figures_dir / "model-purity-completeness_host-mass.pdf")
N_bins = np.arange(0, 6, 1)
plt.figure(figsize=(5, 5))
for n1, n2 in zip(N_bins, N_bins+1):
q = Query(f"SERSIC_N_NSA > {n1}", f"SERSIC_N_NSA < {n2}", "mass_GSE < 11")
plt.hist(
q.filter(xsaga).p_model_purity / q.filter(xsaga).p_model_completeness,
bins=24, range=[0.5, 3.5], histtype='step',
density=True,
color=mass2color((n1 + n2) / 2, cmap=cmr.amber, mass_min=0, mass_max=6.0), lw=3,
label=f"{n1} $-$ {n2}"
)
# print((q.filter(xsaga).p_model_purity / q.filter(xsaga).p_model_completeness).quantile([0.16, 0.5, 0.84]).values)
plt.grid(alpha=0.15)
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1], fontsize=13, title='Sersic $N$')
plt.xlabel(r'$ P_{\rm model} / C_{\rm model}$')
plt.xlim(0.8, 3.0)
plt.ylim(0, 2.0)
plt.tight_layout()
plt.savefig(figures_dir / "model-purity-completeness_host-morphology.pdf")
saga2_hosts = pd.read_csv(ROOT / "data/saga_stage2_hosts.csv")
saga2_sats = pd.read_csv(ROOT / "data/saga_stage2_sats.csv")
cv_coords = SkyCoord(cv.RA, cv.DEC, unit='deg')
s2_coords = SkyCoord(saga2_sats.RA, saga2_sats.DEC, unit='deg')
# joining CNN predictions with SAGA labels
predictions = dict()
for hostname, host in saga2_hosts.set_index("INTERNAL_HOSTID", drop=True).iterrows():
host_coord = SkyCoord(host.RA, host.DEC, unit='deg')
sep = host_coord.separation(cv_coords)
max_angsep = ((0.3 / host.DIST) * u.rad).to(u.arcsec)
sub_cv = cv[sep < max_angsep].copy()
sub_cv['D_PROJ'] = sep[sep < max_angsep].to(u.rad).value * host.DIST * 1e3 # in pkpc
predictions[hostname] = sub_cv
pred_df = pd.concat(predictions)
# confusion matrix elements
FP = (~pred_df.low_z & (pred_df.p_CNN > 0.5)).groupby(level=0).sum()
TP = (pred_df.low_z & (pred_df.p_CNN > 0.5)).groupby(level=0).sum()
TN = (~pred_df.low_z & (pred_df.p_CNN < 0.5)).groupby(level=0).sum()
FN = (pred_df.low_z & (pred_df.p_CNN < 0.5)).groupby(level=0).sum()
N_highz = FP + TN
FP.mean() / (np.pi * 0.5**2)
3.5721442782847617
plt.figure(figsize=(5, 3), dpi=300)
FPR_correction = bootstrap(
((TP+FP) - (FP/(TP+FP+TN+FN)).mean()*(TP+FP+TN+FN) ) / TP,
bootfunc=np.mean, bootnum=10000
)
plt.hist(
FPR_correction, bins=51, range=(0.5, 1.5), color='C1',alpha=0.5, density=True,
label=r"$N_{\rm pred} - f_{\rm FP} N_{\rm cand}$",
)
FP_correction = bootstrap(
((TP+FP) - FP.mean()) / TP,
bootfunc=np.mean, bootnum=10000
)
plt.hist(
FP_correction, bins=51, range=(0.5, 1.5), color='C0', alpha=0.5, density=True,
label=r"$N_{\rm pred} - \Sigma_{\rm FP} \Omega$",
)
purity_correction = bootstrap(
(TP+FP) * (TP/(TP+FP)).mean() / TP,
bootfunc=np.mean, bootnum=10000
)
plt.hist(
purity_correction, bins=51, range=(0.5, 1.5), color='C2', alpha=0.5, density=True,
label=r"$N_{\rm pred} \times \langle \mathcal{P} \rangle$",
);
plt.axvline(1, c='k')
plt.legend(fontsize=13)
plt.xlabel("(Corrected TP) / (Actual TP)")
# plt.ylabel("Number of SAGA halos", fontsize=14)
plt.grid(alpha=0.15)
plt.ylim(0,7)
plt.tight_layout()
plt.savefig(figures_dir / "TP-corrections.pdf")
# only SAGA II selection
FP = (~pred_df.low_z & (pred_df.p_CNN > 0.5) & (pred_df.saga_sel == 2)).groupby(level=0).sum()
TP = (pred_df.low_z & (pred_df.p_CNN > 0.5) & (pred_df.saga_sel == 2)).groupby(level=0).sum()
TN = (~pred_df.low_z & (pred_df.p_CNN < 0.5) & (pred_df.saga_sel == 2)).groupby(level=0).sum()
FN = (pred_df.low_z & (pred_df.p_CNN < 0.5) & (pred_df.saga_sel == 2)).groupby(level=0).sum()
np.mean(FP / (FP + TP + TN + FN))
0.0161348323666571
FP.mean() / (np.pi * 0.5**2)
3.0416278013117775
from radial_profiles import *
from satellites import *
def corrected_satellite_richness(sats, hosts, max_radius=300, min_radius=36, nonsatellite_volume_density=0.0142, FP_surface_density=3.04):
"""Returns the correct N_sat per host, such that the end result has the
same shape as `hosts`.
Corections:
- Completeness correction is now split into an extra (equal)
contribution per host: 1/0.600 - 1 = 2/3 the mean value.
Note: this is done in order to make the completeness correction
"commute" with taking the mean or std.
- Interlopers appear at a rate of 2.34 per sq deg
- Non-satellite low-z galaxies appear at a volume density
of 0.0142 per Mpc^-3.
"""
# get data ready
sats = Query(f"sep < {max_radius}").filter(sats)
sats = sats[sats.NSAID.isin(hosts.index)]
# count number of satellites
N_sats = (
sats.value_counts("NSAID")
.append(
pd.Series(
{nsaid: 0 for nsaid in hosts.index[~hosts.index.isin(sats.NSAID)]},
dtype=np.float64
)
)
)
# add contribution for completeness
N_incomplete = np.full(len(hosts), N_sats.mean() * 2/3)
# count number of interlopers
N_interlopers = compute_interloper_numbers(hosts.z_NSA, max_radius=max_radius, FP_surface_density=FP_surface_density)
# count number of non-satellites
N_unrelated_lowz = compute_nonsatellite_numbers(hosts.z_NSA, max_radius=max_radius, volume_density=nonsatellite_volume_density)
return (N_sats + N_incomplete - N_interlopers - N_unrelated_lowz).values
def f(s, h, q, N_boot=100, r=300):
"""Helper function for bootstrapping satellite number counts"""
return bootstrap(
corrected_satellite_richness(q.filter(s), q.filter(h), max_radius=r),
bootnum=N_boot,
bootfunc=np.mean
)
def compute_nonsatellite_numbers(redshifts, delta_z=0.005, volume_density=0.0142, max_radius=300, min_radius=36):
"""Compute the number of low-z galaxies that *aren't* satellites, for hosts
at given redshifts. `volume_density` should be in Mpc^-3.
"""
z_upper = redshifts + delta_z
z_lower = redshifts - delta_z
z_upper = np.where(z_upper > 0.03, 0.03, z_upper)
z_lower = np.where(z_lower < 0, 0, z_lower)
total_volumes = (
cosmo.comoving_volume(0.03) - cosmo.comoving_volume(z_upper)
) / 1.03 ** 3 + (cosmo.comoving_volume(z_lower)) / (1 + z_lower) ** 3
volumes = (
(((max_radius * u.kpc)**2 - (min_radius * u.kpc)**2) / (cosmo.kpc_proper_per_arcmin(redshifts)**2))
.to(u.steradian)
.value
/ (4 * np.pi)
* total_volumes
)
return volume_density * volumes.value
def compute_interloper_numbers(redshifts, max_radius=300, min_radius=36, FP_surface_density=3.04):
"""Compute the number of interlopers that the CNN gets wrong, on average,
per square degree.
"""
sq_deg_per_sq_kpc = (
cosmo.arcsec_per_kpc_proper(redshifts).to(u.deg / u.kpc).value
) ** 2
surface_area = np.pi * (max_radius ** 2 - min_radius ** 2) * sq_deg_per_sq_kpc
return surface_area * FP_surface_density
def plot_radial_profile_by_host_mass(
hosts,
sats,
corrected=True,
include_saga=True,
radial_bins=np.arange(36, 300, 1),
cumulative=True,
normalize=False,
areal_density=False,
N_boot=None,
sigma_smooth=None,
mass_min=9.5,
mass_max=11.0,
dmass=0.5,
fname="radial_profile-by-host_mass",
):
"""Creates a radial profile plot for satellites colored by host mass.
Parameters
hosts : pd.DataFrame
DataFrame of hosts, which is assumed to be pre-filtered using
cuts at top of this script.
sats : pd.DataFrame
DataFrame of satellites, also assumed to be filtered.
corrected : bool
If true, correct by dividing by the completeness (0.600) and subtracting
the number of interlopers based on a constant false positive surface
density (3.04 per deg^2).
radial_bins : 1d array-like
A list or array of (maximum) projected radii.
cumulative : bool
Toggles whether to plot the satellite number within a given projected
radius versus at a given radius.
normalize : bool
Toggles whether to divide the curves by the total number of satellites
areal_density : bool
Toggles whether to divide by the annular area at each radius.
N_boot : int or None
The number of bootstrap resamples (for estimating uncertainties).
Can also be `None` if boostrapping is not wanted.
sigma_smooth : float or None
The sigma for 1d Gaussian convolution. Can be None.
include_saga : bool
If true, then include the 16-84th percentile range for SAGA mean profile.
mass_min : float
The minimum host (log) mass.
mass_max : float
The maximum host (log) mass.
dmass : float
The interval per mass bin.
fname : str
The name of the output figure (JPG format), to be saved in the directory
`./results/xSAGA/plots/profiles/`.
"""
fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=300)
mass_bins = np.arange(mass_min, mass_max, dmass)
dr = (radial_bins.max() - radial_bins.min()) / len(radial_bins)
# plot saga curve
if include_saga and cumulative and (not areal_density):
boot_saga_profile = bootstrap_saga_profile(radial_bins=radial_bins, N_boot=100)
if normalize:
boot_saga_profile /= boot_saga_profile.max(1, keepdims=True)
ax.fill_between(
radial_bins,
*np.quantile(boot_saga_profile, [0.16, 0.84], axis=0),
color="0.5",
lw=0,
alpha=0.5,
label="SAGA II",
)
for m1, m2 in zip(mass_bins, mass_bins + dmass):
q = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
satellite_separations = q.filter(sats).sep.values
try:
# compute sat cumulative profile or bootstrapped profile
if N_boot is None:
profile = compute_radial_profile(satellite_separations, radial_bins)
profile = profile / q.count(hosts)
if sigma_smooth is not None:
profile = smooth(profile, sigma_smooth)
# correct using completeness and interlopers
if corrected:
interloper_profile = compute_interloper_profile(
q.filter(sats).z_NSA,
radial_bins,
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts.loc[q.filter(sats).NSAID].N_unrelated_lowz, radial_bins
)
profile = (
profile / 0.600 - interloper_profile - nonsatellite_profile
)
if areal_density:
# kpc^-2 --> Mpc^-2
surface_area = (
np.pi
* (
(radial_bins + dr / 2) ** 2
- (radial_bins.min() - dr / 2) ** 2
)
/ 1e6
)
profile = profile / surface_area
if not cumulative:
profile = np.gradient(profile, radial_bins, edge_order=2)
if normalize:
profile /= profile.max()
ax.plot(
radial_bins,
profile.mean(0),
c=mass2color((m1 + m2) / 2),
label=f"${m1}-{m2}$",
lw=3,
)
else:
assert isinstance(N_boot, int), "Please enter an integer `N_boot`."
profile_bootstrapped = bootstrap(
satellite_separations,
bootfunc=partial(compute_radial_profile, radial_bins=radial_bins),
bootnum=N_boot,
)
profile_bootstrapped = profile_bootstrapped / q.count(hosts)
if sigma_smooth is not None:
profile_bootstrapped = smooth(profile_bootstrapped, sigma_smooth)
if corrected:
interloper_profile = compute_interloper_profile(
q.filter(sats).z_NSA,
radial_bins,
)
interloper_profile_bootstrapped = rng.choice(
interloper_profile, size=N_boot, replace=True
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts.loc[q.filter(sats).NSAID].N_unrelated_lowz, radial_bins
)
nonsatellite_profile_bootstrapped = rng.choice(
nonsatellite_profile, size=N_boot, replace=True
)
profile_bootstrapped = (
profile_bootstrapped / 0.600
- interloper_profile_bootstrapped
- nonsatellite_profile_bootstrapped
)
if not cumulative:
profile_bootstrapped = np.gradient(
profile_bootstrapped, radial_bins, axis=1, edge_order=2
)
if areal_density:
surface_area = (
np.pi
* (
(radial_bins + dr / 2) ** 2
- (radial_bins.min() - dr / 2) ** 2
)
/ 1e6
)
profile_bootstrapped = profile_bootstrapped / surface_area
if normalize:
profile_bootstrapped /= profile_bootstrapped.max(1, keepdims=True)
ax.fill_between(
radial_bins,
*np.quantile(profile_bootstrapped, [0.16, 0.84], axis=0),
color=mass2color((m1 + m2) / 2),
label=f"${m1}-{m2}$",
lw=0,
alpha=0.7,
)
except ValueError as e:
raise e
xlabel = "$r$ [projected kpc]"
ylabel = (
("Normalized " if normalize else "")
+ (r"$\Sigma_{\rm sat}$" if areal_density else r"$N_{\rm sat}$")
+ ("(r)" if not cumulative else "(<r)")
+ (" [Mpc$^{-2}$]" if areal_density else "")
)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.grid(alpha=0.15)
legend_location = (
"upper right" if (areal_density and (not cumulative)) else "upper left"
)
handles, labels = ax.get_legend_handles_labels()
ax.legend(
handles[::-1], labels[::-1], loc=legend_location, fontsize=12, title=r"log($M_★/M_\odot$)", title_fontsize=14
)
fig.tight_layout()
N_boot = 100
hosts, sats = load_hosts_and_sats()
print("No cuts")
N_hosts = len(hosts)
N_sats_CNN = len(sats)
print(f" Number of hosts: {N_hosts}")
print(f" Number of CNN-detected satellites: {N_sats_CNN}")
No cuts Number of hosts: 12769 Number of CNN-detected satellites: 21492
print("After mass, redshift, and completeness cuts")
# impose cuts
hosts = HOST_QUERY.filter(hosts)
sats = (HOST_QUERY & SAT_QUERY).filter(sats)
N_hosts = len(hosts)
N_sats_CNN = len(sats)
print(f" Number of hosts: {N_hosts}")
print(f" Number of CNN-detected satellites: {N_sats_CNN}")
After mass, redshift, and completeness cuts Number of hosts: 12053 Number of CNN-detected satellites: 14661
print("After mass/redshift/completeness + isolation cuts")
# isolate hosts and sats
hosts = isolate_hosts(hosts, delta_mass=0.0, delta_z=0.005, delta_d=1.0)
sats = sats[sats.NSAID.isin(hosts.index) & (sats.M_r < M_r_lim)].copy()
N_hosts = len(hosts)
N_sats_CNN = len(sats)
print(f" Number of hosts: {N_hosts}")
print(f" Number of CNN-detected satellites: {N_sats_CNN}")
After mass/redshift/completeness + isolation cuts Number of hosts: 7542 Number of CNN-detected satellites: 11449
Without bootstrap resampling
corrected_satellite_richness(sats, hosts).sum()
16241.42335083025
corrected_satellite_richness(sats, hosts).mean()
2.1534637166309003
With bootstrap resampling
boot_N_sat = bootstrap(
corrected_satellite_richness(sats, hosts),
bootnum=100,
bootfunc=np.mean
)
print(f'{boot_N_sat.mean():.3f} +/- {boot_N_sat.std():.3f}')
2.153 +/- 0.021
q = Query("mass_GSE > 10", "mass_GSE < 11")
corrected_satellite_richness(q.filter(sats), q.filter(hosts)).mean()
2.5415410801876317
np.percentile(
corrected_satellite_richness(q.filter(sats), q.filter(hosts)),
[16, 50, 84]
)
array([0.83529598, 1.89545913, 3.94710504])
Host-to-host scatter ranges from 0.84 to 3.95!
q = Query("mass_GSE > 10", "mass_GSE < 11")
corrected_satellite_richness(q.filter(sats), q.filter(hosts), max_radius=150).mean()
0.8818137862704115
q = Query("mass_GSE > 10", "mass_GSE < 11", "SERSIC_N_NSA < 2.5")
corrected_satellite_richness(q.filter(sats), q.filter(hosts)).mean()
2.144722733260225
np.percentile(
corrected_satellite_richness(q.filter(sats), q.filter(hosts)),
[16, 50, 84]
)
array([0.66932708, 1.70810792, 3.76386005])
mass_bins = np.arange(9.5, 11, 0.25)
for m1, m2 in zip(mass_bins, mass_bins + 0.25):
print(Query(f'mass_GSE > {m1}', f'mass_GSE < {m2}').count(hosts), Query(f'mass_GSE > {m1}', f'mass_GSE < {m2}').count(sats))
970 898 1238 1238 1447 1860 1590 2358 1388 2774 909 2321
with rng_context:
plot_radial_profile_by_host_mass(
hosts,
sats,
corrected=True,
include_saga=False,
dmass=0.25,
N_boot=N_boot,
fname="radial_profile-by-host_mass",
)
plt.savefig(figures_dir / "radial_profile-by-host_mass.pdf")
with rng_context:
plot_radial_profile_by_host_mass(
hosts,
sats,
corrected=True,
include_saga=False,
normalize=True,
dmass=0.25,
N_boot=N_boot,
fname="normalized-radial_profile-by-host_mass",
)
plt.gca().get_legend().remove()
plt.savefig(figures_dir / "normalized-radial_profile-by-host_mass.pdf")
def plot_xsaga_saga_comparison():
m1 = 10
m2 = 11
r_min = 36
r_max = 300
dr = 1
N_boot = 1000
fig, [ax1, ax2] = plt.subplots(1, 2, figsize=(10, 5))
### Satellite profiles
### ==================
q = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
satellite_separations = q.filter(sats).sep.values
radial_bins = np.arange(r_min, r_max, dr)
# xSAGA profile
profile_bootstrapped = bootstrap(
satellite_separations,
bootfunc=partial(compute_radial_profile, radial_bins=radial_bins),
bootnum=N_boot,
)
profile_bootstrapped = profile_bootstrapped / q.count(hosts)
interloper_profile = compute_interloper_profile(
q.filter(sats).z_NSA,
radial_bins,
interloper_surface_density=3.04,
)
interloper_profile_bootstrapped = rng.choice(
interloper_profile, size=N_boot, replace=True
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts.loc[q.filter(sats).NSAID].N_unrelated_lowz, radial_bins
)
nonsatellite_profile_bootstrapped = rng.choice(
nonsatellite_profile, size=N_boot, replace=True
)
profile_bootstrapped = (
profile_bootstrapped / 0.600
- interloper_profile_bootstrapped
- nonsatellite_profile_bootstrapped
)
x1, = ax1.plot(
radial_bins,
profile_bootstrapped.mean(0),
c=mass2color((m1 + m2) / 2),
lw=1,
zorder=3,
)
x2 = ax1.fill_between(
radial_bins,
*np.quantile(profile_bootstrapped, [0.16, 0.84], axis=0),
color=mass2color((m1 + m2) / 2),
lw=0,
alpha=0.5,
zorder=3,
)
# saga profile
boot_saga_profile = bootstrap_saga_profile(radial_bins, N_boot=N_boot)
s1, = ax1.plot(
radial_bins,
boot_saga_profile.mean(0),
c='C0',
lw=1,
ls='--',
zorder=3,
)
s2 = ax1.fill_between(
radial_bins,
*np.quantile(boot_saga_profile, [0.16, 0.84], axis=0),
color='C0',
alpha=0.5,
lw=0,
)
### Normalized distributions
### ------------------------
ax2.plot(
radial_bins,
(profile_bootstrapped / profile_bootstrapped.max(1, keepdims=True)).mean(0),
c=mass2color((m1 + m2) / 2),
lw=1,
zorder=3,
)
ax2.fill_between(
radial_bins,
*np.quantile((profile_bootstrapped / profile_bootstrapped.max(1, keepdims=True)), [0.16, 0.84], axis=0),
color=mass2color((m1 + m2) / 2),
lw=0,
alpha=0.5,
zorder=3,
)
ax2.plot(
radial_bins,
(boot_saga_profile / boot_saga_profile.max(1, keepdims=True)).mean(0),
c='C0',
lw=1,
ls='--',
zorder=3,
)
ax2.fill_between(
radial_bins,
*np.quantile((boot_saga_profile / boot_saga_profile.max(1, keepdims=True)), [0.16, 0.84], axis=0),
color='C0',
alpha=0.5,
lw=0,
)
N_sat_saga = boot_saga_profile.max(1).mean()
N_sat_saga_err = boot_saga_profile.max(1).std()
N_sat = profile_bootstrapped.max(1).mean()
N_sat_err = profile_bootstrapped.max(1).std()
ax1.grid(alpha=0.15)
ax1.set_xlabel("$r$ [projected kpc]")
ax1.set_ylabel(r"$N_{\rm sat}(<r)$")
ax1.text(0.04, 0.8, r'$N_{\rm sat}$' + f' = {N_sat_saga:.2f} $\pm$ {N_sat_saga_err:.2f}', color='C0', transform=ax1.transAxes, fontsize=16)
ax1.text(0.04, 0.9, r'$N_{\rm sat}$' + f' = {N_sat:.2f} $\pm$ {N_sat_err:.2f}', color=mass2color(10.5), transform=ax1.transAxes, fontsize=16)
ax2.legend(
[(x1, x2), (s1, s2)],
[f'xSAGA (This work)\n$N_{{\\rm host}}={q.count(hosts)}$', 'SAGA (Mao et al. 2021)\n$N_{\\rm host}=36$'],
loc='upper left', fontsize=14, framealpha=0
)
ax2.grid(alpha=0.15)
ax2.set_xlabel("$r$ [projected kpc]")
ax2.set_ylabel(r"Normalized $N_{\rm sat}(<r)$");
fig.tight_layout()
plot_xsaga_saga_comparison()
plt.savefig(figures_dir / 'xSAGA-SAGA_comparison.pdf')
def plot_radial_profile_by_host_morphology_alt(
hosts,
sats,
radial_bins=np.arange(36, 300, 1),
corrected=True,
cumulative=True,
normalize=False,
areal_density=False,
N_boot=None,
sigma_smooth=None,
sersic_n_low=(0, 2.5),
sersic_n_high=(3, 6),
mass_min=9.5,
mass_max=11.0,
dmass=0.5,
fname="radial_profile-by-host_morphology-alt",
):
"""Creates two satellite radial profile plots based on a split in host morphology.
Parameters
hosts : pd.DataFrame
DataFrame of hosts, which is assumed to be pre-filtered using
cuts at top of this script.
sats : pd.DataFrame
DataFrame of satellites, also assumed to be filtered.
radial_bins : 1d array-like
A list or array of (maximum) projected radii.
corrected : bool
If true, correct by dividing by the completeness (0.600) and subtracting
the number of interlopers based on a constant false positive surface
density (3.04 per deg^2).
cumulative : bool
Toggles whether to plot the satellite number within a given projected
radius versus at a given radius.
normalize : bool
Toggles whether to divide the curves by the total number of satellites
areal_density : bool
Toggles whether to divide by the annular surface area at each radius.
N_boot : int
The number of bootstrap resamples (for estimating uncertainties).
Can also be `None` if boostrapping is not wanted.
sersic_n_low : tuple(float, float)
A pair of min and max Sersic indices to define the disk morphology.
sersic_n_high : tuple(float, float)
A pair of min and max Sersic indices to define the elliptical morphology.
mass_min : float
The minimum host (log) mass.
mass_max : float
The maximum host (log) mass.
dmass : float
The interval per mass bin.
fname : str
The name of the output figure (JPG format), to be saved in the directory
`./results/xSAGA/plots/profiles/`.
"""
fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=300, sharey=True)
mass_bins = np.arange(mass_min, mass_max, dmass)
for sersic_n_range, label in zip([sersic_n_low, sersic_n_high], ['disk', 'elliptical']):
n1, n2 = sersic_n_range
q_sersic_n = Query(f"SERSIC_N_NSA > {n1}", f"SERSIC_N_NSA < {n2}")
for m1, m2 in zip(mass_bins, mass_bins + dmass):
q_mass = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
# combine morphology and mass queries
q = q_sersic_n & q_mass
satellite_separations = q.filter(sats).sep.values
try:
# compute sat cumulative profile or bootstrapped profile
if N_boot is None:
profile = compute_radial_profile(satellite_separations, radial_bins)
profile = profile / q.count(hosts)
if sigma_smooth is not None:
profile = smooth(profile, sigma_smooth)
# correct using completeness and interlopers
if corrected:
interloper_profile = compute_interloper_profile(
q.filter(sats).z_NSA,
radial_bins,
interloper_surface_density=3.04,
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts.loc[q.filter(sats).NSAID].N_unrelated_lowz,
radial_bins,
)
profile = (
profile / 0.600 - interloper_profile - nonsatellite_profile
)
if areal_density:
surface_area = (
np.pi * np.gradient(radial_bins ** 2, edge_order=2) * 1e-6
)
profile = profile / surface_area
if not cumulative:
profile = np.gradient(profile, radial_bins)
if normalize:
profile /= profile.max()
ax.plot(
radial_bins,
profile,
c=mass2color((m1 + m2) / 2),
label=f"${m1}-{m2}$",
lw=3,
)
else:
assert isinstance(N_boot, int), "Please enter an integer `N_boot`."
profile_bootstrapped = bootstrap(
satellite_separations,
bootfunc=partial(
compute_radial_profile, radial_bins=radial_bins
),
bootnum=N_boot,
)
profile_bootstrapped = profile_bootstrapped / q.count(hosts)
if sigma_smooth is not None:
profile_bootstrapped = smooth(
profile_bootstrapped, sigma_smooth
)
if corrected:
interloper_profile = compute_interloper_profile(
q.filter(sats).z_NSA,
radial_bins,
)
interloper_profile_bootstrapped = rng.choice(
interloper_profile, size=N_boot, replace=True
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts.loc[q.filter(sats).NSAID].N_unrelated_lowz,
radial_bins,
)
nonsatellite_profile_bootstrapped = rng.choice(
nonsatellite_profile, size=N_boot, replace=True
)
profile_bootstrapped = (
profile_bootstrapped / 0.600
- interloper_profile_bootstrapped
- nonsatellite_profile_bootstrapped
)
if areal_density:
surface_area = (
np.pi * np.gradient(radial_bins ** 2, edge_order=2) * 1e-6
)
profile_bootstrapped = profile_bootstrapped / surface_area
if not cumulative:
profile_bootstrapped = np.gradient(
profile_bootstrapped, radial_bins, axis=1
)
if normalize:
profile_bootstrapped /= profile_bootstrapped.max(1)[
:, np.newaxis
]
ax.fill_between(
radial_bins,
*np.quantile(profile_bootstrapped, [0.16, 0.84], axis=0),
color=mass2color((m1 + m2) / 2),
label=f"{label}, ${m1}-{m2}$",
lw=0.3,
hatch=None if label == "disk" else '//////',
facecolor=mass2color((m1 + m2) / 2) if label == "disk" else 'white',
alpha=0.7,
)
except ValueError:
continue
ax.grid(alpha=0.15)
xlabel = "$r$ [projected kpc]"
ylabel = (
("Normalized " if normalize else "")
+ (r"$\Sigma_{\rm sat}$" if areal_density else r"$N_{\rm sat}$")
+ ("(r)" if not cumulative else "(<r)")
+ (" [Mpc$^{-2}$]" if areal_density else "")
)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc="upper left", fontsize=12)
fig.tight_layout()
fig.savefig(results_dir / f"plots/profiles/{fname}.png")
with rng_context:
plot_radial_profile_by_host_morphology_alt(
hosts,
sats,
radial_bins=np.arange(36, 300, 1),
cumulative=True,
N_boot=N_boot,
fname="radial_profile-by-host_morphology-alt",
)
plt.savefig(figures_dir / "radial_profile-by-host_morphology-alt.pdf")
fig, axes = plt.subplots(1, 3, figsize=(12, 4.5), dpi=300, sharey=True)
N_boot = 100
radial_bins = np.arange(36, 300, 1)
mass_bins = np.arange(9.5, 11, 0.5)
for i, (ax, m1, m2) in enumerate(zip(axes.flat, mass_bins, mass_bins + 0.5)):
q_mass = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
for n1, n2, label in zip([0, 3], [2.5, 6], ['disk', 'elliptical']):
q = Query(f"SERSIC_N_NSA > {n1}", f"SERSIC_N_NSA < {n2}") & q_mass
satellite_separations = q.filter(sats).sep.values
profile_bootstrapped = bootstrap(
satellite_separations,
bootfunc=partial(compute_radial_profile, radial_bins=radial_bins),
bootnum=N_boot,
)
profile_bootstrapped = profile_bootstrapped / q.count(hosts)
interloper_profile = compute_interloper_profile(
q.filter(sats).z_NSA,
radial_bins,
)
interloper_profile_bootstrapped = rng.choice(
interloper_profile, size=N_boot, replace=True
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts.loc[q.filter(sats).NSAID].N_unrelated_lowz, radial_bins
)
nonsatellite_profile_bootstrapped = rng.choice(
nonsatellite_profile, size=N_boot, replace=True
)
profile_bootstrapped = (
profile_bootstrapped / 0.600
- interloper_profile_bootstrapped
- nonsatellite_profile_bootstrapped
)
# normalize
profile_bootstrapped /= profile_bootstrapped.max(1, keepdims=True)
ax.fill_between(
radial_bins,
*np.quantile(profile_bootstrapped, [0.16, 0.84], axis=0),
color=mass2color((m1 + m2) / 2),
label=f"{label}, ${m1}-{m2}$",
lw=0.3,
hatch=None if label == "disk" else '//////',
facecolor=mass2color((m1 + m2) / 2) if label == "disk" else 'white',
alpha=0.7,
)
ax.grid(alpha=0.15)
ax.set_xlabel("$r$ [projected kpc]")
ax.text(
0.5,
0.92,
r"${0:g} < \log(M_★/M_\odot) < {1:g}$".format(m1, m2),
transform=ax.transAxes,
ha="center",
fontsize=16,
)
axes.flat[0].set_ylabel(r"Normalized $N_{\rm sat}(<r)$")
# fig.legend(
# *axes.flat[-1].get_legend_handles_labels(),
# loc="upper left",
# fontsize=14,
# title="$\Delta m_{r,*}$",
# title_fontsize=16,
# bbox_to_anchor=(0.075, 0.885)
# )
fig.tight_layout()
plt.savefig(figures_dir / "normalized-radial_profile-by-morphology.pdf")
def plot_areal_density_by_morphology():
fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=300, sharey=False)
N_boot = 100
dmass = 0.5
mass_bins = np.arange(9.5, 11, dmass)
COMPLETENESS = 0.600
dr = 25
radial_bins = np.arange(36, 300, dr)
surface_area = np.pi * ((radial_bins+dr/2)**2 - (radial_bins.min() - dr/2)**2) / 1e6
morphology_queries = [Query('SERSIC_N_NSA < 2.5'), Query('SERSIC_N_NSA > 3')]
for q_morph, label in zip(morphology_queries, ['disk', 'elliptical']):
for i, [m1, m2] in enumerate(zip(mass_bins, mass_bins + dmass)):
q_mass = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
q = q_morph & q_mass
satellite_separations = q.filter(sats).sep.values
profile_bootstrapped = bootstrap(
satellite_separations,
bootfunc=partial(compute_radial_profile, radial_bins=radial_bins),
bootnum=N_boot,
)
profile_bootstrapped = profile_bootstrapped / q.count(hosts)
interloper_profile = compute_interloper_profile(
q.filter(sats).z_NSA,
radial_bins,
)
interloper_profile_bootstrapped = rng.choice(
interloper_profile, size=N_boot, replace=True
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts.loc[q.filter(sats).NSAID].N_unrelated_lowz, radial_bins
)
nonsatellite_profile_bootstrapped = rng.choice(
nonsatellite_profile, size=N_boot, replace=True
)
profile_bootstrapped = (
profile_bootstrapped / COMPLETENESS
- interloper_profile_bootstrapped
- nonsatellite_profile_bootstrapped
)
pdf = np.gradient(profile_bootstrapped, radial_bins, axis=1, edge_order=2)
areal_pdf = pdf / surface_area * dr
ax.fill_between(
radial_bins,
*np.quantile(areal_pdf, [0.16, 0.84], axis=0),
color=mass2color((m1 + m2) / 2),
label=f"{label}, ${m1}-{m2}$",
lw=0.3,
hatch=None if label == "disk" else '//////',
facecolor=mass2color((m1 + m2) / 2) if label == "disk" else 'white',
alpha=0.7,
zorder=3,
)
# ax.plot(radial_bins, profile_bootstrapped.mean(0), color=mass2color((m1+m2)/2), lw=2, ls='--')
ax.grid(alpha=0.2)
ax.set_xlabel('$r$ [projected kpc]', fontsize=14)
ax.set_xlim(20, 305)
# ax.set_ylim(-1, 41)
# ax.text(0.5, 0.9, label, ha='center', fontsize=16, transform=ax.transAxes)
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc="upper right", fontsize=12)
ax.set_ylabel(r'$\Sigma_{\rm sat}(r)$ [Mpc$^{-2}$]', fontsize=14)
# ax.set_yscale('log')
# ax.set_xscale('log')
fig.tight_layout()
with rng_context:
plot_areal_density_by_morphology()
plt.savefig(figures_dir / "areal_density_pdf-by-host_morphology.pdf")
hosts, sats = load_hosts_and_sats()
# impose cuts
hosts = HOST_QUERY.filter(hosts)
sats = (HOST_QUERY & SAT_QUERY).filter(sats)
# isolate hosts and sats
hosts = isolate_hosts(hosts, delta_mass=0.0, delta_z=0.005, delta_d=1.0)
sats = sats[sats.NSAID.isin(hosts.index)].copy()
N_boot = 100
sats["r_abs"] = sats.r0 - cosmo.distmod(sats.z_NSA)
sats_subset = sats.join(
sats.groupby("NSAID").M_r.min().rename("M_r_sat"),
on="NSAID",
)
hosts_subset = hosts.join(
sats_subset.set_index("NSAID", drop=True).M_r_sat, on="NSAID", how="left"
).drop_duplicates()
hosts_subset['magnitude_gap'] = hosts_subset.M_r_sat - hosts_subset.M_r_NSA
sats_subset['magnitude_gap'] = sats_subset.M_r_sat - sats_subset.M_r_NSA
plt.figure(figsize=(5, 5), dpi=300)
dmass = 0.25
mass_bins = np.arange(9.5, 11, dmass)
for i, [m1, m2] in enumerate(zip(mass_bins, mass_bins + dmass)):
q_mass = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
boot_mag_gap = []
magnitude_gap_bins = np.arange(0, 6, 1)
for j, [mag1, mag2] in enumerate(zip(magnitude_gap_bins, magnitude_gap_bins + 1)):
q_mag = Query(f"magnitude_gap > {mag1}", f"magnitude_gap < {mag2}")
number_of_hosts = (q_mag & q_mass).count(hosts_subset)
if mag2 > (i + 2):
boot_mag_gap.append(np.full((N_boot,), np.nan))
else:
boot_mag_gap.append(
f(sats_subset, q_mag.filter(hosts_subset), q_mass)
)
boot_mag_gap = np.array(boot_mag_gap)
plt.errorbar(
magnitude_gap_bins[:len(boot_mag_gap)] + 0.5,
boot_mag_gap.mean(1),
boot_mag_gap.std(1),
c=mass2color((m1+m2)/2),
markersize=5,
ls='-',
lw=2,
fmt='o',
label=f'${m1} - {m2}$'
)
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1], fontsize=12, framealpha=1, title=r'$\log(M_★/M_\odot)$', title_fontsize=14,)
plt.grid(alpha=0.15)
plt.xlim(-0.05, 6.05)
# plt.ylim(0.9, 8.1)
plt.xlabel(r'Magnitude gap $\equiv \Delta m_{r,*}$')
plt.ylabel(r'$N_{\rm sat}$')
plt.tight_layout()
plt.savefig(figures_dir / "satellite_richness-by-magnitude_gap.pdf")
fig, axes = plt.subplots(1, 3, figsize=(12, 4.5), dpi=300, sharey=True)
N_boot = 100
radial_bins = np.arange(36, 300, 1)
mass_bins = np.arange(9.5, 11, 0.5)
magnitude_gap_bins = np.arange(0, 6, 1)
for i, (ax, m1, m2) in enumerate(zip(axes.flat, mass_bins, mass_bins + 0.5)):
q_mass = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
for mag1, mag2 in zip(magnitude_gap_bins, magnitude_gap_bins + 1):
q = Query(f"magnitude_gap > {mag1}", f"magnitude_gap < {mag2}") & q_mass
satellite_separations = q.filter(sats_subset).sep.values
if (len(satellite_separations) < 50) or (mag2 > 2 * (i + 1)):
continue
profile_bootstrapped = bootstrap(
satellite_separations,
bootfunc=partial(compute_radial_profile, radial_bins=radial_bins),
bootnum=N_boot,
)
profile_bootstrapped = profile_bootstrapped / q.count(hosts_subset)
interloper_profile = compute_interloper_profile(
q.filter(sats_subset).z_NSA,
radial_bins,
)
interloper_profile_bootstrapped = rng.choice(
interloper_profile, size=N_boot, replace=True
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts_subset.loc[q.filter(sats_subset).NSAID].N_unrelated_lowz, radial_bins
)
nonsatellite_profile_bootstrapped = rng.choice(
nonsatellite_profile, size=N_boot, replace=True
)
profile_bootstrapped = (
profile_bootstrapped / 0.600
- interloper_profile_bootstrapped
- nonsatellite_profile_bootstrapped
)
# xSAGA
ax.fill_between(
radial_bins,
*np.quantile(profile_bootstrapped / profile_bootstrapped.max(1, keepdims=True), [0.16, 0.84], axis=0),
color=gap2color((mag1 + mag2) / 2),
label=f"${mag1}-{mag2}$",
lw=0,
alpha=0.7,
zorder=10-mag1,
)
ax.grid(alpha=0.15)
ax.set_xlabel("$r$ [projected kpc]")
ax.text(
0.5,
0.92,
r"${0:g} < \log(M_★/M_\odot) < {1:g}$".format(m1, m2),
transform=ax.transAxes,
ha="center",
fontsize=16,
)
axes.flat[0].set_ylabel(r"Normalized $N_{\rm sat}(<r)$")
fig.legend(
*axes.flat[-1].get_legend_handles_labels(),
loc="upper left",
fontsize=14,
title="$\Delta m_{r,*}$",
title_fontsize=16,
bbox_to_anchor=(0.075, 0.885)
)
fig.tight_layout()
plt.savefig(figures_dir / "normalized-radial_profile-by-magnitude_gap.pdf")
# fit only to central galaxies (see second row in Table J1 of Behroozi+19)
UniverseMachine_params = {
"alpha": 1.957,
"beta": 0.474,
"gamma": -1.065,
"delta": 0.386,
"epsilon": -1.435,
"M0": 12.081,
}
def SHMR(M_halo, params):
"""Stellar Mass Halo Mass Relation from Behroozi+19. See Appendix J."""
x = M_halo - params["M0"]
return (
params["M0"] + params["epsilon"]
- np.log10(10**(-params["alpha"]*x) + 10**(-params["beta"]*x))
+ 10**params["gamma"]*np.exp(-0.5 * (x / params["delta"])**2)
)
# tabulate
class StellarMassHaloMassRelation():
def __init__(self):
self.halo_masses = np.arange(11, 15, 0.001)
self.stellar_masses = SHMR(self.halo_masses, UniverseMachine_params)
def stellar_mass(self, M_halo):
return self.stellar_masses[np.argmin((M_halo - self.halo_masses)**2)]
def halo_mass(self, M_star):
return self.halo_masses[np.argmin((M_star - self.stellar_masses)**2)]
def R_vir(self, M_star=None, M_halo=None):
if M_halo is None:
if M_star is None:
raise ValueError("Please enter M_star or M_halo")
else:
M_halo = self.halo_mass(M_star)
return self.R_vir(M_halo=M_halo)
else:
return mass_so.M_to_R(10**(M_halo), z=0, mdef='vir')
B19 = StellarMassHaloMassRelation()
print(B19.R_vir(M_star=9.5))
print(B19.R_vir(M_star=11))
print(B19.R_vir(M_halo=12.))
140.13671215656834 396.78196511752026 204.11985790378375
lowz_file = results_dir / "lowz-p0_5.parquet"
lowz = pd.read_parquet(lowz_file)
hosts_file = results_dir / "hosts-nsa.parquet"
nsa = pd.read_parquet(hosts_file)
hosts = Query("mass_GSE > 9.5").filter(nsa)
lowz, hosts_x_lowz = remove_hosts_from_lowz(hosts, lowz, savefig=False)
hosts = Query("z_NSA <= 0.03").filter(hosts)
hosts = isolate_hosts(hosts)
hosts = Query("mass_GSE < 11").filter(hosts)
sats = assign_satellites_to_hosts(
hosts, lowz, rank_by="mass_GSE", z_min=0.005, sep_max=400., savefig=False
)
sats['M_halo'] = [B19.halo_mass(M_star) for M_star in sats.mass_GSE]
sats['R_vir'] = [B19.R_vir(M_halo=M_halo) for M_halo in sats.M_halo]
hosts['M_halo'] = [B19.halo_mass(M_star) for M_star in hosts.mass_GSE]
hosts['R_vir'] = [B19.R_vir(M_halo=M_halo) for M_halo in hosts.M_halo]
sats = sats[sats.sep < sats.R_vir].copy()
fig, ax = plt.subplots(1, 1, figsize=(5, 5), dpi=300)
dmass = 0.25
mass_bins = np.arange(9.5, 11, dmass)
for i, [m1, m2] in enumerate(zip(mass_bins, mass_bins + dmass)):
q = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
satellite_separations = q.filter(sats).sep.values
normalized_separations = (q.filter(sats).sep / q.filter(sats).R_vir).values
radial_bins = np.arange(0.26, 1, 0.01)
profile_bootstrapped = bootstrap(
normalized_separations,
bootfunc=partial(compute_radial_profile, radial_bins=radial_bins),
bootnum=N_boot,
)
profile_bootstrapped = profile_bootstrapped / q.count(hosts)
ax.fill_between(
radial_bins,
*np.quantile((profile_bootstrapped - profile_bootstrapped.min(1, keepdims=True)) / (profile_bootstrapped.max(1, keepdims=True) - profile_bootstrapped.min(1, keepdims=True)), [0.16, 0.84], axis=0),
color=mass2color((m1 + m2) / 2),
label=f"${m1:g} - {m2:g}$",
lw=0,
alpha=0.7,
zorder=3,
)
plt.xlabel('$r/R_{\\rm vir}$')
plt.ylabel('Normalized $N_{\\rm sat}$')
plt.legend(title=r'$\log(M_★/M_\odot)$', title_fontsize=16, loc='upper left')
plt.grid(alpha=0.15)
sats_subset.shape, hosts_subset.shape
((11449, 28), (7542, 24))
sats["M_r"] = sats.r0 - cosmo.distmod(sats.z_NSA).value
sats_subset = sats.join(
sats.groupby("NSAID").M_r.min().rename("M_r_sat"),
on="NSAID",
)
hosts_subset = hosts.join(
sats_subset.set_index("NSAID", drop=True).M_r_sat, on="NSAID", how="left"
).drop_duplicates()
hosts_subset['magnitude_gap'] = hosts_subset.M_r_sat - hosts_subset.M_r_NSA
plt.figure(figsize=(5, 5), dpi=300)
dmass = 0.25
mass_bins = np.arange(9.5, 11, dmass)
for i, [m1, m2] in enumerate(zip(mass_bins, mass_bins + dmass)):
q_mass = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
boot_mag_gap = []
magnitude_gap_bins = np.arange(0, 6, 1)
for j, [mag1, mag2] in enumerate(zip(magnitude_gap_bins, magnitude_gap_bins + 1)):
q_mag = Query(f"magnitude_gap > {mag1}", f"magnitude_gap < {mag2}")
number_of_hosts = (q_mag & q_mass).count(hosts_subset)
if mag2 > (i + 2):
boot_mag_gap.append(np.full((N_boot,), np.nan))
else:
boot_mag_gap.append(
f(sats_subset, q_mag.filter(hosts_subset), q_mass)
)
boot_mag_gap = np.array(boot_mag_gap)
plt.errorbar(
magnitude_gap_bins[:len(boot_mag_gap)] + 0.5,# + 0.05*(i-2.5),
boot_mag_gap.mean(1),
boot_mag_gap.std(1),
c=mass2color((m1+m2)/2),
markersize=5,
ls='-',
lw=2,
fmt='o',
label=f'${m1} - {m2}$'
)
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1], fontsize=13, framealpha=1, title='log($M_★/M_\odot$)', title_fontsize=14,
)
plt.grid(alpha=0.15)
plt.xlim(-0.05, 6.05)
# plt.ylim(1.5, 13.5)
plt.xlabel(r'Magnitude gap $\equiv \Delta m_{r,*}$')
plt.ylabel(r'$N_{\rm sat}(<R_{\rm vir})$')
plt.tight_layout()
plt.savefig(figures_dir / "satellite_richness-by-magnitude_gap-stellar_mass.pdf")
plt.figure(figsize=(5, 5), dpi=300)
dmass = 0.25
mass_bins = np.arange(11.5, 12.75, dmass)
for i, [m1, m2] in enumerate(zip(mass_bins, mass_bins + dmass)):
q_mass = Query(f"M_halo > {m1}", f"M_halo < {m2}")
boot_mag_gap = []
magnitude_gap_bins = np.arange(0, 6, 1)
for j, [mag1, mag2] in enumerate(zip(magnitude_gap_bins, magnitude_gap_bins + 1)):
q_mag = Query(f"magnitude_gap > {mag1}", f"magnitude_gap < {mag2}")
number_of_hosts = (q_mag & q_mass).count(hosts_subset)
if mag2 > (i + 2):
boot_mag_gap.append(np.full((N_boot,), np.nan))
else:
boot_mag_gap.append(
f(sats_subset, q_mag.filter(hosts_subset), q_mass)
)
boot_mag_gap = np.array(boot_mag_gap)
plt.errorbar(
magnitude_gap_bins[:len(boot_mag_gap)] + 0.5,
boot_mag_gap.mean(1),
boot_mag_gap.std(1),
c=mass2color(B19.stellar_mass((m1+m2)/2)),
markersize=5,
ls='-',
lw=2,
fmt='o',
label=f'${m1} - {m2}$'
)
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1], fontsize=13, framealpha=1, title=r'log($M_{\rm halo}/M_\odot$)', title_fontsize=14,
)
plt.grid(alpha=0.15)
plt.xlim(-0.05, 6.05)
# plt.ylim(1.5, 13.5)
plt.xlabel(r'Magnitude gap $\equiv \Delta m_{r,*}$')
plt.ylabel(r'$N_{\rm sat}(<R_{\rm vir})$')
plt.tight_layout()
plt.savefig(figures_dir / "satellite_richness-by-magnitude_gap-halo_mass.pdf")
plt.figure(figsize=(5, 5), dpi=300)
dmass = 0.25
mass_bins = np.arange(9.5, 11, dmass)
for i, [m1, m2] in enumerate(zip(mass_bins, mass_bins + dmass)):
q_mass = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
boot_mag_gap_original = []
boot_mag_gap_limited = []
magnitude_gap_bins = np.arange(0, 6, 1)
for j, [mag1, mag2] in enumerate(zip(magnitude_gap_bins, magnitude_gap_bins + 1)):
q_mag = Query(f"magnitude_gap > {mag1}", f"magnitude_gap < {mag2}")
number_of_hosts = (q_mag & q_mass).count(hosts_subset)
if mag2 > (i + 2):
boot_mag_gap_original.append(np.full((N_boot,), np.nan))
boot_mag_gap_limited.append(np.full((N_boot,), np.nan))
else:
q_sep = Query(f"sep > 0.3*R_vir")
boot_mag_gap_original.append(
f(sats_subset, q_mag.filter(hosts_subset), q_mass)
)
boot_mag_gap_limited.append(
f(q_sep.filter(sats_subset), q_mag.filter(hosts_subset), q_mass)
)
boot_mag_gap_original = np.array(boot_mag_gap_original)
boot_mag_gap_limited = np.array(boot_mag_gap_limited)
plt.errorbar(
magnitude_gap_bins[:len(boot_mag_gap)] + 0.5,# + 0.05*(i-2.5),
boot_mag_gap_original.mean(1),
boot_mag_gap_original.std(1),
c=mass2color((m1+m2)/2),
markersize=5,
ls='-',
lw=2,
fmt='o',
# label=f'${m1} - {m2}$'
)
plt.errorbar(
magnitude_gap_bins[:len(boot_mag_gap)] + 0.5,# + 0.05*(i-2.5),
boot_mag_gap_limited.mean(1),
boot_mag_gap_limited.std(1),
c=mass2color((m1+m2)/2),
markersize=5,
ls='--',
lw=2,
fmt='o',
label=f'${m1} - {m2}$'
)
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1], fontsize=13, framealpha=1, title='log($M_★/M_\odot$)', title_fontsize=14,
)
plt.grid(alpha=0.15)
plt.xlim(-0.05, 6.05)
# plt.ylim(1.5, 13.5)
plt.xlabel(r'Magnitude gap $\equiv \Delta m_{r,*}$')
plt.ylabel(r'$N_{\rm sat}(<R_{\rm vir})$')
plt.tight_layout()
hosts, sats = load_hosts_and_sats()
# impose cuts
hosts = HOST_QUERY.filter(hosts)
sats = (HOST_QUERY & SAT_QUERY).filter(sats)
# isolate hosts and sats
hosts = isolate_hosts(hosts, delta_mass=0.0, delta_z=0.005, delta_d=1.0)
sats = sats[sats.NSAID.isin(hosts.index)].copy()
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
N_boot = 30
radial_bins = np.arange(36, 300, 1)
mass_bins = np.arange(10., 11, 1)
# compute profile and correct for each mass bin
for m1, m2 in zip(mass_bins, mass_bins+1):
q = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
satellite_separations = q.filter(sats).sep.values
profile_bootstrapped = bootstrap(
satellite_separations,
bootfunc=partial(compute_radial_profile, radial_bins=radial_bins),
bootnum=N_boot,
)
profile_bootstrapped = profile_bootstrapped / q.count(hosts)
# corrections
interloper_profile = compute_interloper_profile(
q.filter(sats).z_NSA,
radial_bins,
)
interloper_profile_bootstrapped = rng.choice(
interloper_profile, size=N_boot, replace=True
)
nonsatellite_profile = compute_nonsatellite_profile(
hosts.loc[q.filter(sats).NSAID].N_unrelated_lowz, radial_bins
)
nonsatellite_profile_bootstrapped = rng.choice(
nonsatellite_profile, size=N_boot, replace=True
)
profile_bootstrapped = (
profile_bootstrapped / 0.600
- interloper_profile_bootstrapped
- nonsatellite_profile_bootstrapped
)
ax.fill_between(
radial_bins,
*np.quantile(profile_bootstrapped, [0.16, 0.84], axis=0),
color=mass2color((m1 + m2) / 2),
label=f"xSAGA",
lw=0,
alpha=0.7,
zorder=3,
)
print(f'xSAGA {m1}-{m2}: {profile_bootstrapped.mean(0).max() - profile_bootstrapped.mean(0).min():2f} +/- {profile_bootstrapped.std(0)[-1]:2f}')
# Romulus 25
boot_Rom2_profile = np.load(data_dir / 'Romulus/Romulus-spherical.npy')
ax.fill_between(
radial_bins,
*np.quantile(boot_Rom2_profile, [0.16, 0.84], axis=0) - boot_Rom2_profile.mean(0).min(),
lw=0,
facecolor=cmr.savanna(0.4),
edgecolor='white',
alpha=0.6,
hatch='\\\\\\',
label="Romulus (sphere)",
zorder=4,
)
# FIRE-2
boot_fire_profile_sphere = np.load(data_dir / 'FIRE/FIRE-isolated_bootstrapped_projected-profile_sphere.npy')
boot_fire_profile_sphere = boot_fire_profile_sphere - boot_fire_profile_sphere.min(1, keepdims=True)
ax.fill_between(
radial_bins,
*np.quantile(boot_fire_profile_sphere, [0.16, 0.84], axis=0),
lw=0,
facecolor=cmr.lilac(0.2),
edgecolor='white',
alpha=0.6,
hatch='\\\\',
label="FIRE (sphere)",
zorder=4,
)
boot_fire_profile_cylinder = np.load(data_dir / 'FIRE/FIRE-isolated_bootstrapped_projected-profile_cylinder1000kpc.npy')
boot_fire_profile_cylinder = boot_fire_profile_cylinder - boot_fire_profile_cylinder.min(1, keepdims=True)
ax.fill_between(
radial_bins,
*np.quantile(boot_fire_profile_cylinder, [0.16, 0.84], axis=0),
lw=0,
facecolor=cmr.lilac(0.6),
edgecolor='white',
alpha=0.6,
hatch='\\\\',
label="FIRE (cylinder, $\\pm$1 Mpc)",
zorder=4,
)
ax.set_xlabel("$r$ [projected kpc]")
ax.set_ylabel(r"$N_{\rm sat}(<r)$")
print(f'FIRE sphere: {boot_fire_profile_sphere.max(1).mean():2f} +/- {boot_fire_profile_sphere.max(1).std():2f}')
print(f'FIRE cylinder: {boot_fire_profile_cylinder.max(1).mean():2f} +/- {boot_fire_profile_cylinder.max(1).std():2f}')
print(f'Romulus sphere: {boot_Rom2_profile.max(1).mean():2f} +/- {boot_Rom2_profile.max(1).std():2f}')
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles[::-1], labels[::-1], loc='upper left', fontsize=13)
ax.grid(alpha=0.15)
ax.set_xlim(25, 305)
ax.set_ylim(-0.05, 3.05)
plt.savefig(figures_dir / "compare-sims.pdf")
xSAGA 10.0-11.0: 2.446808 +/- 0.206950 FIRE sphere: 2.497500 +/- 0.066620 FIRE cylinder: 2.690417 +/- 0.236823 Romulus sphere: 2.053551 +/- 0.029326
from easyquery import QueryMaker
from radial_profiles import *
hosts, sats = load_hosts_and_sats()
# impose cuts
hosts = HOST_QUERY.filter(hosts)
sats = (HOST_QUERY & SAT_QUERY).filter(sats)
# isolate hosts and sats
hosts = isolate_hosts(hosts, delta_mass=0.0, delta_z=0.005, delta_d=1.0)
sats = sats[sats.NSAID.isin(hosts.index)].copy()
fig, axes = plt.subplots(1, 3, figsize=(12, 4.5), dpi=150, sharey=True)
radial_bins = np.arange(36, 300, 1)
m1, m2 = 10.0, 11
# compute profile and correct for each mass bin
q = Query(f"mass_GSE > {m1}", f"mass_GSE < {m2}")
satellite_separations = q.filter(sats).sep.values
for a, (ax, N_iterations, N_sample) in enumerate(zip(axes.flat, [1000, 300, 100], [100, 300, 1000])):
boot_profile = np.empty((N_iterations, len(radial_bins)))
for i in range(N_iterations):
hosts_subset = hosts.sample(N_sample, replace=True)
sats_subset = QueryMaker.isin('NSAID', hosts_subset.index).filter(sats)
profile = compute_radial_profile(sats_subset.sep.values, radial_bins)
profile = profile / N_sample
# corrections
interloper_profile = compute_interloper_profile(
sats_subset.z_NSA,
radial_bins,
).mean(0)
nonsatellite_profile = compute_nonsatellite_profile(
hosts_subset.loc[sats_subset.NSAID].N_unrelated_lowz, radial_bins
).mean(0)
profile = (
profile / 0.600
- interloper_profile
- nonsatellite_profile
)
boot_profile[i] = profile
ax.plot(
radial_bins,
profile,
color='C0',
lw=1,
alpha=0.1*(a + 1),
)
ax.plot(radial_bins, boot_profile.mean(0), color='white', lw=3, alpha=1)
ax.plot(radial_bins, boot_profile.mean(0), color='k', lw=2, alpha=1)
ax.grid(alpha=0.15)
ax.set_ylim(-0.2, 3.2)
ax.text(0.03, 0.92, f'{N_sample} hosts, {N_iterations} bootstraps', transform=ax.transAxes);
ax.set_xlabel("$r$ [projected kpc]")
axes.flat[0].set_ylabel(r"$N_{\rm sat}(<r)$")
fig.tight_layout()
plt.savefig(figures_dir / "bootstrap-scatter.pdf")