# Single axes that every layer of the comparison plot is drawn on.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 9))
# Raw data as an empirical quantile curve: the i-th smallest sample is drawn
# at x = i / n, so x covers [0, 1).
n_samples = len(samples)
empirical_qs = np.arange(n_samples) / n_samples
ax.scatter(empirical_qs, np.sort(samples), marker="+", color="gray")
# # plot true quantiles
# ref_qs = np.arange(0, 1.01, 0.01)
# ref_q_vals = np.quantile(samples, ref_qs)
# ax.scatter(ref_qs, ref_q_vals, label="true quantiles", color="green", edgecolor="none", marker="o", s=100)
# ax.set(xlabel="quantile", ylabel="quantile vals")
# plot uniform quantiles
# 50 evenly spaced quantile edges on [0, 1] delimit 49 equal-width bins; each
# bin is represented by the quantile at its midpoint.
uniform_q_bins = np.linspace(0, 1, 50)
uniform_qs = (uniform_q_bins[:-1] + uniform_q_bins[1:]) / 2
# Exactly one entry per bin: bin index -> midpoint quantile.
uniform_bin2q_map = dict(enumerate(uniform_qs))
# Sample values located at the quantile edges.
uniform_q_val_bins = np.quantile(samples, uniform_q_bins)
# Only the interior edges are handed to np.digitize, so every sample
# (including the minimum and the maximum) receives a bin index in
# 0 .. len(uniform_qs) - 1.
adf = (
    pd.DataFrame(
        {
            "bin": np.digitize(samples, uniform_q_val_bins[1:-1]),
            "val": np.asarray(samples),
        }
    )
    # groupby sorts its keys, so the rows are already ordered by bin.
    .groupby("bin", as_index=False)
    # Mean sample value per bin.
    .mean()
    # Key lookup (map) instead of value substitution (replace).
    .assign(quantile=lambda df: df["bin"].map(uniform_bin2q_map))
)
adf.plot.scatter(
    x="quantile",
    y="val",
    marker="o",
    color="blue",
    edgecolor="none",
    label="uniform_bin",
    ax=ax,
)
# Prepend the point (0, min(samples)) so the connecting line starts at
# quantile 0 rather than at the first bin midpoint.
adf_with_head = pd.concat(
    [
        pd.DataFrame([[0, np.min(samples)]], columns=["quantile", "val"]),
        adf[["quantile", "val"]],
    ]
)
# Draw the connecting line as a single polyline instead of issuing one
# ax.plot call per segment.
ax.plot(
    adf_with_head["quantile"].to_numpy(),
    adf_with_head["val"].to_numpy(),
    color="blue",
)
# plot t-digest quantiles
# Quantile edges are obtained by evaluating k2q (defined elsewhere in this
# file) at unit-spaced k values running from -delta / 4 to delta / 4.
# NOTE(review): presumably k2q inverts the t-digest scale function, which
# would make the bins narrower towards q = 0 and q = 1 -- confirm.
delta = 100
ks = np.arange(-delta / 4, delta / 4 + 1, 1)
t_q_bins = k2q(ks)
# Each bin is represented by the quantile at its midpoint.
t_qs = (t_q_bins[:-1] + t_q_bins[1:]) / 2
# Exactly one entry per bin: bin index -> midpoint quantile.
t_bin2q_map = dict(enumerate(t_qs))
# Sample values located at the quantile edges.
t_q_val_bins = np.quantile(samples, t_q_bins)
# t_q_val_bins[1:-1]: only the interior edges are handed to np.digitize, so
# the returned indices lie in 0 .. len(t_qs) - 1 (0-49 for delta = 100,
# assuming k2q returns one edge per k value).
bdf = (
    pd.DataFrame(
        {
            "bin": np.digitize(samples, t_q_val_bins[1:-1]),
            "val": np.asarray(samples),
        }
    )
    # groupby sorts its keys, so the rows are already ordered by bin.
    .groupby("bin", as_index=False)
    # Mean sample value per bin.
    .mean()
    # Key lookup (map) instead of value substitution (replace).
    .assign(quantile=lambda df: df["bin"].map(t_bin2q_map))
)
bdf.plot.scatter(
    x="quantile",
    y="val",
    marker="o",
    color="red",
    edgecolor="none",
    label="t_digest_bins",
    ax=ax,
)
# Prepend the point (0, min(samples)) so the connecting line starts at
# quantile 0 rather than at the first bin midpoint.
bdf_with_head = pd.concat(
    [
        pd.DataFrame([[0, np.min(samples)]], columns=["quantile", "val"]),
        bdf[["quantile", "val"]],
    ]
)
# Draw the connecting line as a single polyline instead of issuing one
# ax.plot call per segment.
ax.plot(
    bdf_with_head["quantile"].to_numpy(),
    bdf_with_head["val"].to_numpy(),
    color="red",
)
# Zoom in on the lowest quantiles (x up to 0.02); the slightly negative left
# limit leaves a small margin to the left of quantile 0.
ax.set_xlim(left=-0.001, right=0.02)
ax.grid()
# Pin the legend's upper-left corner to the axes' upper-right corner, which
# places the legend just outside the plotting area.
ax.legend(loc="upper left", bbox_to_anchor=(1, 1))