VirES - magnetic model benchmark - results viewer¶

Assess perfomance of the VirES magnetic model evaluation and other parameters wrt. the plain data rerival. Get per sample times.

See also VirES Python Client

In [1]:

import pandas as pd
import json

LABELS = [
    "plain request",
    "plain request (cached)",
    "filter: Flags_B != 255",
    "aux.var.: MLT,  QDLat",
    "aux.var.: MLT, QDLat",
    "model: CHAOS",
    "model: CHAOS80",
    "model: CHAOS-Static",
    "model: CHAOS-Static80",
    "model: CHAOS-Core",
    "model: CHAOS-MMA",
    "model: CHAOS-MMA-Primary",
    "model: CHAOS-MMA-Secondary",
    "model: MIO_SHA_2C",
    "model: MIO_SHA_2C-Primary",
    "model: MIO_SHA_2C-Secondary",
]

LABEL_ORDER = {label: index for index, label in enumerate(LABELS, 1)}


def load_data_form_json_log(filename):
    records = []
    with open(filename, encoding="utf8") as input_file:
        for line in input_file:
            record = json.loads(line)
            record["_labelSortingIndex"] = LABEL_ORDER.get(record["description"], 0)
            records.append(record)
    df = pd.DataFrame(records)
    df = df.sort_values('_labelSortingIndex')
    return df

In [2]:

from numpy import isnan
from matplotlib.pyplot import figure, subplot, colorbar
from matplotlib.colors import LogNorm

def plot_data(ax, df, label):
    y = df['description'].values
    x = 1e3 * df['requestDuration'].values / df['numberOfSamples'].values
    c = df['numberOfSamples'].values
    m = df['numberOfSamples'].values > 1e3
    ax.set_axisbelow(True)
    ax.xaxis.grid(color='gray', linestyle='dashed')
    ax.yaxis.grid(color='whitesmoke', linestyle='dashed')
    h = ax.scatter(x[m], y[m], s=3, c=c[m], norm=LogNorm(1e3, 1e6), cmap='viridis_r')
    ax.set_xlabel("request duration per 1k samples / s")
    ax.set_title(label)
    ax.semilogx()
    cax = colorbar(h)
    ax.set_xlim([1e-3, 1e1])

In [4]:

# initialial optimization results

df_production = load_data_form_json_log("results/2022-11-14_production_benchmark.log")
df_production_new = load_data_form_json_log("results/2022-12-19_production_benchmark.log")
df_failover = load_data_form_json_log("results/2022-11-14_failover_benchmark.log")
df_failover_new = load_data_form_json_log("results/2022-12-19_failover_benchmark.log")
df_testing = load_data_form_json_log("results/2022-11-17_testing_benchmark.log")
df_testing_new = load_data_form_json_log("results/2022-12-08_testing_benchmark.log")
df_staging = load_data_form_json_log("results/2022-11-14_staging_benchmark.log")
df_disc = load_data_form_json_log("results/2022-11-14_disc_benchmark.log")
df_disc_new = load_data_form_json_log("results/2022-11-21_disc_benchmark.log")


fig = figure(figsize=(18, 24), dpi=150)

ax = subplot(8, 1, 1)
plot_data(ax, df_production, "https://vires.services (0.10.0)")

ax = subplot(8, 1, 2)
plot_data(ax, df_production_new, "https://vires.services (0.12.0)")

ax = subplot(8, 1, 3)
plot_data(ax, df_disc, "https://staging.viresdisc.vires.services (0.10.0)")

ax = subplot(8, 1, 4)
plot_data(ax, df_disc_new, "https://staging.viresdisc.vires.services (0.11.0)")

ax = subplot(8, 1, 5)
plot_data(ax, df_failover, "https://swarm-serv1a.eox.at (0.10.0)")

ax = subplot(8, 1, 6)
plot_data(ax, df_failover_new, "https://swarm-serv1a.eox.at (0.12.0)")

ax = subplot(8, 1, 7)
plot_data(ax, df_testing, "https://testing.vires.services (0.11.0)")

ax = subplot(8, 1, 8)
plot_data(ax, df_testing_new, "https://testing.vires.services (0.12.0)")

fig.tight_layout()

/tmp/ipykernel_468/2593348926.py:7: RuntimeWarning: divide by zero encountered in true_divide
  x = 1e3 * df['requestDuration'].values / df['numberOfSamples'].values
/tmp/ipykernel_468/2593348926.py:7: RuntimeWarning: divide by zero encountered in true_divide
  x = 1e3 * df['requestDuration'].values / df['numberOfSamples'].values

In [6]:

from numpy import isnan, asarray, log, exp
from matplotlib.pyplot import figure, subplot, colorbar
from matplotlib.colors import LogNorm


def average(df):
    df = df.groupby(['description'])[['description', 'requestDuration']].mean()
    df["_labelSortingIndex"] = list(map(lambda label: LABEL_ORDER.get(label, 0), df.index))
    df = df.sort_values('_labelSortingIndex')
    return df


def median(df):
    df = df.groupby(['description'])[['description', 'requestDuration']].median()
    df["_labelSortingIndex"] = list(map(lambda label: LABEL_ORDER.get(label, 0), df.index))
    df = df.sort_values('_labelSortingIndex')
    return df


def plot_data_1d(ax, df, label, color):
    df = df.copy()
    df['_labelSortingIndex'] = -df['_labelSortingIndex']
    df = df.sort_values('_labelSortingIndex')
    
    y = df['description'].values
    # = 1e3 * df['requestDuration'].values / df['numberOfSamples'].values
    x = df['requestDuration'].values
    c = df['numberOfSamples'].values
    m = (
        df['numberOfSamples'].values > 23 * 60 * 60
    ) & (
        asarray(["model" in value for value in df['description'].values])
        #df['description'].values != "plain request"
    )
    ax.set_axisbelow(True)
    ax.xaxis.grid(color='gray', linestyle='dashed')
    ax.yaxis.grid(color='whitesmoke', linestyle='dashed')
    
    #df_mean = average(df[m])
    df_mean = median(df[m])
    df_mean['_labelSortingIndex'] = -df_mean['_labelSortingIndex']
    df_mean = df_mean.sort_values('_labelSortingIndex')

    x_mean = df_mean.requestDuration.values
    y_mean = df_mean.index.values

    h_mean = ax.scatter(x_mean, y_mean, s=100, c=color, marker='|')
    h = ax.scatter(x[m], y[m], s=3, c=color)
    ax.set_xlabel("request duration / s")
    ax.set_title(label)
    ax.semilogx()
    #cax = colorbar(h)
    ax.set_xlim([1e-1, 1e3])
    
    return h, h_mean, df_mean

Performance improvement on testing/staging instances¶

In [5]:

df_new = load_data_form_json_log("results/2022-12-08_testing_benchmark.log")
df_old = load_data_form_json_log("results/2022-11-14_staging_benchmark.log")

fig = figure(figsize=(12, 6), dpi=150)

ax = subplot(1, 1, 1)
h0, h0_mean, df0_mean = plot_data_1d(ax, df_old, "https://staging.vires.services", color="tab:red")
h1, h1_mean, df1_mean = plot_data_1d(ax, df_new, "https://testing.vires.services", color="tab:blue")

ax.set_title("VirES for Swarm - magnetic model performance (SW_OPER_MAGx_LR_1B, 1 day @ 1Hz)")
ax.legend(
    [h0, h0_mean, h1, h1_mean],
    ["v0.10.0", "v0.10.0 (median)", "v0.12.0", "v0.12.0 (median)" ]

)

df_comp = df0_mean[[]].copy()

df_comp['requestDurationBefore'] = df0_mean['requestDuration']
df_comp['requestDurationAfter'] = df1_mean['requestDuration']
df_comp['speedUp'] = df0_mean['requestDuration'] / df1_mean['requestDuration']
df_comp['_labelSortingIndex'] = df1_mean['_labelSortingIndex']

y_label = df_comp.index
x_label = exp(0.5*log(df_comp['requestDurationBefore'].values) + 0.5*log(df_comp['requestDurationAfter'].values))
v_label = df_comp['speedUp'].values

for i, (x, y, v) in enumerate(zip(x_label, y_label, v_label)):
    if v > 10: v = round(v)
    else: v = round(v, 1)
    ax.text(x, i, f"↓{v:g}x", va="center", ha="center", fontsize=15, color='gray')

df_comp

Out[5]:

	requestDurationBefore	requestDurationAfter	speedUp	_labelSortingIndex
description
model: MIO_SHA_2C-Secondary	17.242855	3.700635	4.659431	-16
model: MIO_SHA_2C-Primary	17.305554	3.637398	4.757674	-15
model: MIO_SHA_2C	33.283798	6.410382	5.192171	-14
model: CHAOS-MMA-Secondary	38.103107	0.876424	43.475649	-13
model: CHAOS-MMA-Primary	38.305080	0.781332	49.025345	-12
model: CHAOS-MMA	75.687931	0.818239	92.500960	-11
model: CHAOS-Core	4.568278	0.927593	4.924874	-10
model: CHAOS-Static80	19.668374	1.848874	10.638029	-9
model: CHAOS-Static	28.859214	5.784846	4.988761	-8
model: CHAOS80	98.388890	1.877605	52.401278	-7
model: CHAOS	106.933664	6.055329	17.659430	-6

In [6]:

df_new = load_data_form_json_log("results/2022-12-08_testing_benchmark.log")
df_old = load_data_form_json_log("results/2022-11-17_testing_benchmark.log")

fig = figure(figsize=(12, 6), dpi=150)

ax = subplot(1, 1, 1)
h0, h0_mean, df0_mean = plot_data_1d(ax, df_old, "https://testing.vires.services", color="tab:red")
h1, h1_mean, df1_mean = plot_data_1d(ax, df_new, "https://testing.vires.services", color="tab:blue")

ax.set_title("VirES for Swarm - magnetic model performance (SW_OPER_MAGx_LR_1B, 1 day @ 1Hz)")
ax.legend(
    [h0, h0_mean, h1, h1_mean],
    ["v0.11.0", "v0.11.0 (median)", "v0.12.0", "v0.12.0 (median)" ]

)

df_comp = df0_mean[[]].copy()

df_comp['requestDurationBefore'] = df0_mean['requestDuration']
df_comp['requestDurationAfter'] = df1_mean['requestDuration']
df_comp['speedUp'] = df0_mean['requestDuration'] / df1_mean['requestDuration']
df_comp['_labelSortingIndex'] = df1_mean['_labelSortingIndex']

y_label = df_comp.index
x_label = exp(0.5*log(df_comp['requestDurationBefore'].values) + 0.5*log(df_comp['requestDurationAfter'].values))
v_label = df_comp['speedUp'].values

for i, (x, y, v) in enumerate(zip(x_label, y_label, v_label)):
    if v > 10: v = round(v)
    else: v = round(v, 1)
    ax.text(x, i, f"↓{v:g}x", va="center", ha="center", fontsize=15, color='gray')

df_comp

Out[6]:

	requestDurationBefore	requestDurationAfter	speedUp	_labelSortingIndex
description
model: MIO_SHA_2C-Secondary	4.533863	3.700635	1.225158	-16
model: MIO_SHA_2C-Primary	4.492581	3.637398	1.235108	-15
model: MIO_SHA_2C	8.421961	6.410382	1.313800	-14
model: CHAOS-MMA-Secondary	0.678560	0.876424	0.774237	-13
model: CHAOS-MMA-Primary	0.720623	0.781332	0.922301	-12
model: CHAOS-MMA	0.726563	0.818239	0.887959	-11
model: CHAOS-Core	0.810234	0.927593	0.873480	-10
model: CHAOS-Static80	3.180733	1.848874	1.720363	-9
model: CHAOS-Static	14.453981	5.784846	2.498594	-8
model: CHAOS80	3.449179	1.877605	1.837010	-7
model: CHAOS	14.954468	6.055329	2.469637	-6

Performance improvement on DISC instance¶

In [8]:

df_old = load_data_form_json_log("results/2022-11-14_disc_benchmark.log")
df_new = load_data_form_json_log("results/2022-11-21_disc_benchmark.log")

fig = figure(figsize=(12, 6), dpi=150)

ax = subplot(1, 1, 1)
h0, h0_mean, df0_mean = plot_data_1d(ax, df_old, "https://staging.viresdisc.vires.services", color="tab:red")
h1, h1_mean, df1_mean = plot_data_1d(ax, df_new, "https://staging.viresdisc.vires.services (optimized)", color="tab:blue")

ax.set_title("VirES for Swarm - magnetic model performance (SW_OPER_MAGx_LR_1B, 1 day @ 1Hz)")
ax.legend(
    [h0, h0_mean, h1, h1_mean],
    ["v0.10.0", "v0.10.0 (median)", "v0.11.0", "v0.11.0 (median)" ]

)

df_comp = df0_mean[[]].copy()

df_comp['requestDurationBefore'] = df0_mean['requestDuration']
df_comp['requestDurationAfter'] = df1_mean['requestDuration']
df_comp['speedUp'] = df0_mean['requestDuration'] / df1_mean['requestDuration']
df_comp['_labelSortingIndex'] = df1_mean['_labelSortingIndex']

y_label = df_comp.index
x_label = exp(0.5*log(df_comp['requestDurationBefore'].values) + 0.5*log(df_comp['requestDurationAfter'].values))
v_label = df_comp['speedUp'].values

for i, (x, y, v) in enumerate(zip(x_label, y_label, v_label)):
    if v > 10: v = round(v)
    else: v = round(v, 1)
    ax.text(x, i, f"↓{v:g}x", va="center", ha="center", fontsize=15, color='gray')

df_comp

Out[8]:

	requestDurationBefore	requestDurationAfter	speedUp	_labelSortingIndex
description
model: MIO_SHA_2C-Secondary	18.762365	4.501881	4.167672	-16
model: MIO_SHA_2C-Primary	18.730200	4.536310	4.128950	-15
model: MIO_SHA_2C	36.922526	8.649441	4.268776	-14
model: CHAOS-MMA-Secondary	45.673081	0.437264	104.451998	-13
model: CHAOS-MMA-Primary	45.669017	0.472442	96.665856	-12
model: CHAOS-MMA	90.678459	0.516874	175.436365	-11
model: CHAOS-Core	4.438892	0.585531	7.580969	-10
model: CHAOS-Static80	22.011524	3.275513	6.720023	-9
model: CHAOS-Static	32.450592	16.339735	1.985993	-8
model: CHAOS80	116.354815	3.685063	31.574713	-7
model: CHAOS	126.782002	16.768842	7.560570	-6

In [7]:

df_old = load_data_form_json_log("results/2022-11-14_disc_benchmark.log")
df_new = load_data_form_json_log("results/2022-12-19_disc_benchmark.log")

fig = figure(figsize=(12, 6), dpi=150)

ax = subplot(1, 1, 1)
h0, h0_mean, df0_mean = plot_data_1d(ax, df_old, "https://staging.viresdisc.vires.services", color="tab:red")
h1, h1_mean, df1_mean = plot_data_1d(ax, df_new, "https://staging.viresdisc.vires.services (optimized)", color="tab:blue")

ax.set_title("VirES for Swarm - magnetic model performance (SW_OPER_MAGx_LR_1B, 1 day @ 1Hz)")
ax.legend(
    [h0, h0_mean, h1, h1_mean],
    ["v0.10.0", "v0.10.0 (median)", "v0.12.0", "v0.12.0 (median)" ]

)

df_comp = df0_mean[[]].copy()

df_comp['requestDurationBefore'] = df0_mean['requestDuration']
df_comp['requestDurationAfter'] = df1_mean['requestDuration']
df_comp['speedUp'] = df0_mean['requestDuration'] / df1_mean['requestDuration']
df_comp['_labelSortingIndex'] = df1_mean['_labelSortingIndex']

y_label = df_comp.index
x_label = exp(0.5*log(df_comp['requestDurationBefore'].values) + 0.5*log(df_comp['requestDurationAfter'].values))
v_label = df_comp['speedUp'].values

for i, (x, y, v) in enumerate(zip(x_label, y_label, v_label)):
    if v > 10: v = round(v)
    else: v = round(v, 1)
    ax.text(x, i, f"↓{v:g}x", va="center", ha="center", fontsize=15, color='gray')

df_comp

Out[7]:

	requestDurationBefore	requestDurationAfter	speedUp	_labelSortingIndex
description
model: MIO_SHA_2C-Secondary	18.762365	3.676505	5.103315	-16
model: MIO_SHA_2C-Primary	18.730200	3.676798	5.094161	-15
model: MIO_SHA_2C	36.922526	6.904632	5.347501	-14
model: CHAOS-MMA-Secondary	45.673081	0.486079	93.962237	-13
model: CHAOS-MMA-Primary	45.669017	0.503774	90.653804	-12
model: CHAOS-MMA	90.678459	0.554604	163.501343	-11
model: CHAOS-Core	4.438892	0.459761	9.654774	-10
model: CHAOS-Static80	22.011524	1.453500	15.143809	-9
model: CHAOS-Static	32.450592	6.187202	5.244793	-8
model: CHAOS80	116.354815	1.798760	64.686130	-7
model: CHAOS	126.782002	6.587135	19.246911	-6

Performance improvement on production instance¶

In [8]:

df_old = load_data_form_json_log("results/2022-11-14_production_benchmark.log")
df_new = load_data_form_json_log("results/2022-12-19_production_benchmark.log")

fig = figure(figsize=(12, 6), dpi=150)

ax = subplot(1, 1, 1)
h0, h0_mean, df0_mean = plot_data_1d(ax, df_old, "https://staging.viresdisc.vires.services", color="tab:red")
h1, h1_mean, df1_mean = plot_data_1d(ax, df_new, "https://staging.viresdisc.vires.services (optimized)", color="tab:blue")

ax.set_title("VirES for Swarm - magnetic model performance (SW_OPER_MAGx_LR_1B, 1 day @ 1Hz)")
ax.legend(
    [h0, h0_mean, h1, h1_mean],
    ["v0.10.0", "v0.10.0 (median)", "v0.12.0", "v0.12.0 (median)" ]

)

df_comp = df0_mean[[]].copy()

df_comp['requestDurationBefore'] = df0_mean['requestDuration']
df_comp['requestDurationAfter'] = df1_mean['requestDuration']
df_comp['speedUp'] = df0_mean['requestDuration'] / df1_mean['requestDuration']
df_comp['_labelSortingIndex'] = df1_mean['_labelSortingIndex']

y_label = df_comp.index
x_label = exp(0.5*log(df_comp['requestDurationBefore'].values) + 0.5*log(df_comp['requestDurationAfter'].values))
v_label = df_comp['speedUp'].values

for i, (x, y, v) in enumerate(zip(x_label, y_label, v_label)):
    if v > 10: v = round(v)
    else: v = round(v, 1)
    ax.text(x, i, f"↓{v:g}x", va="center", ha="center", fontsize=15, color='gray')

df_comp

Out[8]:

	requestDurationBefore	requestDurationAfter	speedUp	_labelSortingIndex
description
model: MIO_SHA_2C-Secondary	18.303380	3.535712	5.176717	-16
model: MIO_SHA_2C-Primary	18.350334	3.572292	5.136851	-15
model: MIO_SHA_2C	36.103458	6.773591	5.330033	-14
model: CHAOS-MMA-Secondary	66.695110	0.406771	163.962385	-13
model: CHAOS-MMA-Primary	66.275737	0.425995	155.578633	-12
model: CHAOS-MMA	129.152582	0.521690	247.565558	-11
model: CHAOS-Core	5.427876	0.441955	12.281509	-10
model: CHAOS-Static80	19.009142	1.480601	12.838798	-9
model: CHAOS-Static	24.325625	6.353833	3.828496	-8
model: CHAOS80	152.515668	1.851345	82.381017	-7
model: CHAOS	155.007844	6.663577	23.261959	-6