import speasy as spz
amda_tree = spz.inventories.tree.amda
%matplotlib widget
# Use this instead if you are not using jupyterlab yet
#%matplotlib notebook
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import time
import numpy as np
First, ensure that speasy is set up to use the SciQLop cache:
spz.config.proxy.url.set('http://sciqlop.lpp.polytechnique.fr/cache-dev')
spz.config.proxy.enabled.set(True)
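Optionally, read the configuration back to confirm the settings took effect (this assumes the config entries expose a get() accessor mirroring the set() calls above):
# Sanity check (assumed API): print the proxy configuration that was just set
print(spz.config.proxy.url.get())
print(spz.config.proxy.enabled.get())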
start_time = datetime(2016, 6, 2)
stop_time = datetime(2016, 6, 8)
reference_data = spz.get_data(amda_tree.Parameters.ACE.MFI.ace_imf_all.imf, start_time, stop_time, progress=False)
print(f"Data shape: {reference_data.values.shape}")
print(f"Data size in Bytes: {reference_data.nbytes}")
Data shape: (32400, 3)
Data size in Bytes: 1039238
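As a quick sanity check, the time coverage of the returned variable can also be inspected (assuming the usual SpeasyVariable time attribute):
# Inspect the time coverage of the downloaded interval
print(f"From {reference_data.time[0]} to {reference_data.time[-1]}")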
def times(f, *args, n=10, **kwargs):
    """Run f n times and return the list of per-call durations in milliseconds."""
    def time_once():
        start = time.perf_counter_ns()
        f(*args, **kwargs, progress=False)
        stop = time.perf_counter_ns()
        return (stop - start) / 1e6
    return [time_once() for _ in range(n)]
def best_99_percent(times):
    # Drop the slowest 1% of measurements to limit the impact of outliers
    return sorted(times)[:int(len(times) * .99)]

def best_90_percent(times):
    # Drop the slowest 10% of measurements to limit the impact of outliers
    return sorted(times)[:int(len(times) * .9)]
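To illustrate how these helpers are meant to be used, here is a quick sketch that times a no-op callable (it only needs to accept the progress keyword that times() always passes) and reports the median of the best 90% of the measurements:
# Illustrative only: measure the pure timing overhead with a no-op callable
dummy_durations = times(lambda progress=False: None, n=100)
print(f"Median no-op overhead: {np.median(best_90_percent(dummy_durations)):.4f} ms")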
Then, request the data several times under each of the three configurations:
durations_without_any_cache = times(spz.get_data, amda_tree.Parameters.ACE.MFI.ace_imf_all.imf, start_time, stop_time,
disable_cache=True, disable_proxy=True, n=10);
durations_with_remote_cache = times(spz.get_data, amda_tree.Parameters.ACE.MFI.ace_imf_all.imf, start_time, stop_time,
disable_cache=True, n=1000);
durations_with_local_cache = times(spz.get_data, amda_tree.Parameters.ACE.MFI.ace_imf_all.imf, start_time, stop_time,
n=10000);
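Before plotting, it can help to print the median of each distribution so the figures have concrete numbers to compare against (a simple summary using np.median):
# Median execution time per configuration, in milliseconds
for label, durations in (('no cache', durations_without_any_cache),
                         ('remote cache', durations_with_remote_cache),
                         ('local cache', durations_with_local_cache)):
    print(f"{label:>12}: {np.median(durations):8.2f} ms")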
fig, axs = plt.subplots(3, 1, figsize=(6, 10))
for i, data, title in ((0, best_99_percent(durations_without_any_cache), 'Without any cache'),
                       (1, best_99_percent(durations_with_remote_cache), 'With only SciQLop remote cache'),
                       (2, best_99_percent(durations_with_local_cache), 'With local on disk cache')):
    axs[i].hist(data)
    axs[i].set_xlabel('Execution time (ms)')
    axs[i].set_title(title)
fig.suptitle('Execution time distributions for each configuration', fontsize=16)
plt.tight_layout()
plt.show()
fig, ax = plt.subplots()
ax.violinplot([best_99_percent(durations_without_any_cache), best_99_percent(durations_with_remote_cache), best_99_percent(durations_with_local_cache), ], showmeans=False,
showmedians=True)
ax.set_xticks([1, 2, 3], labels=['without', 'remote', 'local'])
ax.set_ylabel('Execution time (ms)')
plt.semilogy()
fig.suptitle('Comparison plot', fontsize=16)
plt.tight_layout()
plt.show()
start_time = datetime(2016, 6, 2)
def scaling_point(delta):
    # Load `delta` hours of data (served from the local on-disk cache after the first call),
    # then record its size in bytes and the distribution of access times
    stop_time = start_time + timedelta(hours=delta)
    data = spz.get_data(amda_tree.Parameters.ACE.MFI.ace_imf_all.imf, start_time, stop_time)
    capacity = data.nbytes
    t = best_90_percent(times(spz.get_data, amda_tree.Parameters.ACE.MFI.ace_imf_all.imf, start_time, stop_time, n=200))
    return capacity, t
deltas = np.logspace(np.log10(100), np.log10(5000), num=10)
values = [scaling_point(delta) for delta in deltas]
fig, ax = plt.subplots()
stats = [t for c, t in values]
capacities = np.array([c for c, t in values])
ax.violinplot(stats, positions=capacities, widths=np.gradient(capacities), showmeans=False, showmedians=True)
ax.set_ylabel('Execution time (ms)')
ax.set_xlabel('Loaded data size (Bytes)')
fig.suptitle('On disk cache scaling', fontsize=16)
plt.tight_layout()
plt.loglog()
plt.show()
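The same run can also be summarized numerically; the sketch below reports, for each scaling point, the median access time and an effective throughput (capacities are in bytes, timings in milliseconds), and applies equally to the remote-cache run below:
# Median time and effective throughput for each scaling point
for capacity, t in values:
    median_ms = np.median(t)
    print(f"{capacity / 1e6:8.2f} MB -> {median_ms:8.2f} ms "
          f"({capacity / 1e6 / (median_ms / 1e3):8.1f} MB/s)")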
start_time = datetime(2016, 6, 2)
def scaling_point(delta):
    # Same as above, but with the local on-disk cache disabled so that every call
    # goes through the SciQLop remote cache
    stop_time = start_time + timedelta(hours=delta)
    data = spz.get_data(amda_tree.Parameters.ACE.MFI.ace_imf_all.imf, start_time, stop_time, disable_cache=True)
    capacity = data.nbytes
    t = best_90_percent(times(spz.get_data, amda_tree.Parameters.ACE.MFI.ace_imf_all.imf, start_time, stop_time,
                              disable_cache=True, n=20))
    return capacity, t
deltas = np.logspace(np.log10(100), np.log10(5000), num=10)
values = [scaling_point(delta) for delta in deltas]
fig, ax = plt.subplots()
stats = [t for c, t in values]
capacities = np.array([c for c, t in values])
ax.violinplot(stats, positions=capacities, widths=np.gradient(capacities), showmeans=False, showmedians=True)
ax.set_ylabel('Execution time (ms)')
ax.set_xlabel('Loaded data size (Bytes)')
fig.suptitle('SciQLop remote cache scaling', fontsize=16)
plt.tight_layout()
plt.loglog()
plt.show()