import datetime as dt
import time
import IPython.display
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib_inline.backend_inline
import numpy as np
import pandas as pd
import requests as rq
import seaborn as sns
from ipywidgets import interact
# Ask the inline matplotlib backend to emit figures in SVG format.
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
def compose_url(**kwargs):
    """Build the LAPIS aggregated-sample URL, appending extra query filters.

    The base URL already requests per-date aggregation for human hosts from
    2021-11-01 onwards. Every keyword argument is appended as an additional
    ``key=value`` query parameter, in the order given.

    Keys and values are percent-encoded so that characters such as ``&``,
    ``=``, ``#`` or spaces inside a value cannot break or alter the query
    string. ``*`` is left literal so wildcard lineage filters such as
    ``BA.*`` produce the same URL as before. Non-string values are converted
    with ``str()``.

    Args:
        **kwargs: Additional query parameters (e.g. ``country``,
            ``pangoLineage``).

    Returns:
        The complete request URL as a string.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote, urlencode

    baseurl = "https://lapis.cov-spectrum.org/gisaid/v1/sample/aggregated?fields=date&host=Human&dateFrom=2021-11-01"
    if not kwargs:
        return baseurl
    # quote (not the default quote_plus) encodes spaces as %20 rather than "+".
    return baseurl + "&" + urlencode(kwargs, quote_via=quote, safe="*")
def get_data(**kwargs):
    """Request aggregated sample counts and return the decoded JSON body.

    The URL is built by ``compose_url`` from the given keyword arguments.

    Args:
        **kwargs: Query parameters forwarded to ``compose_url``.

    Returns:
        The parsed JSON response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
    """
    url = compose_url(**kwargs)
    # Without a timeout a stalled connection would block the notebook forever.
    response = rq.get(url, timeout=60)
    # Surface HTTP failures here instead of as a confusing KeyError downstream.
    response.raise_for_status()
    return response.json()
def get_df(column_name, **kwargs):
    """Fetch counts and return them as a date-indexed frame plus data version.

    The ``data`` records of the response are loaded into a DataFrame whose
    ``date`` field is parsed to datetimes, used as the index and sorted. The
    ``count`` column is renamed to ``column_name``.

    Args:
        column_name: Name to give the ``count`` column.
        **kwargs: Query parameters forwarded to ``get_data``.

    Returns:
        A ``(DataFrame, data_version)`` tuple, where ``data_version`` is read
        from ``info.dataVersion`` of the response.
    """
    payload = get_data(**kwargs)
    frame = (
        pd.DataFrame(payload["data"])
        .assign(date=lambda raw: pd.to_datetime(raw["date"]))
        .set_index("date")
        .sort_index()
        .rename(columns={"count": column_name})
    )
    version = payload["info"]["dataVersion"]
    return (frame, version)
# Upper bound on refetch attempts when the two queries report different data
# versions, so a persistent mismatch cannot loop forever.
_MAX_VERSION_RETRIES = 10


# NOTE: this helper is defined before ratio_plot because the @interact
# decorator invokes the decorated function as soon as it is defined.
def _load_ratio_frame(country, baseline, query):
    """Fetch baseline and query counts and compute the clipped daily ratio.

    Both lineage filters are fetched for the same country. The pair is
    refetched (after a one-second pause) while their data versions differ.

    Returns:
        A date-indexed DataFrame with integer ``baseline`` and ``query``
        columns and a ``ratio`` column (query / baseline). The ratio is
        clipped to a range strictly inside the observed extremes so that
        values of exactly 0 or 1 remain drawable on a logit axis.

    Raises:
        RuntimeError: If the data versions still differ after
            ``_MAX_VERSION_RETRIES`` attempts.
    """
    for _ in range(_MAX_VERSION_RETRIES):
        base_df, base_version = get_df(
            "baseline", country=country, pangoLineage=baseline
        )
        query_df, query_version = get_df(
            "query", country=country, pangoLineage=query
        )
        if base_version == query_version:
            break
        print("Data version mismatch, retrying")
        time.sleep(1)
    else:
        raise RuntimeError(
            f"data versions still differ after {_MAX_VERSION_RETRIES} attempts"
        )

    # Dates present in only one frame get a count of 0 in the other.
    df = pd.concat([base_df, query_df], axis=1).fillna(0).astype(int)

    # A day without baseline samples has no defined share; keep it as NaN
    # instead of inf so it cannot distort the clipping bounds below.
    ratio = (df["query"] / df["baseline"]).replace([np.inf, -np.inf], np.nan)

    # Bounds slightly beyond the smallest non-zero and largest non-one ratio.
    lower = ratio[ratio != 0].min() * 0.3
    upper = 1 - (1 - ratio[ratio != 1].max()) * 0.7
    df["ratio"] = ratio.clip(lower, upper)
    return df


@interact(country="Denmark", baseline="BA.*", query="BA.2")
def ratio_plot(country, baseline, query):
    """Plot the daily share of ``query`` among ``baseline`` samples.

    Registered with ``ipywidgets.interact``, using the decorator's keyword
    values as the initial inputs.

    Args:
        country: Country filter passed to the data query.
        baseline: Pango lineage filter for the denominator (e.g. ``BA.*``).
        query: Pango lineage filter for the numerator (e.g. ``BA.2``).

    Returns:
        None. On success the cell output is cleared and a scatter plot is
        shown; on failure an error message is printed instead.
    """
    print("Start update")
    try:
        df = _load_ratio_frame(country, baseline, query)
    except Exception as exc:
        # Report the cause; a bare except would also trap KeyboardInterrupt.
        print(f"Error: {exc!r}")
        return

    dates_with_query = df.index[df["query"] != 0]
    if dates_with_query.empty:
        # Without any query samples there is no sensible x-axis start.
        print("Error: no samples found for the query lineage")
        return

    print("Update plots")
    IPython.display.clear_output()

    fig, ax = plt.subplots(
        num=None, figsize=(8, 4), facecolor="w", edgecolor="k"
    )
    # Draw on the axes created above rather than on the implicit current axes.
    sns.scatterplot(
        data=df, x="date", y="ratio", hue="baseline", size="baseline", ax=ax
    )
    fig.text(
        0.51,
        0.05,
        f"Date: {dt.date.today()} | Data source: GISAID via covSpectrum | Viz: @CorneliusRoemer",
        size=6,
        va="bottom",
        ha="center",
    )

    # Start the x-axis one day before the first date with query samples.
    ax.set_xlim(left=dates_with_query.min() - pd.Timedelta(days=1))
    ax.set_yscale("logit")
    ax.set_ylabel(f"Proportion of {baseline} that are {query}")
    ax.set_xlabel("Sample date")
    ax.set_title(f"Share of {query} of all {baseline} in {country}")

    legend = ax.get_legend()
    if legend is not None:
        legend.set_title(f"{baseline} samples")

    locator = mdates.AutoDateLocator()
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    # Minor ticks at every multiple of 1.0 in axis units.
    ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=1.0))
    ax.grid(True, which="major", linewidth=0.25)
    ax.grid(True, which="minor", linewidth=0.1)
    ax.set_axisbelow(True)

    def _as_percent(y, _pos):
        """Format a fraction as a percentage without trailing zeros."""
        text = np.format_float_positional(100 * y, trim="-", precision=6)
        return f'{text.rstrip(".")}%'

    ax.yaxis.set_major_formatter(ticker.FuncFormatter(_as_percent))
    ax.yaxis.set_minor_formatter(plt.NullFormatter())
    plt.subplots_adjust(left=0.15, right=0.9, top=0.9, bottom=0.20)
    # To export the figure, call
    # fig.savefig(f"{country}_{query}.png", dpi=300) before plt.show().
    plt.show()