#!/usr/bin/env python
# coding: utf-8
"""Interactive notebook plot of the share of one Pango lineage within another.

Daily sample counts are fetched from the LAPIS ``aggregated`` endpoint
(GISAID data served by covSpectrum) for a baseline lineage and a query
lineage in one country; the ratio query/baseline is drawn on a logit axis.
"""

# In[ ]:

import datetime as dt
import time
from urllib.parse import quote, urlencode

import IPython.display
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib_inline.backend_inline
import numpy as np
import pandas as pd
import requests as rq
import seaborn as sns
from ipywidgets import interact

matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# Endpoint plus the filters that every request shares.
BASE_URL = (
    "https://lapis.cov-spectrum.org/gisaid/v1/sample/aggregated"
    "?fields=date&host=Human&dateFrom=2021-11-01"
)
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT_SECONDS = 30
# Upper bound on refetches when the two queries report different data versions.
MAX_VERSION_RETRIES = 10


def compose_url(**kwargs):
    """Return the aggregated-samples URL with ``kwargs`` appended as filters.

    Each keyword becomes one ``&key=value`` query parameter. Values are
    converted with ``str`` and percent-encoded so that characters such as
    ``&``, ``=`` or ``#`` inside a value cannot corrupt the query string.
    ``*`` is left unescaped so lineage wildcards like ``BA.*`` stay readable.
    """
    if not kwargs:
        return BASE_URL
    filters = urlencode(
        {key: str(value) for key, value in kwargs.items()},
        safe="*",
        quote_via=quote,
    )
    return f"{BASE_URL}&{filters}"


def get_data(**kwargs):
    """Fetch the URL built by ``compose_url(**kwargs)`` and return its JSON body.

    Raises:
        requests.RequestException: on connection problems, a timeout, or a
            non-success HTTP status.
    """
    response = rq.get(compose_url(**kwargs), timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail here with the HTTP status instead of later on a missing JSON key.
    response.raise_for_status()
    return response.json()


def get_df(column_name, **kwargs):
    """Return ``(frame, data_version)`` for the samples matching ``kwargs``.

    ``frame`` is built from the ``"data"`` entry of the response, indexed by
    the parsed ``date`` column and sorted by it, with the ``count`` column
    renamed to ``column_name``. ``data_version`` is the response's
    ``info.dataVersion`` value.

    Raises:
        ValueError: if the response contains no records.
    """
    payload = get_data(**kwargs)
    records = payload["data"]
    if not records:
        # An empty frame has no "date" column; report the real cause instead.
        raise ValueError(f"No samples returned for filters {kwargs!r}")
    frame = pd.DataFrame(records)
    frame["date"] = pd.to_datetime(frame["date"])
    frame = (
        frame.set_index("date")
        .sort_index()
        .rename(columns={"count": column_name})
    )
    return frame, payload["info"]["dataVersion"]


def _fetch_ratio_frame(country, baseline, query):
    """Return a frame with ``baseline``, ``query`` and clipped ``ratio`` columns."""
    for _ in range(MAX_VERSION_RETRIES):
        base_df, base_version = get_df(
            "baseline", country=country, pangoLineage=baseline
        )
        query_df, query_version = get_df(
            "query", country=country, pangoLineage=query
        )
        # Both counts must come from the same data release to be comparable.
        if base_version == query_version:
            break
        print("Data version mismatch, retrying")
        time.sleep(1)
    else:
        raise RuntimeError(
            f"Data versions still differ after {MAX_VERSION_RETRIES} attempts"
        )

    # Dates present in only one of the two frames get a count of 0.
    df = pd.concat([base_df, query_df], axis=1).fillna(0).astype(int)
    df["ratio"] = df["query"] / df["baseline"]

    # Clip to just outside the observed range of ratios that are not exactly
    # 0 or 1 -- presumably so that days at 0 or 1 remain drawable on the
    # logit axis, which is undefined at those values.
    lower = df.loc[df["ratio"] != 0, "ratio"].min() * 0.3
    upper = 1 - (1 - df.loc[df["ratio"] != 1, "ratio"].max()) * 0.7
    df["ratio"] = df["ratio"].clip(lower, upper)
    return df


def _format_percent(fraction, _position):
    """Format an axis value given as a fraction as a percentage label."""
    label = np.format_float_positional(100 * fraction, trim="-", precision=6)
    return f'{label.rstrip(".")}%'


def _draw_ratio_plot(df, country, baseline, query):
    """Draw the scatter plot of ``df["ratio"]`` over time and show it."""
    fig, ax = plt.subplots(
        num=None, figsize=(8, 4), facecolor="w", edgecolor="k"
    )
    # Point colour and size both encode the number of baseline samples.
    sns.scatterplot(
        data=df, x="date", y="ratio", hue="baseline", size="baseline", ax=ax
    )
    fig.text(
        0.51,
        0.05,
        f"Date: {dt.date.today()} | Data source: GISAID via covSpectrum | Viz: @CorneliusRoemer",
        size=6,
        va="bottom",
        ha="center",
    )

    # Start the x axis one day before the first date with a query sample.
    first_query_date = df[df["query"] != 0].index.min()
    ax.set_xlim(left=first_query_date - pd.Timedelta(days=1))
    ax.set_yscale("logit")
    ax.set_ylabel(f"Proportion of {baseline} that are {query}")
    ax.set_xlabel("Sample date")
    ax.set_title(f"Share of {query} of all {baseline} in {country}")
    ax.get_legend().set_title(f"{baseline} samples")

    locator = mdates.AutoDateLocator()
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=1.0))
    ax.grid(True, which="major", linewidth=0.25)
    ax.grid(True, which="minor", linewidth=0.1)
    ax.set_axisbelow(True)

    ax.yaxis.set_major_formatter(ticker.FuncFormatter(_format_percent))
    ax.yaxis.set_minor_formatter(ticker.NullFormatter())

    plt.subplots_adjust(left=0.15, right=0.9, top=0.9, bottom=0.20)
    plt.show()
    # NOTE: to export instead of displaying, call
    # plt.savefig(f"{country}_{query}.png", dpi=300) before plt.show().


@interact(country="Denmark", baseline="BA.*", query="BA.2")
def ratio_plot(country, baseline, query):
    """Fetch counts for ``query`` and ``baseline`` in ``country`` and plot their ratio.

    Registered with ``ipywidgets.interact``. Any failure while fetching or
    preparing the data is reported with a printed message and no plot is
    drawn; the exception is not propagated.
    """
    print("Start update")
    try:
        df = _fetch_ratio_frame(country, baseline, query)
    except Exception as exc:  # boundary of the widget callback: report, don't raise
        print(f"Error: {exc}")
        return

    print("Update plots")
    IPython.display.clear_output()
    _draw_ratio_plot(df, country, baseline, query)