#!/usr/bin/env python # coding: utf-8 # In[792]: import pandas as pd import numpy as np import matplotlib.pylab as plt import statsmodels.api as sm from statsmodels.tsa.stattools import grangercausalitytests def get_percentage(x): return (np.exp(x) - 1) * 100 SPECIAL_COUNTRIES = ["ISR", "AUS", "CAN", "USA"] def granger_w_zero_lag(df, maxlag, **kwargs): df_copy = df.copy() df_copy.iloc[:, 1] = df_copy.iloc[:, 1].shift(-1) df_copy = df_copy.fillna(0) return grangercausalitytests(df_copy, maxlag, **kwargs) # In[796]: # download vax dataset at "https://github.com/owid/covid-19-data/raw/master/public/data/vaccinations/vaccinations.csv" # And put it in "../data/owid_vaccinations.csv" # nuts = pd.read_csv("../data/NUTS2021.csv") vax = pd.read_csv("../data/owid_vaccinations.csv") vax["date"] = pd.to_datetime(vax["date"]) vax.head() vax["country"] = vax["iso_code"] vax.loc[vax["iso_code"] == "DEU", "country"] = "DEUTNP" vax.loc[vax["iso_code"] == "FRA", "country"] = "FRATNP" vax.loc[vax["iso_code"] == "NZL", "country"] = "NZL_NP" vax.loc[vax["iso_code"] == "OWID_SCT", "country"] = "GBR_SCO" for country in ["SWE", "RUS", "SVK", "FIN", "OWID_ENG", "OWID_WLS", "NLD", "PRT"]: # for country in vax.iso_code.unique(): vax.loc[vax["iso_code"] == country, ["people_vaccinated"]] = vax[vax["iso_code"] == country]["people_vaccinated"].interpolate(method="ffill") vax.loc[vax["iso_code"] == country, ["total_boosters"]] = vax[vax["iso_code"] == country]["total_boosters"].fillna(method="ffill") vax.loc[vax["iso_code"] == country, ["people_fully_vaccinated"]] = vax[vax["iso_code"] == country]["people_fully_vaccinated"].fillna(method="ffill") vax.loc[vax["iso_code"] == country, ["total_vaccinations"]] = vax[vax["iso_code"] == country]["total_vaccinations"].fillna(method="ffill") df_GBRTENW = vax[vax["iso_code"] == "OWID_ENG"].copy() df_GBRTENW.index = pd.to_datetime(df_GBRTENW.date) df_GBRTENW["country"] = "GBRTENW" for col in ["people_vaccinated", "total_boosters", "people_fully_vaccinated", "total_vaccinations"]: other_df = vax[vax["iso_code"] == "OWID_WLS"] other_df.index = pd.to_datetime(other_df.date) df_GBRTENW.loc[other_df.index, col] += other_df[col].values # df_GBRTENW.loc[:, "people_fully_vaccinated"] = df_GBRTENW.loc[:, "total_vaccinations"] vax = pd.concat([vax, df_GBRTENW]) vax.head() # In[794]: # load data from the "all_countries" notebook computing excess mortality all_ages_df = pd.read_csv("../data/mortality_all_ages_normalized.csv") all_ages_df["date"] = pd.to_datetime(all_ages_df.date) all_ages_df["centered_per"] = get_percentage(all_ages_df["centered"].values) all_ages_df["centered_per_low"] = get_percentage(all_ages_df["centered_low"].values) all_ages_df["centered_per_high"] = get_percentage(all_ages_df["centered_high"].values) all_ages_df.head() # In[797]: all_ages_df.country.unique() # In[798]: vax.iso_code.unique() # In[799]: # Calculate weekly doses administered combo = pd.merge(vax, all_ages_df, on=["country", "date"], how='right') def get_weekly_vax(combo): dfs = [] for group, df in combo.groupby(by=["country", "sex", "age_group"]): # if group[0] != "SVK" or group[1] != "b" or group[2] != "adult": # continue df.index = pd.to_datetime(df.date) df = df.sort_index() weekly_vax = df.people_vaccinated.values[1:] - df.people_vaccinated.values[:-1] weekly_vax[weekly_vax == 0] = np.nan weekly_fully_vax = df.people_fully_vaccinated.values[1:] - df.people_fully_vaccinated.values[:-1] weekly_fully_vax[weekly_fully_vax == 0] = np.nan weekly_booster_vax = df.total_boosters.values[1:] - df.total_boosters.values[:-1] weekly_booster_vax[weekly_booster_vax == 0] = np.nan df.loc[:,"weekly_partial_vax"] = np.insert(weekly_vax, 0, df.people_vaccinated.values[0]) df.loc[:,"weekly_fully_vax"] = np.insert(weekly_fully_vax, 0, df.people_fully_vaccinated.values[0]) df.loc[:,"weekly_booster_vax"] = np.insert(weekly_booster_vax, 0, df.total_boosters.values[0]) for col in ["weekly_partial_vax", "weekly_fully_vax", "weekly_booster_vax"]: if group[0] in ["ISL", "LUX"]: pass # df.loc[:, col] = df.loc[:, col].fillna(method="fill") else: df.loc[:, col] = df.loc[:, col].interpolate() df.loc[df["weekly_partial_vax"].isna(), "weekly_partial_vax"] = 0 df.loc[df["weekly_fully_vax"].isna(), "weekly_fully_vax"] = 0 df.loc[df["weekly_booster_vax"].isna(), "weekly_booster_vax"] = 0 df["weekly_total_vax"] = df.loc[:,"weekly_partial_vax"] + df.loc[:,"weekly_fully_vax"] + df.loc[:,"weekly_booster_vax"] df.loc[:, "location"] = df["location"].fillna(method="ffill").fillna(method="bfill") # if group[0] == "ISL" and group[1] == "b" and group[2] == "adult": # break dfs.append(df) combo = pd.concat(dfs) return combo combo = get_weekly_vax(combo) # df.index = pd.to_datetime(df.date) # df = df[~df.iso_code.isnull()] # df.loc[:, col].interpolate()["2021-10":] # In[800]: # Calculate everate excess mortality post covid countries = all_ages_df.country.unique() pd.options.mode.chained_assignment = None # default='warn' avg_mortality_percent = {} specific = {} for country in countries: specific[country] = combo[(combo.country == country) & (combo.date.dt.year.isin([2020, 2021, 2022])) & (combo.age_group == "high") & (combo.sex == "b")] avg_mortality_percent[country] = specific[country].centered_per.mean() # In[833]: # PLOTS n_cols = 2 n_countries = len(SPECIAL_COUNTRIES) n_rows = int(np.ceil(n_countries / n_cols)) age_string = {"high": "65+", "mid": "15-64", "adult": "15+"} for group, age_df in combo.groupby(by=["age_group", "sex"]): age_group, sex = group if sex != "b": continue fig, axs = plt.subplots(n_rows, n_cols, figsize=(14,5)) i = 0 for country, df in age_df.groupby(by=["country"]): if country not in SPECIAL_COUNTRIES: continue j = i % n_cols k = i // n_cols axs[k, j].plot(df.date, df.centered_per, color="black", linewidth=2, label="mortality [%]") axs[k, j].plot(df.date, df.centered_per_low, color="black", linestyle='--', label="95% confidence band") axs[k, j].plot(df.date, df.centered_per_high, color="black", linestyle='--') axs[k, j].set_title(df.loc[:, "location"].values[0], fontsize=20) axs[k, j].set_xlim([np.datetime64("2020-01-01"), np.datetime64("2023-01-01")]) ax2 = axs[k, j].twinx() ax2.plot(df.date, df.weekly_partial_vax, label="1st dose", color="blue", alpha=0.7, linestyle="--") ax2.plot(df.date, df.weekly_fully_vax, label="2nd dose", color="green", alpha=0.7, linestyle="--") ax2.plot(df.date, df.weekly_booster_vax, label="3+ dose", color="red", alpha=0.7, linestyle="--") if j == n_cols - 1: ax2.set_ylabel("# Doses") if k == 0: axs[k, j].legend(loc="upper left") ax2.legend(loc="upper right") i += 1 fig.add_subplot(111, frameon=False) plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) plt.grid(False) plt.ylabel(f"Excess mortality, age {age_string[age_group]} [% from baseline]") fig.tight_layout() plt.savefig(f"../figures/special_countries_mortality_vs_vax_{age_group}_age_{sex}_sex.jpg", dpi=100) # In[832]: n_cols = 2 n_countries = len(SPECIAL_COUNTRIES) n_rows = int(np.ceil(n_countries / n_cols)) age_string = {"high": "65+", "mid": "15-64", "adult": "15+"} for group, age_df in combo.groupby(by=["age_group", "sex"]): age_group, sex = group if age_group != "adult" or sex != "b": continue fig, axs = plt.subplots(n_rows, n_cols, figsize=(14,5)) i = 0 for country, df in age_df.groupby(by=["country"]): if country not in SPECIAL_COUNTRIES: continue j = i % n_cols k = i // n_cols axs[k, j].plot(df.date, df.centered_per, color="black", linewidth=2, label="mortality [%]") axs[k, j].plot(df.date, df.centered_per_low, color="black", linestyle='--', label="95% confidence band") axs[k, j].plot(df.date, df.centered_per_high, color="black", linestyle='--') axs[k, j].set_title(df.loc[:, "location"].values[0], fontsize=20) axs[k, j].set_xlim([np.datetime64("2020-01-01"), np.datetime64("2023-01-01")]) ax2 = axs[k, j].twinx() any_dose = df.weekly_booster_vax ax2.plot(df.date, any_dose, label="# booster doses", color="red", alpha=0.7) if j == n_cols - 1: ax2.set_ylabel("# Doses") if k == 0: axs[k, j].legend(loc="upper left") ax2.legend(loc="upper right") i += 1 fig.add_subplot(111, frameon=False) plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) plt.grid(False) plt.ylabel(f"Excess mortality, age {age_string[age_group]} [% from baseline]") fig.tight_layout() plt.savefig(f"../figures/special_countries_mortality_vs_booster_{age_group}_age_{sex}_sex.jpg", dpi=100) # In[803]: n_countries = len(SPECIAL_COUNTRIES) n_cols = 2 n_rows = int(np.ceil(n_countries / n_cols)) age_string = {"high": "65+", "mid": "15-64", "adult": "15+"} for age_group, age_df in combo.groupby(by=["age_group"]): fig, axs = plt.subplots(n_rows, n_cols, figsize=(14,5)) i = 0 for gdf in age_df.groupby(by=["country"]): country, df = gdf if country not in SPECIAL_COUNTRIES: continue df = df[(df["sex"] == "b")] df.index = pd.to_datetime(df.date) j = i % n_cols k = i // n_cols axs[k, j].plot(df.date, df["fit.yearly"].values, color="gold", linewidth=2, label="trend") axs[k, j].plot(df.date, df["fit.yearly"].values + df["fit.weekly"].values, color="gold", linewidth=2) axs[k, j].plot(df.date, df["fit.yearly"].values + df["fit.weekly"].values + df["centered"].values, color="black", linewidth=1, label="mortality") axs[k, j].plot(df.date, df["fit.yearly"].values + df["fit.weekly"].values + df["centered_low"].values, color="black", linestyle='--', linewidth=0.5, label="95 confidence int.") axs[k, j].plot(df.date, df["fit.yearly"].values + df["fit.weekly"].values + df["centered_high"].values, color="black", linestyle='--', linewidth=0.5) axs[k, j].set_title(df.loc[:, "location"].values[0], fontsize=20) # axs[k, j].set_xlim([np.datetime64("2017-01-01"), np.datetime64("2023-01-01")]) if j == 0 and k % n_cols == 0: pass # axs[k, j].set_ylabel(f"Mortality, age {age_string[age_group]} [# deaths/100k/week]") if k == 1 and j == 1: axs[k, j].legend(loc="upper left") # ax2.legend(loc="upper right") i += 1 fig.add_subplot(111, frameon=False) plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) plt.grid(False) plt.ylabel(f"Mortality, age {age_string[age_group]} [# deaths/100k/week]") fig.tight_layout() plt.savefig(f"../figures/special_countries_mortality_w_trend_b_gender_{age_group}_age.jpg", dpi=100) # In[806]: # Shift vax with max autocorrelation # df = combo[(combo.age_group == "adult") & (combo.sex == "b") & (combo.country == "USA")] lags = {} max_lags = 12 vax_type = "weekly_booster_vax" for country, df in combo.groupby(by=["country"]): if country in ["RUS", "GBR_NIR", "HRV", "ISL"]: lags[country] = (0, np.nan) continue df = df[(df.age_group == "adult") & (df.sex == "b")] first_dose = np.arange(len(df))[df[vax_type] > 100][0] last_dose = np.arange(len(df))[df[vax_type] > 100][-1] h = plt.xcorr(df.centered_per, df[vax_type], maxlags=13) pos_ind = h[0] >= 0 argmax = np.argmax(h[1][pos_ind]) if country == "NOR": print(argmax) if h[0][pos_ind][argmax] == max_lags: lags[country] = (0, np.nan) else: lags[country] = (h[0][pos_ind][argmax], np.round(h[1][pos_ind][argmax], 2)) # In[895]: # Plot Norway's data shifted 10 weeks country = "NOR" lag = lags[country][0] df = combo[(combo.age_group == "adult") & (combo.sex == "b") & (combo.country == country)] fig, ax = plt.subplots(1,1, figsize=(12,4)) plt.plot(df.date, df.centered_per, color="black", linewidth=2, label="mortality [%]") plt.plot(df.date, df.centered_per_low, color="black", linestyle='--', label="95% confidence band") plt.plot(df.date, df.centered_per_high, color="black", linestyle='--') # plt.title(df.loc[:, "location"].values[0], fontsize=20) plt.xlim([np.datetime64("2020-01-01"), np.datetime64("2023-01-01")]) plt.ylabel(f"Excess mortality, age {age_string[age_group]} [% from baseline]") plt.legend(loc="upper left") # plt.grid() ax2 = ax.twinx() any_dose = df.weekly_total_vax first_dose = np.arange(len(any_dose))[any_dose > 100][0] last_dose = np.arange(len(any_dose))[any_dose > 100][-1] p2_orig, = ax2.plot(df.date, any_dose.values, label="# doses", color="red", alpha=0.2) if lag != 0: p2, = ax2.plot(df.date[lag:], any_dose.values[:-lag], label=f"# doses shifted {lag} weeks", color="blue", alpha=0.7) else: p2, = ax2.plot(df.date, any_dose, label="# doses", color="red", alpha=0.7) # h = granger_w_zero_lag(df.loc[:, ["centered_per", "weekly_booster_vax"]].iloc[first_dose:last_dose,:], lag + 2, verbose=True) title = f"{df.loc[:, 'location'].values[0]} - vaccination shifted {lags[country][0]} weeks" plt.title(title, fontsize=20) ax2.set_ylabel("# Doses") ax2.legend(handles=[p2_orig, p2], loc="upper right") ax2.tick_params(axis='y', colors=p2.get_color(), **tkw) ax2.yaxis.label.set_color(p2.get_color()) plt.savefig(f"../figures/STMF_norway_mortality_vs_vaccine_shifted_{age_group}_age_{sex}_sex.jpg", dpi=100) # In[843]: # Download weekly c19 data (hospitalizations and mortality) from norway c19_nor = pd.read_csv("../data/status-for-pandemien-nas-10-01-23.csv") c19_nor["date"] = pd.to_datetime(c19_nor["Uke"].str.cat(others=["-1"]*len(c19_nor), sep=""), format="%Y-%W-%w") c19_nor.columns = ['Uke', 'c19-hospitalized', 'c19-icu', 'c19-death', 'date'] # load data from the "all_countries" notebook computing excess mortality nor_df = pd.read_csv("../data/mortality_norway_stratified_normalized.csv") nor_df["date"] = pd.to_datetime(nor_df.date) nor_df["centered_per"] = get_percentage(nor_df["centered"].values) nor_df["centered_per_low"] = get_percentage(nor_df["centered_low"].values) nor_df["centered_per_high"] = get_percentage(nor_df["centered_high"].values) nor_df.head() age_group = "adult" nor_df["age_group"] = nor_df["age"] combo_nor = pd.merge(vax, nor_df, on=["country", "date"], how='right') combo_nor = get_weekly_vax(combo_nor) df = combo_nor[(combo_nor.age_group == age_group)] df.index = pd.to_datetime(df.date) nor_df = df.sort_index() # In[896]: N_PPL_15P = 84000 # Number of people of age 15+ fig, ax = plt.subplots(1,1,figsize=(12,4)) country = "NOR" def rate_to_ppl(df): return (np.exp(df["centered"]) - 1) * np.exp(df["fit.yearly"] + df["fit.weekly"]) * (df["pop"] / N_PPL_15P).values def rate_to_ppl_low(df): return (np.exp(df["centered_low"]) - 1) * np.exp(df["fit.yearly"] + df["fit.weekly"]) * (df["pop"] / N_PPL_15P).values def rate_to_ppl_high(df): return (np.exp(df["centered_high"]) - 1) * np.exp(df["fit.yearly"] + df["fit.weekly"]) * (df["pop"] / N_PPL_15P).values lag = lags[country][0] ax.plot(nor_df.date, rate_to_ppl(nor_df), linewidth=2, color="black", label="mortality [# people/week]") ax.plot(nor_df.date, rate_to_ppl_low(nor_df), linestyle="--", color="black", label="95% confidence band") ax.plot(nor_df.date, rate_to_ppl_high(nor_df), linestyle="--", color="black") plt.xlim([np.datetime64("2020-01-01"), np.datetime64("2023-01-01")]) plt.ylabel(f"Excess mortality, age {age_string[age_group]} [# of ppl from baseline]") plt.legend(loc="upper left") # plt.grid() ax2 = ax.twinx() any_dose = nor_df.weekly_total_vax p2_orig, = ax2.plot(df.date, df.weekly_total_vax, label="# doses", color="red", alpha=0.2) if lag != 0: p2, = ax2.plot(df.date[lag:], df.weekly_total_vax.values[:-lag], label=f"# doses shifted {lag} weeks", color="blue", alpha=0.7) else: p2, = ax2.plot(df.date, df.weekly_total_vax, label="# doses", color="red", alpha=0.7) # h = granger_w_zero_lag(df.loc[:, ["centered_per", "weekly_booster_vax"]].iloc[first_dose:last_dose,:], lag + 2, verbose=True) title = f"{df.loc[:, 'location'].values[0]} excess mortality vs vaccination shifted {lags[country][0]} weeks" plt.title(title, fontsize=20) ax2.set_ylabel("# Doses") ax2.legend(handles=[p2_orig, p2], loc="upper right") ax2.tick_params(axis='y', colors=p2.get_color(), **tkw) ax2.yaxis.label.set_color(p2.get_color()) plt.savefig(f"../figures/norway_excess_mortality_ppl_vs_total_vax.jpg", dpi=100) # In[847]: # Ballpark deathrate in Norway within 10 weeks of injection (not including people dying after that): # Assume 50% are due to vaccination from 2021-03 - 2022-01, height 250 ppl excess mortality, height 300k doses mortality_rate = 250 / 300 / 2 print(f"Mortality rate: {mortality_rate * 10} per 10k people" ) # In[878]: # More rigorous mortality rate estimate # Fit an ARMA(2,2) model with weekly vax rates as exogenous variable from statsmodels.tsa.statespace.sarimax import SARIMAX # SARIMAX() start_date = "2021-01-01" end_date = "2022-06-01" subset = nor_df[start_date:end_date] # first_dose = np.arange(len(df.weekly_total_vax))[df.weekly_total_vax > 0][0] # last_dose = np.arange(len(df.weekly_total_vax))[df.weekly_total_vax > 0][-1] # print(f"Fitting from {df.iloc[first_dose].date:%Y-%m-%d} to {df.iloc[last_dose].date:%Y-%m-%d}") response = (np.exp(subset["centered"]) - 1) * np.exp(subset["fit.yearly"] + subset["fit.weekly"]) * (subset["pop"] / N_PPL_15P).values endog = response.values endog += np.abs(np.min(endog)) relevant_vax_first = subset.weekly_fully_vax.values + subset.weekly_partial_vax.values relevant_vax_second = subset.weekly_booster_vax.values fig, ax = plt.subplots(1,2, figsize=(8,3)) acf = sm.graphics.tsa.plot_acf(endog.squeeze(), ax=ax[0]) pacf = sm.graphics.tsa.plot_pacf(endog, lags=20, method="ywm", ax=ax[1]) def get_predictors(raw_signals, cutoff=10, max_lag=14, normalize=True): predictors = [] k = 0 for signal in raw_signals.T: for j in range(cutoff, max_lag): pred_k = np.roll(signal, j) pred_k[:j] = 0 if np.any(pred_k > 0): predictors.append(pred_k) exog = np.array(predictors).T # will have dimensions nobs X n_lags if normalize: sds = np.std(exog, axis=0) exog /= sds return exog exog = get_predictors(np.array([relevant_vax_first, relevant_vax_second]).T) print(exog.shape) # In[849]: model_ols = sm.OLS(endog, exog) res_ols = model_ols.fit() from scipy.ndimage import uniform_filter1d, convolve1d smooth_exog = uniform_filter1d(exog, size=3, origin=-1, axis=0) convolve_exog = convolve1d(exog, weights=[1, 0.5, 0.25], origin=-1, axis=0) model2 = sm.GLSAR(endog, smooth_exog, rho=4) res = model2.iterative_fit(maxiter=6) res.summary() predictions = res.get_prediction(smooth_exog) frame = predictions.summary_frame(alpha=0.05) # convolve model3 = sm.GLSAR(endog, convolve_exog, rho=4) res3 = model3.iterative_fit(maxiter=6) predictions3 = res.get_prediction(convolve_exog) frame3 = predictions.summary_frame(alpha=0.05) res3.summary() # In[841]: # from sklearn.preprocessing import StandardScaler # from sklearn.linear_model import LassoCV, LassoLarsIC # from sklearn.pipeline import make_pipeline # from sklearn.model_selection import TimeSeriesSplit # tssplit = TimeSeriesSplit(gap=0, max_train_size=None, n_splits=6, test_size=None) # model = make_pipeline(LassoCV(cv=tssplit)).fit(X=exog, y=endog) # model_aic = make_pipeline(LassoLarsIC(criterion="aic")).fit(X=exog, y=endog) # lasso = model[-1] # lasso_aic = model[-1] # plt.semilogx(lasso.alphas_, lasso.mse_path_, linestyle=":") # plt.plot( # lasso.alphas_, # lasso.mse_path_.mean(axis=-1), # color="black", # label="Average across the folds", # linewidth=2, # ) # plt.axvline(lasso.alpha_, linestyle="--", color="black", label="alpha: CV estimate") # # plt.axvline(lasso_aic.alpha_, linestyle="--", color="red", label="alpha: AIC estimate") # # plt.ylim(ymin, ymax) # plt.xlabel(r"$\alpha$") # plt.ylabel("Mean square error") # plt.legend() # _ = plt.title( # f"Mean square error on each fold: coordinate descent" # ) # In[850]: plt.figure(figsize=(7,3)) plt.plot(subset.date, endog, label="Excess deaths", color="black", marker="+", linestyle="") # plt.plot(subset.date, exog[:, :5] @ lasso.coef_[:5], label="Lasso") plt.plot(subset.date, res3.fittedvalues, label="GLS-AR", linestyle="-", color="orange") plt.plot(subset.date, frame3.obs_ci_lower, label="GLS-AR Confidence band", linestyle="--", color="orange", linewidth=1) plt.plot(subset.date, frame3.obs_ci_upper, linestyle="--", color="orange", linewidth=1) plt.plot(subset.date, res_ols.fittedvalues, label="OLS", color="green") plt.ylabel("# Deaths / week") plt.ylabel("# Deaths / week") plt.title("Predicting excess mortality in Norway") plt.legend() plt.savefig("../figures/predicting_excess_mortality.jpg", dpi=100) mortality_rate_lasso = sum(exog[:, :5] @ lasso.coef_[:5]) / sum(subset["weekly_total_vax"]) mortality_rate_gls = sum(res.fittedvalues) / sum(subset["weekly_total_vax"]) mortality_rate_gls_low = np.round(sum(frame.obs_ci_lower) / sum(subset["weekly_total_vax"])*10000, 2) mortality_rate_gls_high = np.round(sum(frame.obs_ci_upper) / sum(subset["weekly_total_vax"])*10000, 2) mortality_rate_ols = sum(res_ols.fittedvalues) / sum(subset["weekly_total_vax"]) # print(f"Mortality rate using lasso: {np.round(mortality_rate_lasso * 10000, 2)}") print(f"Mortality rate using Generalized Least Squares: {np.round(mortality_rate_gls * 10000, 2)} [{mortality_rate_gls_low}, {mortality_rate_gls_high}]") print(f"Mortality rate using Ordinary Least Squares: {np.round(mortality_rate_ols * 10000, 2)}") # In[884]: # Fit model to all countries start_date = "2020-12-01" end_date = "2022-06-01" country_fit = {} country_ci = {} for country, df in combo.groupby(by=["country"]): if country in ["RUS", "GBR_NIR", "HRV", "ISL"]: continue df = df[(df.age_group == "adult") & (df.sex == "b")] subset = df[start_date:end_date] response = rate_to_ppl(subset) endog = response.values endog += np.abs(np.min(endog)) relevant_vax_first = subset.weekly_fully_vax.values + subset.weekly_partial_vax.values relevant_vax_second = subset.weekly_booster_vax.values cutoff = np.maximum(0, lags[country][0] - 2) max_lag = lags[country][0] + 2 if np.any(relevant_vax_second > 0): exog = get_predictors(np.array([relevant_vax_first, relevant_vax_second]).T, cutoff=cutoff, max_lag=max_lag) else: exog = get_predictors(np.array([relevant_vax_first]).T) convolve_exog = convolve1d(exog, weights=[1, 0.5, 0.25], origin=-1, axis=0) model3 = sm.GLSAR(endog, convolve_exog, rho=4) res3 = model3.iterative_fit(maxiter=6) predictions3 = res3.get_prediction(convolve_exog) frame3 = predictions3.summary_frame(alpha=0.05) country_fit[country] = res3 country_ci[country] = frame3 # In[894]: n_countries = len(countries) n_cols = 4 n_rows = int(np.ceil(n_countries / n_cols)) age_string = {"high": "65+", "mid": "15-64", "adult": "15+"} fig, axs = plt.subplots(n_rows, n_cols, figsize=(28,28)) i = 0 for country, df in combo.groupby(by=["country"]): if country in ["RUS", "GBR_NIR", "HRV", "ISL"]: continue j = i % n_cols k = i // n_cols df = df[(df.age_group == "adult") & (df.sex == "b")] subset = df[start_date:end_date] response = rate_to_ppl(subset) endog = response.values endog += np.abs(np.min(endog)) axs[k, j].plot(subset.date, endog, label="Excess deaths", color="black", marker="+", linestyle="") axs[k, j].plot(subset.date, country_fit[country].fittedvalues, label="GLS-AR", linestyle="-", color="orange") axs[k, j].plot(subset.date, country_ci[country].obs_ci_lower, label="GLS-AR Confidence band", linestyle="--", color="orange", linewidth=1) axs[k, j].plot(subset.date, country_ci[country].obs_ci_upper, linestyle="--", color="orange", linewidth=1) axs[k, j].set_ylabel("# Deaths / week") axs[k, j].set_title(f"{subset['location'].iloc[0]}") axs[k, j].legend() fig.tight_layout() mortality_rate_gls = sum(country_fit[country].fittedvalues) / sum(subset["weekly_total_vax"]) mortality_rate_gls_low = np.round(sum(country_ci[country].obs_ci_lower) / sum(subset["weekly_total_vax"])*10000, 2) mortality_rate_gls_high = np.round(sum(country_ci[country].obs_ci_upper) / sum(subset["weekly_total_vax"])*10000, 2) i += 1 print(f"{subset['location'].iloc[0]}: Mortality rate using Generalized Least Squares: {np.round(mortality_rate_gls * 10000, 2)} [{mortality_rate_gls_low}, {mortality_rate_gls_high}]") plt.title("Predicting mortality") plt.savefig(f"../figures/predicting_excess_mortality_all_countries.jpg", dpi=100) # In[883]: country # In[882]: country_fit[country].summary() # In[893]: # Appendix n_countries = len(countries) n_cols = 4 n_rows = int(np.ceil(n_countries / n_cols)) age_string = {"high": "65+", "mid": "15-64", "adult": "15+"} for group, age_df in combo.groupby(by=["age_group", "sex"]): age_group, sex = group if sex != "b": continue fig, axs = plt.subplots(n_rows, n_cols, figsize=(28,28)) i = 0 for country, df in age_df.groupby(by=["country"]): j = i % n_cols k = i // n_cols if df.loc[:, "location"].isnull().iloc[0]: continue axs[k, j].plot(df.date, df.centered_per, color="black", linewidth=2, label="mortality [%]") axs[k, j].plot(df.date, df.centered_per_low, color="black", linestyle='--', label="95% confidence band") axs[k, j].plot(df.date, df.centered_per_high, color="black", linestyle='--') axs[k, j].set_title(df.loc[:, "location"].values[0], fontsize=20) axs[k, j].set_xlim([np.datetime64("2020-01-01"), np.datetime64("2023-01-01")]) ax2 = axs[k, j].twinx() ax2.plot(df.date, df.weekly_partial_vax, label="1st dose", color="blue", alpha=0.7, linestyle="--") ax2.plot(df.date, df.weekly_fully_vax, label="2nd dose", color="green", alpha=0.7, linestyle="--") ax2.plot(df.date, df.weekly_booster_vax, label="3+ dose", color="red", alpha=0.7, linestyle="--") if j == n_cols - 1: ax2.set_ylabel("# Doses") if j == 0 and k % n_cols == 0: axs[k, j].set_ylabel(f"Excess mortality, age {age_string[age_group]} [% from baseline]") if k == 0: axs[k, j].legend(loc="upper left") ax2.legend(loc="upper right") i += 1 fig.tight_layout() plt.savefig(f"../figures/all_countries_mortality_vs_vax_{age_group}_age_{sex}_sex.jpg", dpi=100) # In[153]: n_countries = len(countries) n_cols = 4 n_rows = int(np.ceil(n_countries / n_cols)) age_string = {"high": "65+", "mid": "15-64", "adult": "15+"} for group, age_df in combo.groupby(by=["age_group", "sex"]): age_group, sex = group if age_group != "adult" or sex != "b": continue fig, axs = plt.subplots(n_rows, n_cols, figsize=(28,28)) i = 0 for country, df in age_df.groupby(by=["country"]): j = i % n_cols k = i // n_cols if df.loc[:, "location"].isnull().iloc[0]: continue axs[k, j].plot(df.date, df.centered_per, color="black", linewidth=2, label="mortality [%]") axs[k, j].plot(df.date, df.centered_per_low, color="black", linestyle='--') axs[k, j].plot(df.date, df.centered_per_high, color="black", linestyle='--') axs[k, j].set_title(df.loc[:, "location"].values[0], fontsize=20) axs[k, j].set_xlim([np.datetime64("2020-01-01"), np.datetime64("2023-01-01")]) ax2 = axs[k, j].twinx() any_dose = df.weekly_booster_vax ax2.plot(df.date, any_dose, label="# doses", color="red", alpha=0.7) if j == n_cols - 1: ax2.set_ylabel("# Doses") if j == 0 and k % n_cols == 0: axs[k, j].set_ylabel(f"Excess mortality, age {age_string[age_group]} [% from baseline]") if k == 0: axs[k, j].legend(loc="upper left") ax2.legend(loc="upper right") i += 1 fig.tight_layout() plt.savefig(f"../figures/all_countries_mortality_vs_booster_{age_group}_age_{sex}_sex.jpg", dpi=100) # In[154]: n_countries = len(countries) n_cols = 4 n_rows = int(np.ceil(n_countries / n_cols)) age_string = {"high": "65+", "mid": "15-64", "adult": "15+"} for age_group, age_df in combo.groupby(by=["age_group"]): fig, axs = plt.subplots(n_rows, n_cols, figsize=(28,28)) i = 0 for gdf in age_df.groupby(by=["country"]): country, df = gdf df = df[(df["sex"] == "b")] df.index = pd.to_datetime(df.date) if df.loc[:, "location"].isnull().iloc[0]: continue j = i % n_cols k = i // n_cols axs[k, j].plot(df.date, df["fit.yearly"].values, color="gold", linewidth=2, label="trend") axs[k, j].plot(df.date, df["fit.yearly"].values + df["fit.weekly"].values, color="gold", linewidth=2) axs[k, j].plot(df.date, df["fit.yearly"].values + df["fit.weekly"].values + df["centered"].values, color="black", linewidth=1, label="mortality") axs[k, j].plot(df.date, df["fit.yearly"].values + df["fit.weekly"].values + df["centered_low"].values, color="black", linestyle='--', linewidth=0.5, label="95 confidence int.") axs[k, j].plot(df.date, df["fit.yearly"].values + df["fit.weekly"].values + df["centered_high"].values, color="black", linestyle='--', linewidth=0.5) axs[k, j].set_title(df.loc[:, "location"].values[0], fontsize=20) # axs[k, j].set_xlim([np.datetime64("2017-01-01"), np.datetime64("2023-01-01")]) if j == 0 and k % n_cols == 0: axs[k, j].set_ylabel(f"Mortality, age {age_string[age_group]} [# deaths/100k/week]") if k == 0: axs[k, j].legend(loc="upper left") ax2.legend(loc="upper right") i += 1 fig.tight_layout() plt.savefig(f"../figures/all_countries_mortality_w_trend_b_gender_{age_group}_age.jpg", dpi=100) # In[173]: # ESP standard population std_esp_pop = np.array((1000, 4000, 5500, 5500, 5500, 6000, 6000, 6500, 7000, 7000, 7000, 7000, 6500, 6000, 5500, 5000, 4000, 2500, 1500, 800, 200)) esp_group = np.array([0,1] + np.arange(5,95,5).tolist() + [200]) esp = pd.DataFrame({"age": esp_group, "n": std_esp_pop}) esp[esp.age>=15].n.sum() # In[32]: vax_orig = pd.read_csv("../data/owid_vaccinations.csv") df = vax_orig[(vax_orig.iso_code == "SVK")] df.index = pd.to_datetime(df.date) df["2021-10":] # In[95]: # combo[(combo.country == "SVK") & (combo.sex == "b") & (age_group == "high")] df = combo[(combo.country == "SVK") & (combo.sex == "b") & (combo.age_group=="adult")] df.index = pd.to_datetime(df.date) # In[55]: mean_excess = pd.DataFrame(data=avg_mortality_percent.values(), index=avg_mortality_percent.keys(), columns=["excess_mortality"]) mean_excess.sort_values(by="excess_mortality") # In[530]: # NOT USED but maybe relevant - Plot shifted data n_countries = len(countries) n_cols = 4 n_rows = int(np.ceil(n_countries / n_cols)) age_string = {"high": "65+", "mid": "15-64", "adult": "15+"} from statsmodels.tools.sm_exceptions import InfeasibleTestError for group, age_df in combo.groupby(by=["age_group", "sex"]): age_group, sex = group if age_group != "adult" or sex != "b": continue fig, axs = plt.subplots(n_rows, n_cols, figsize=(28,28)) i = 0 for country, df in age_df.groupby(by=["country"]): j = i % n_cols k = i // n_cols if df.loc[:, "location"].isnull().iloc[0] or country == "TWN": continue axs[k, j].plot(df.date, df.centered_per, color="black", linewidth=2, label="mortality [%]") axs[k, j].plot(df.date, df.centered_per_low, color="black", linestyle='--') axs[k, j].plot(df.date, df.centered_per_high, color="black", linestyle='--') axs[k, j].set_xlim([np.datetime64("2020-01-01"), np.datetime64("2023-01-01")]) ax2 = axs[k, j].twinx() any_dose = df.weekly_booster_vax ax2.plot(df.date, any_dose, label="# doses", color="red", alpha=0.7) # Calculate granger causality p-values try: if lags[country][0] >= 0 and not np.isnan(lags[country][1]): h = granger_w_zero_lag(df.loc[:, ["centered_per", "weekly_booster_vax"]], lags[country][0] + 1, verbose=False) min_idx = np.argmin(np.array([h_lag[0]["ssr_ftest"][1] for h_lag in h.values()])) opt_lag, p_val = (min_idx, h[min_idx + 1][0]["ssr_ftest"][1]) print(f"{country}: {p_val}") signif = p_val < 0.05 else: signif = False except InfeasibleTestError as e: signif = False star = "*" if signif else "" title = f"{df.loc[:, 'location'].values[0]} - Shift: {lags[country][0]} week(s), R: {lags[country][1]}{star}" axs[k, j].set_title(title, fontsize=20) if lags[country][0] == 0: ax2.plot(df.date, any_dose, label="shifted", color="blue", alpha=0.7) else: ax2.plot(df.date[lags[country][0]:], any_dose[:-lags[country][0]], label="shifted", color="blue", alpha=0.7) if j == n_cols - 1: ax2.set_ylabel("# Doses") if j == 0 and k % n_cols == 0: axs[k, j].set_ylabel(f"Excess mortality, age {age_string[age_group]} [% from baseline]") if k == 0: axs[k, j].legend(loc="upper left") ax2.legend(loc="upper right") i += 1 fig.tight_layout() # plt.savefig(f"../figures/all_countries_mortality_vs_booster_{age_group}_age_{sex}_sex.jpg", dpi=100) # In[ ]: