import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
# Load the exported inflation table and give its first column (whose header
# is the indicator description) the short name 'country'.
rawdf = (
    pd.read_csv('imf-dm-export-20240515.xls - PCPIPCH.csv')
    .rename(
        columns={
            'Inflation rate, average consumer prices (Annual percent change)': 'country',
        }
    )
)
# Notebook preview of the first rows.
rawdf.head()
country | 1980 | 1981 | 1982 | 1983 | 1984 | 1985 | 1986 | 1987 | 1988 | ... | 2020 | 2021 | 2022 | 2023 | 2024 | 2025 | 2026 | 2027 | 2028 | 2029 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | Afghanistan | no data | no data | no data | no data | no data | no data | no data | no data | no data | ... | 5.6 | 7.8 | 10.6 | no data | no data | no data | no data | no data | no data | no data |
2 | Albania | no data | no data | no data | no data | no data | no data | no data | no data | no data | ... | 1.6 | 2 | 6.7 | 4.8 | 3.5 | 3 | 3 | 3 | 3 | 3 |
3 | Algeria | 9.7 | 14.6 | 6.6 | 7.8 | 6.3 | 10.4 | 14 | 5.9 | 5.9 | ... | 2.4 | 7.2 | 9.3 | 9.3 | 7.6 | 6.4 | 6.1 | 5.5 | 5.2 | 5 |
4 | Andorra | no data | no data | no data | no data | no data | no data | no data | no data | no data | ... | 0.1 | 1.7 | 6.2 | 5.6 | 4.3 | 2.4 | 2.1 | 1.7 | 1.7 | 1.7 |
5 rows × 51 columns
# Countries in the comparison; one of them is singled out as the treated unit.
g7list = [
    'United States',
    'Canada',
    'United Kingdom',
    'Germany',
    'Italy',
    'France',
    'Japan',
]
treated_country = 'United States'
# Every listed country except the treated one, in the original list order.
control = [name for name in g7list if name != treated_country]
control
['Canada', 'United Kingdom', 'Germany', 'Italy', 'France', 'Japan']
# Keep only the listed countries and reshape from wide (one column per year)
# to long (one row per country-year).
g7df = rawdf[rawdf['country'].isin(g7list)].reset_index(drop=True)
g7df = g7df.melt(id_vars=['country'], var_name='year', value_name='inflation_rate')
g7df['year'] = g7df['year'].astype(int)
# The raw table marks missing observations with the text 'no data'. This
# conversion runs over every year column *before* the sample window is
# applied, so a plain astype(float) would raise on a single marker anywhere in
# a country's history. Coerce unparseable cells to NaN instead; the trailing
# astype(float) guarantees a float column even if every value is whole.
g7df['inflation_rate'] = pd.to_numeric(g7df['inflation_rate'], errors='coerce').astype(float)
# Restrict to the 2000-2024 sample window.
g7df = g7df[(g7df['year'] >= 2000) & (g7df['year'] < 2025)].copy()
# 1 for the treated country, 0 for every other country.
g7df['treated'] = np.where(g7df['country']==treated_country,1,0)
# Yearly mean inflation within each group (treated vs. the rest), used for plotting.
df = g7df.groupby(['treated','year'])['inflation_rate'].agg('mean').reset_index()
# Period indicators: 2017-2020 and 2021-2024.
df['trump_years'] = np.where((df['year'] >= 2017) & (df['year'] < 2021),1,0)
df['biden_years'] = np.where((df['year'] >= 2021) & (df['year'] < 2025),1,0)
# NOTE: 'control' is rebound here from the list of country names to a DataFrame
# of group-mean rows; the plotting code relies on the DataFrame form.
treated = df[df['treated']==1]
control = df[df['treated']==0]
# Draw the treated series and the control-group average on the current axes.
ax = plt.gca()
series_to_draw = (
    (treated, 'United States', 'blue'),
    (control, 'Other G7', 'red'),
)
for frame, series_label, line_color in series_to_draw:
    ax.plot(frame['year'], frame['inflation_rate'], label=series_label, color=line_color)

# Thin dashed markers placed between 2016/2017 and between 2020/2021.
for boundary in (2016.5, 2020.5):
    ax.axvline(x=boundary, color='gray', linestyle='--', linewidth=0.5)

# Axis labels, title and legend.
ax.set_xlabel('Year')
ax.set_ylabel('Inflation Rate')
ax.set_title('USA vs G7 Inflation Rate over Time')
ax.legend()

# Render the figure.
plt.show()
# Difference-in-differences on the country-year panel, with 2009-2016 serving
# as the pre-period (rows before 2009 are dropped).
df = g7df.loc[g7df['year'] >= 2009].copy()

# Period dummies: 2017-2020 and 2021-2024 (both bounds inclusive).
df['trump_years'] = df['year'].between(2017, 2020).astype(int)
df['biden_years'] = df['year'].between(2021, 2024).astype(int)

# Interactions of the treated indicator with each period dummy; their
# coefficients are the difference-in-differences estimates.
df['USA_trump'] = df['treated'].mul(df['trump_years'])
df['USA_biden'] = df['treated'].mul(df['biden_years'])

# Pooled OLS with default (non-robust) standard errors.
formula = 'inflation_rate~treated+trump_years+biden_years+USA_trump+USA_biden'
pooled_spec = smf.ols(formula, data=df)
model = pooled_spec.fit()
print(model.summary())
OLS Regression Results ============================================================================== Dep. Variable: inflation_rate R-squared: 0.432 Model: OLS Adj. R-squared: 0.405 Method: Least Squares F-statistic: 16.11 Date: Wed, 24 Jul 2024 Prob (F-statistic): 8.58e-12 Time: 21:39:29 Log-Likelihood: -205.40 No. Observations: 112 AIC: 422.8 Df Residuals: 106 BIC: 439.1 Df Model: 5 Covariance Type: nonrobust =============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------- Intercept 1.2229 0.225 5.443 0.000 0.777 1.668 treated 0.1521 0.594 0.256 0.799 -1.026 1.331 trump_years 0.0229 0.389 0.059 0.953 -0.749 0.794 biden_years 2.9688 0.389 7.629 0.000 2.197 3.740 USA_trump 0.4771 1.030 0.463 0.644 -1.564 2.518 USA_biden 0.5812 1.030 0.565 0.574 -1.460 2.623 ============================================================================== Omnibus: 13.327 Durbin-Watson: 1.305 Prob(Omnibus): 0.001 Jarque-Bera (JB): 16.104 Skew: 0.675 Prob(JB): 0.000318 Kurtosis: 4.276 Cond. No. 9.43 ============================================================================== Notes: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Same specification, but with one dummy per country in place of the single
# 'treated' indicator (the trailing '-1' removes the intercept), and with
# standard errors clustered by country.
formula = 'inflation_rate~C(country)+trump_years+biden_years+USA_trump+USA_biden-1'
cluster_options = {'groups': df['country']}
fe_spec = smf.ols(formula, data=df)
model = fe_spec.fit(cov_type='cluster', cov_kwds=cluster_options)
print(model.summary())
OLS Regression Results ============================================================================== Dep. Variable: inflation_rate R-squared: 0.526 Model: OLS Adj. R-squared: 0.479 Method: Least Squares F-statistic: nan Date: Wed, 24 Jul 2024 Prob (F-statistic): nan Time: 21:39:29 Log-Likelihood: -195.30 No. Observations: 112 AIC: 412.6 Df Residuals: 101 BIC: 442.5 Df Model: 10 Covariance Type: cluster ============================================================================================== coef std err z P>|z| [0.025 0.975] ---------------------------------------------------------------------------------------------- C(country)[Canada] 1.4396 0.068 21.190 0.000 1.306 1.573 C(country)[France] 1.0958 0.068 16.130 0.000 0.963 1.229 C(country)[Germany] 1.4521 0.068 21.374 0.000 1.319 1.585 C(country)[Italy] 1.2021 0.068 17.694 0.000 1.069 1.335 C(country)[Japan] -0.0417 0.068 -0.613 0.540 -0.175 0.091 C(country)[United Kingdom] 2.1896 0.068 32.230 0.000 2.056 2.323 C(country)[United States] 1.3750 7.06e-16 1.95e+15 0.000 1.375 1.375 trump_years 0.0229 0.134 0.172 0.864 -0.239 0.285 biden_years 2.9687 0.300 9.902 0.000 2.381 3.556 USA_trump 0.4771 0.134 3.574 0.000 0.215 0.739 USA_biden 0.5812 0.300 1.939 0.053 -0.006 1.169 ============================================================================== Omnibus: 12.648 Durbin-Watson: 0.982 Prob(Omnibus): 0.002 Jarque-Bera (JB): 14.615 Skew: 0.670 Prob(JB): 0.000670 Kurtosis: 4.155 Cond. No. 5.08 ============================================================================== Notes: [1] Standard Errors are robust to cluster correlation (cluster)
# Two-sided test that the two interaction coefficients are equal.
equality_test = model.f_test('USA_biden = USA_trump')
print(equality_test)
<F test: F=0.07663331389258181, p=0.7912022787430979, df_denom=6, df_num=1>
from scipy.stats import t

# One-sided test of H0: USA_biden - USA_trump = 0 against
# H1: USA_biden - USA_trump > 0.
#
# Build the single-row contrast that picks out (USA_biden - USA_trump).
hypothesis_matrix = np.zeros((1, len(model.params)))
hypothesis_matrix[0, model.params.index.get_loc('USA_trump')] = -1  # -USA_trump
hypothesis_matrix[0, model.params.index.get_loc('USA_biden')] = 1   # +USA_biden
hypothesis_value = [0]  # right-hand side of the null hypothesis

# A one-sided p-value needs the *signed* t-ratio of the contrast. The Wald
# statistic is chi-square/F distributed: it is non-negative and equals the
# square of the t-ratio, so feeding it to t.cdf gives a wrong answer. t_test
# returns the signed ratio directly; use_t=True requests the t distribution
# with the model's inference degrees of freedom (contrast_test.df_denom), the
# same denominator df the F-test on these coefficients reports.
contrast_test = model.t_test((hypothesis_matrix, hypothesis_value), use_t=True)
t_value = float(np.squeeze(contrast_test.tvalue))

# Upper-tail probability P(T > t_value); sf is the numerically stable 1 - cdf.
print('One-sided p-value:', t.sf(t_value, df=contrast_test.df_denom))
One-sided p-value: 0.46953342781107865