import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf


# Load the IMF inflation export. The table is wide (one column per year) and
# the column that holds the country names is headed with the indicator title,
# so that column is renamed to 'country' right after reading.
rawdf = (
    pd.read_csv('imf-dm-export-20240515.xls - PCPIPCH.csv')
    .rename(
        columns={
            'Inflation rate, average consumer prices (Annual percent change)': 'country',
        }
    )
)
rawdf.head()


# Countries in the comparison. One of them is designated as the treated unit;
# every other entry of the list forms the control group.
g7list = [
    'United States',
    'Canada',
    'United Kingdom',
    'Germany',
    'Italy',
    'France',
    'Japan',
]
treated_country = 'United States'
control = list(filter(lambda name: name != treated_country, g7list))
control

['Canada', 'United Kingdom', 'Germany', 'Italy', 'France', 'Japan']


# Reshape the wide table into a long country-year panel restricted to the
# countries in g7list.
g7df = rawdf[rawdf['country'].isin(g7list)].reset_index(drop=True)
g7df = g7df.melt(id_vars=['country'], var_name='year', value_name='inflation_rate')
g7df['year'] = g7df['year'].astype(int)

# The export marks missing observations with the literal string 'no data'.
# Turn only that placeholder into NaN before converting, so the conversion
# keeps working if the country list ever includes a series with gaps, while
# any other unexpected text still raises instead of being silently dropped.
g7df['inflation_rate'] = (
    g7df['inflation_rate']
    .mask(g7df['inflation_rate'] == 'no data')
    .astype(float)
)

# Keep the years 2000 through 2024 (upper bound exclusive at 2025).
g7df = g7df[(g7df['year'] >= 2000) & (g7df['year'] < 2025)].copy()

# Indicator column: 1 for the treated country, 0 for every other country.
g7df['treated'] = np.where(g7df['country'] == treated_country, 1, 0)


# Mean inflation rate per (treated, year) cell: one yearly series for the
# treated country and one yearly average across the remaining countries.
df = g7df.groupby(['treated', 'year'], as_index=False)['inflation_rate'].mean()

# Period indicators. 'year' holds whole numbers, so between(2017, 2020) selects
# the same rows as (year >= 2017) & (year < 2021), and likewise for 2021-2024.
df['trump_years'] = np.where(df['year'].between(2017, 2020), 1, 0)
df['biden_years'] = np.where(df['year'].between(2021, 2024), 1, 0)

treated = df.loc[df['treated'] == 1]
control = df.loc[df['treated'] == 0]

# One line per group.
for series, legend_label, line_color in (
    (treated, 'United States', 'blue'),
    (control, 'Other G7', 'red'),
):
    plt.plot(series['year'], series['inflation_rate'], label=legend_label, color=line_color)

# Dashed markers halfway between 2016/2017 and between 2020/2021, i.e. at the
# boundaries of the two period indicators defined above.
for boundary in (2016.5, 2020.5):
    plt.axvline(x=boundary, color='gray', linestyle='--', linewidth=0.5)

# Axis labels, title, legend, then render.
plt.xlabel('Year')
plt.ylabel('Inflation Rate')
plt.title('USA vs G7 Inflation Rate over Time')
plt.legend()
plt.show()


# Difference-in-differences on the country-year panel. Rows from 2009 onward
# are kept, so 2009-2016 is the pre-period and the two later windows
# (2017-2020 and 2021-2024) are the comparison periods.
df = g7df.loc[g7df['year'] >= 2009].copy()

# Period indicators; 'year' holds whole numbers, so the inclusive bounds below
# select the same rows as (year >= 2017) & (year < 2021) and
# (year >= 2021) & (year < 2025).
df['trump_years'] = np.where(df['year'].between(2017, 2020), 1, 0)
df['biden_years'] = np.where(df['year'].between(2021, 2024), 1, 0)

# Interactions of the treated indicator with each period indicator; their
# coefficients are the difference-in-differences estimates.
df['USA_trump'] = df['treated'].mul(df['trump_years'])
df['USA_biden'] = df['treated'].mul(df['biden_years'])

# Pooled OLS with the default (nonrobust) covariance.
formula = 'inflation_rate~treated+trump_years+biden_years+USA_trump+USA_biden'
pooled_spec = smf.ols(formula, data=df)
model = pooled_spec.fit()
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:         inflation_rate   R-squared:                       0.432
Model:                            OLS   Adj. R-squared:                  0.405
Method:                 Least Squares   F-statistic:                     16.11
Date:                Wed, 24 Jul 2024   Prob (F-statistic):           8.58e-12
Time:                        21:39:29   Log-Likelihood:                -205.40
No. Observations:                 112   AIC:                             422.8
Df Residuals:                     106   BIC:                             439.1
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Intercept       1.2229      0.225      5.443      0.000       0.777       1.668
treated         0.1521      0.594      0.256      0.799      -1.026       1.331
trump_years     0.0229      0.389      0.059      0.953      -0.749       0.794
biden_years     2.9688      0.389      7.629      0.000       2.197       3.740
USA_trump       0.4771      1.030      0.463      0.644      -1.564       2.518
USA_biden       0.5812      1.030      0.565      0.574      -1.460       2.623
==============================================================================
Omnibus:                       13.327   Durbin-Watson:                   1.305
Prob(Omnibus):                  0.001   Jarque-Bera (JB):               16.104
Skew:                           0.675   Prob(JB):                     0.000318
Kurtosis:                       4.276   Cond. No.                         9.43
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


# Same specification, but with one dummy per country in place of the single
# treated indicator (the trailing -1 removes the common intercept), and with
# standard errors clustered by country.
formula = 'inflation_rate~C(country)+trump_years+biden_years+USA_trump+USA_biden-1'
cluster_options = {'groups': df['country']}
fe_spec = smf.ols(formula, data=df)
model = fe_spec.fit(cov_type='cluster', cov_kwds=cluster_options)
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:         inflation_rate   R-squared:                       0.526
Model:                            OLS   Adj. R-squared:                  0.479
Method:                 Least Squares   F-statistic:                       nan
Date:                Wed, 24 Jul 2024   Prob (F-statistic):                nan
Time:                        21:39:29   Log-Likelihood:                -195.30
No. Observations:                 112   AIC:                             412.6
Df Residuals:                     101   BIC:                             442.5
Df Model:                          10                                         
Covariance Type:              cluster                                         
==============================================================================================
                                 coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------------
C(country)[Canada]             1.4396      0.068     21.190      0.000       1.306       1.573
C(country)[France]             1.0958      0.068     16.130      0.000       0.963       1.229
C(country)[Germany]            1.4521      0.068     21.374      0.000       1.319       1.585
C(country)[Italy]              1.2021      0.068     17.694      0.000       1.069       1.335
C(country)[Japan]             -0.0417      0.068     -0.613      0.540      -0.175       0.091
C(country)[United Kingdom]     2.1896      0.068     32.230      0.000       2.056       2.323
C(country)[United States]      1.3750   7.06e-16   1.95e+15      0.000       1.375       1.375
trump_years                    0.0229      0.134      0.172      0.864      -0.239       0.285
biden_years                    2.9687      0.300      9.902      0.000       2.381       3.556
USA_trump                      0.4771      0.134      3.574      0.000       0.215       0.739
USA_biden                      0.5812      0.300      1.939      0.053      -0.006       1.169
==============================================================================
Omnibus:                       12.648   Durbin-Watson:                   0.982
Prob(Omnibus):                  0.002   Jarque-Bera (JB):               14.615
Skew:                           0.670   Prob(JB):                     0.000670
Kurtosis:                       4.155   Cond. No.                         5.08
==============================================================================

Notes:
[1] Standard Errors are robust to cluster correlation (cluster)


# Two-sided test that the two interaction coefficients are equal.
equality_test = model.f_test('USA_biden = USA_trump')
print(equality_test)

<F test: F=0.07663331389258181, p=0.7912022787430979, df_denom=6, df_num=1>


from scipy.stats import t

# One-sided test of H0: b11 - b10 <= 0 against H1: b11 - b10 > 0, where b10 is
# the USA_trump coefficient and b11 is the USA_biden coefficient.
hypothesis_matrix = np.zeros((1, len(model.params)))
hypothesis_matrix[0, model.params.index.get_loc('USA_trump')] = -1  # -b10
hypothesis_matrix[0, model.params.index.get_loc('USA_biden')] = 1   # +b11
hypothesis_value = [0]  # Testing b11 - b10 > 0

# A Wald test reports a chi-square/F statistic, i.e. the *square* of the
# z/t-statistic. It is never negative, so it cannot tell the direction of the
# difference, and passing it to a t CDF does not give a one-sided p-value.
# t_test keeps the sign and uses the same reference distribution as the
# model's own coefficient table.
contrast = model.t_test((hypothesis_matrix, hypothesis_value))
t_value = float(np.squeeze(contrast.tvalue))
two_sided_p = float(np.squeeze(contrast.pvalue))

# The reference distribution (normal or Student t) is symmetric, so the upper
# tail is half the two-sided p-value when the estimate has the hypothesised
# sign and one minus that half otherwise.
one_sided_p = two_sided_p / 2 if t_value > 0 else 1 - two_sided_p / 2
print('One-sided p-value:', one_sided_p)

One-sided p-value: 0.46953342781107865

	country	1980	1981	1982	1983	1984	1985	1986	1987	1988	...	2020	2021	2022	2023	2024	2025	2026	2027	2028	2029
0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	Afghanistan	no data	no data	no data	no data	no data	no data	no data	no data	no data	...	5.6	7.8	10.6	no data	no data	no data	no data	no data	no data	no data
2	Albania	no data	no data	no data	no data	no data	no data	no data	no data	no data	...	1.6	2	6.7	4.8	3.5	3	3	3	3	3
3	Algeria	9.7	14.6	6.6	7.8	6.3	10.4	14	5.9	5.9	...	2.4	7.2	9.3	9.3	7.6	6.4	6.1	5.5	5.2	5
4	Andorra	no data	no data	no data	no data	no data	no data	no data	no data	no data	...	0.1	1.7	6.2	5.6	4.3	2.4	2.1	1.7	1.7	1.7