Method of Simulated Moments (MSM) for Structural Estimation¶

Steps of MSM estimation¶

Load empirical data
Define a function to calculate empirical moments from the data
Calculate the covariance matrix of the empirical moments (for the weighting matrix)
Define a HARK agent type with the model parameters to be estimated
Define a function to simulate the model and calculate the simulated moments
Estimate the model parameters by minimizing the distance between the empirical and simulated moments

In [32]:

from copy import copy

import estimagic as em
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.stats.weightstats import DescrStatsW

from HARK.Calibration.Income.IncomeTools import (
    Cagetti_income,
    parse_income_spec,
    parse_time_params,
)
from HARK.Calibration.life_tables.us_ssa.SSATools import parse_ssa_life_table
from HARK.Calibration.SCF.WealthIncomeDist.SCFDistTools import (
    get_scf_distr_stats,
    income_wealth_dists_from_scf,
)
from HARK.ConsumptionSaving.ConsIndShockModel import (
    IndShockConsumerType,
    init_lifecycle,
)
from HARK.estimation import estimate_msm
from HARK.utilities import plot_funcs

1. Load empirical data¶

In [33]:

scf_data = get_scf_distr_stats()
scf_data["Wealth"] = np.exp(scf_data["lnWealth.mean"])
scf_data = scf_data[scf_data["Educ"] == "College"]
scf_data = scf_data[scf_data["YEAR"] != "All"]
scf_data = scf_data[scf_data["Age_grp"] != "All"]
scf_data = scf_data[~scf_data["Age_grp"].isin(["(15,20]", "(20,25]"])]

scf_data

Out[33]:

	Educ	YEAR	Age_grp	w.obs	obs	lnWealth.mean	lnWealth.sd	lnNrmWealth.mean	lnNrmWealth.sd	lnPermIncome.mean	lnPermIncome.sd	BASE_YR	Wealth
290	College	1995	(25,30]	2.765032e+06	478.0	4.126304	1.270230	-0.091128	1.133600	4.217433	0.594117	2019	61.948554
291	College	1995	(30,35]	2.819045e+06	553.0	4.485061	1.226898	0.142599	1.091322	4.342462	0.651507	2019	88.682362
292	College	1995	(35,40]	2.516753e+06	666.0	4.817262	1.522302	0.337608	1.249328	4.479653	0.774831	2019	123.626079
293	College	1995	(40,45]	3.582522e+06	1000.0	5.234296	1.587167	0.600269	1.340885	4.634028	0.670276	2019	187.597079
294	College	1995	(45,50]	3.261238e+06	1114.0	5.712218	1.553071	0.890605	1.197825	4.821613	0.724982	2019	302.541454
...	...	...	...	...	...	...	...	...	...	...	...	...	...
427	College	2019	(70,75]	3.276375e+06	1058.0	6.618749	1.407732	2.126120	1.112098	4.492629	0.957018	2019	749.007589
428	College	2019	(75,80]	1.836669e+06	543.0	6.392050	1.659717	2.061754	1.144179	4.330295	0.901808	2019	597.079225
429	College	2019	(80,85]	1.195969e+06	352.0	6.233572	1.491591	1.943915	1.059349	4.289658	0.820112	2019	509.572465
430	College	2019	(85,90]	6.284131e+05	129.0	6.010967	1.363456	1.792840	0.990714	4.218127	0.744886	2019	407.877637
431	College	2019	(90,95]	2.557529e+05	80.0	6.626294	1.356511	2.464512	1.090929	4.161781	0.788086	2019	754.679931

126 rows × 13 columns

2. Calculate Moments¶

In [34]:

indices = scf_data["Age_grp"].unique()


def calculate_weighted_median(data):
    stats = DescrStatsW(data["Wealth"], weights=data["w.obs"])
    return stats.quantile(0.5, return_pandas=False)[0]


def calculate_moments(data):
    medians = data.groupby(["Age_grp"]).apply(
        calculate_weighted_median,
        include_groups=False,
    )
    return medians.reindex(indices, fill_value=0.0)

In [35]:

empirical_moments = calculate_moments(scf_data)

In [36]:

empirical_moments.plot()

Out[36]:

<Axes: xlabel='Age_grp'>

3. Calculate the covariance matrix of empirical moments¶

In [37]:

moments_cov = em.get_moments_cov(
    scf_data,
    calculate_moments,
    bootstrap_kwargs={"n_draws": 5_000, "seed": 11323, "error_handling": "continue"},
)

moments_cov

Out[37]:

Age_grp	(25,30]	(30,35]	(35,40]	(40,45]	(45,50]	(50,55]	(55,60]	(60,65]	(65,70]	(70,75]	(75,80]	(80,85]	(85,90]	(90,95]
Age_grp
(25,30]	78.737485	-0.371681	-4.040328	-9.422402	13.093843	5.090706	1.389485	-5.709443	10.695572	6.195027	1.978852	-5.239811	-52.026812	13.828528
(30,35]	-0.371681	111.941544	7.594072	2.187824	16.141333	-1.336970	5.330875	-5.597500	-0.134589	8.196271	19.774747	26.793106	37.315401	-42.416020
(35,40]	-4.040328	7.594072	577.131713	-2.877333	37.316779	-0.267094	-4.269315	-10.520714	-56.049443	8.061753	-142.578652	5.639516	-34.492753	14.923283
(40,45]	-9.422402	2.187824	-2.877333	1345.165864	9.970902	-24.375004	-12.797304	7.520201	-20.394739	26.082335	169.630000	12.434961	9.041112	39.198754
(45,50]	13.093843	16.141333	37.316779	9.970902	2213.339739	-13.989562	-1.838498	31.312340	100.712927	-17.609319	17.421268	45.578331	52.952573	89.921968
(50,55]	5.090706	-1.336970	-0.267094	-24.375004	-13.989562	1527.340485	14.733277	56.594261	32.309802	38.808547	-100.367914	-48.581634	42.225114	-7.900472
(55,60]	1.389485	5.330875	-4.269315	-12.797304	-1.838498	14.733277	1708.687643	37.805573	-27.926395	-9.556498	75.322901	84.918302	-39.100823	-11.622967
(60,65]	-5.709443	-5.597500	-10.520714	7.520201	31.312340	56.594261	37.805573	3267.096856	-101.175101	1.004139	-27.080202	20.062823	118.732200	-143.400780
(65,70]	10.695572	-0.134589	-56.049443	-20.394739	100.712927	32.309802	-27.926395	-101.175101	13000.202130	-16.414305	-310.214803	95.090219	838.940789	-446.093482
(70,75]	6.195027	8.196271	8.061753	26.082335	-17.609319	38.808547	-9.556498	1.004139	-16.414305	4347.722505	1.784658	-13.300977	-22.299751	-109.678104
(75,80]	1.978852	19.774747	-142.578652	169.630000	17.421268	-100.367914	75.322901	-27.080202	-310.214803	1.784658	19767.327088	88.919075	-158.097593	388.548889
(80,85]	-5.239811	26.793106	5.639516	12.434961	45.578331	-48.581634	84.918302	20.062823	95.090219	-13.300977	88.919075	4582.977726	192.636538	-149.436556
(85,90]	-52.026812	37.315401	-34.492753	9.041112	52.952573	42.225114	-39.100823	118.732200	838.940789	-22.299751	-158.097593	192.636538	49368.887735	-325.915160
(90,95]	13.828528	-42.416020	14.923283	39.198754	89.921968	-7.900472	-11.622967	-143.400780	-446.093482	-109.678104	388.548889	-149.436556	-325.915160	45577.912602

4. Define an agent type to simulate data¶

In [38]:

birth_age = 25
death_age = 100
adjust_infl_to = 1992
income_calib = Cagetti_income
education = "College"

# Income specification
income_params = parse_income_spec(
    age_min=birth_age,
    age_max=death_age,
    adjust_infl_to=adjust_infl_to,
    **income_calib[education],
    SabelhausSong=True,
)

# Initial distribution of wealth and permanent income
dist_params = income_wealth_dists_from_scf(
    base_year=adjust_infl_to,
    age=birth_age,
    education=education,
    wave=1995,
)

# We need survival probabilities only up to death_age-1, because survival
# probability at death_age is 1.
liv_prb = parse_ssa_life_table(
    female=True,
    cross_sec=True,
    year=2004,
    min_age=birth_age,
    max_age=death_age - 1,
)

# Parameters related to the number of periods implied by the calibration
time_params = parse_time_params(age_birth=birth_age, age_death=death_age)

# Update all the new parameters
params = copy(init_lifecycle)
params.update(time_params)
params.update(dist_params)
params.update(income_params)
params["LivPrb"] = liv_prb
params["AgentCount"] = 10_000
params["T_sim"] = 200
params["track_vars"] = ["aNrm", "bNrm", "cNrm", "pLvl", "t_age", "mNrm"]
params["PermGroFacAgg"] = 1.0

In [39]:

LifeCycleAgent = IndShockConsumerType(**params)
LifeCycleAgent.solve()

In [40]:

LifeCycleAgent.unpack("cFunc")
# Plot the consumption functions
print("Consumption functions")
plot_funcs(LifeCycleAgent.cFunc, 0, 5)

Consumption functions

In [41]:

LifeCycleAgent.update()

# Run the simulations
LifeCycleAgent.initialize_sim()
history = LifeCycleAgent.simulate()

In [42]:

raw_data = {
    "Age": history["t_age"].flatten() + birth_age - 1,
    "pIncome": history["pLvl"].flatten(),
    "nrmM": history["mNrm"].flatten(),
    "nrmC": history["cNrm"].flatten(),
}

sim_data = pd.DataFrame(raw_data)
sim_data["Cons"] = sim_data.nrmC * sim_data.pIncome
sim_data["M"] = sim_data.nrmM * sim_data.pIncome

# Find the mean of each variable at every age
AgeMeans = sim_data.groupby(["Age"]).median().reset_index()

plt.figure()
plt.plot(AgeMeans.Age, AgeMeans.pIncome, label="Permanent Income")
plt.plot(AgeMeans.Age, AgeMeans.M, label="Market resources")
plt.plot(AgeMeans.Age, AgeMeans.Cons, label="Consumption")
plt.legend()
plt.xlabel("Age")
plt.ylabel(f"Thousands of {adjust_infl_to} USD")
plt.title("Variable Medians Conditional on Survival")
plt.grid()

In [43]:

age_groups = [
    list(range(start, start + 5)) for start in range(birth_age + 1, 95 + 1, 5)
]

# generate labels as (25,30], (30,35], ...
age_labels = [f"({group[0]-1},{group[-1]}]" for group in age_groups]

# Generate mappings between the real ages in the groups and the indices of simulated data
age_mapping = dict(zip(age_labels, map(np.array, age_groups)))

In [44]:

age_mapping

Out[44]:

{'(25,30]': array([26, 27, 28, 29, 30]),
 '(30,35]': array([31, 32, 33, 34, 35]),
 '(35,40]': array([36, 37, 38, 39, 40]),
 '(40,45]': array([41, 42, 43, 44, 45]),
 '(45,50]': array([46, 47, 48, 49, 50]),
 '(50,55]': array([51, 52, 53, 54, 55]),
 '(55,60]': array([56, 57, 58, 59, 60]),
 '(60,65]': array([61, 62, 63, 64, 65]),
 '(65,70]': array([66, 67, 68, 69, 70]),
 '(70,75]': array([71, 72, 73, 74, 75]),
 '(75,80]': array([76, 77, 78, 79, 80]),
 '(80,85]': array([81, 82, 83, 84, 85]),
 '(85,90]': array([86, 87, 88, 89, 90]),
 '(90,95]': array([91, 92, 93, 94, 95])}

5. Define a function to calculate simulated moments¶

In [45]:

def simulate_moments(params, agent=None):
    agent.assign_parameters(**params)
    agent.update()

    agent.solve()

    agent.initialize_sim()
    history = agent.simulate()

    raw_data = {
        "age": history["t_age"].flatten() + birth_age - 1,
        "b_nrm": history["bNrm"].flatten(),
        "p_lvl": history["pLvl"].flatten(),
    }

    sim_data = pd.DataFrame(raw_data)
    sim_data["Wealth"] = sim_data.b_nrm * sim_data.p_lvl

    sim_data["Age_grp"] = pd.cut(
        sim_data.age,
        bins=range(birth_age + 1, 97, 5),
        labels=age_labels,
        right=False,
    )

    sim_data = sim_data.dropna()

    return sim_data.groupby("Age_grp", observed=False)["Wealth"].median()

In [46]:

simulate_moments({}, agent=LifeCycleAgent).plot()
empirical_moments.plot()

Out[46]:

<Axes: xlabel='Age_grp'>

6. Estimate the model parameters¶

In [47]:

init_params = {"CRRA": 3.009252, "DiscFac": 1.006925}
lower_bounds = {"CRRA": 1.0, "DiscFac": 0.9}
upper_bounds = {"CRRA": 10.0, "DiscFac": 1.1}

res = estimate_msm(
    LifeCycleAgent,
    init_params,
    empirical_moments,
    moments_cov,
    simulate_moments,
    optimize_options={
        "algorithm": "scipy_lbfgsb",
        "error_handling": "continue",
    },
    estimagic_options={
        "lower_bounds": lower_bounds,
        "upper_bounds": upper_bounds,
    },
)

/home/alujan/micromamba/envs/hark-dev/lib/python3.12/site-packages/optimagic/deprecations.py:247: FutureWarning:

Specifying bounds via the arguments lower_bound, and upper_bound is deprecated and will be removed in optimagic version 0.6.0 and later. Please use the `bounds` argument instead.

In [48]:

pd.concat(res.summary())

Out[48]:

		value	standard_error	ci_lower	ci_upper	p_value	free	stars
CRRA	0	3.009272	1.607353	-0.141083	6.159627	0.06118	True	*
DiscFac	0	1.006919	0.002955	1.001128	1.012710	0.00000	True	***

In [49]:

res.params

Out[49]:

{'CRRA': 3.0092719466298723, 'DiscFac': 1.0069189693502238}

In [50]:

pd.DataFrame(res.cov(method="robust"))

Out[50]:

	CRRA	DiscFac
CRRA	2.583585182547849	-0.0008876283874133502
DiscFac	-0.0008876283874133521	8.730951098326127e-06

In [51]:

res.se()

Out[51]:

{'CRRA': 1.607353471563691, 'DiscFac': 0.0029548182851617333}

In [52]:

LifeCycleAgent.assign_parameters(**res.params)
LifeCycleAgent.update()
LifeCycleAgent.solve()
LifeCycleAgent.initialize_sim()
history = LifeCycleAgent.simulate()

raw_data = {
    "Age": history["t_age"].flatten() + birth_age - 1,
    "pIncome": history["pLvl"].flatten(),
    "nrmM": history["mNrm"].flatten(),
    "nrmC": history["cNrm"].flatten(),
}

sim_data = pd.DataFrame(raw_data)
sim_data["Cons"] = sim_data.nrmC * sim_data.pIncome
sim_data["M"] = sim_data.nrmM * sim_data.pIncome

# Find the mean of each variable at every age
AgeMeans = sim_data.groupby(["Age"]).median().reset_index()

plt.figure()
plt.plot(AgeMeans.Age, AgeMeans.pIncome, label="Permanent Income")
plt.plot(AgeMeans.Age, AgeMeans.M, label="Market resources")
plt.plot(AgeMeans.Age, AgeMeans.Cons, label="Consumption")
plt.legend()
plt.xlabel("Age")
plt.ylabel(f"Thousands of {adjust_infl_to} USD")
plt.title("Variable Medians Conditional on Survival")
plt.grid()

In [53]:

simulate_moments(res.params, agent=LifeCycleAgent).plot()
empirical_moments.plot()

Out[53]:

<Axes: xlabel='Age_grp'>

In [ ]: