#!/usr/bin/env python # coding: utf-8 # # Method of Simulated Moments (MSM) for Structural Estimation # ### Steps of MSM estimation # 1. Load empirical data # 2. Define a function to calculate empirical moments from the data # 3. Calculate the covariance matrix of the empirical moments (for the weighting matrix) # 4. Define a `HARK` agent type with the model parameters to be estimated # 5. Define a function to simulate the model and calculate the simulated moments # 6. Estimate the model parameters by minimizing the distance between the empirical and simulated moments # In[1]: from copy import copy import estimagic as em import matplotlib.pyplot as plt import numpy as np import pandas as pd from statsmodels.stats.weightstats import DescrStatsW from HARK.Calibration.Income.IncomeTools import ( Cagetti_income, parse_income_spec, parse_time_params, ) from HARK.Calibration.life_tables.us_ssa.SSATools import parse_ssa_life_table from HARK.Calibration.SCF.WealthIncomeDist.SCFDistTools import ( get_scf_distr_stats, income_wealth_dists_from_scf, ) from HARK.ConsumptionSaving.ConsIndShockModel import ( IndShockConsumerType, init_lifecycle, ) from HARK.estimation import estimate_msm from HARK.utilities import plot_funcs # ## 1. Load empirical data # In[2]: scf_data = get_scf_distr_stats() scf_data["Wealth"] = np.exp(scf_data["lnWealth.mean"]) scf_data = scf_data[scf_data["Educ"] == "College"] scf_data = scf_data[scf_data["YEAR"] != "All"] scf_data = scf_data[scf_data["Age_grp"] != "All"] scf_data = scf_data[~scf_data["Age_grp"].isin(["(15,20]", "(20,25]"])] scf_data # ## 2. Calculate Moments # In[3]: indices = scf_data["Age_grp"].unique() def calculate_weighted_median(data): stats = DescrStatsW(data["Wealth"], weights=data["w.obs"]) return stats.quantile(0.5, return_pandas=False)[0] def calculate_moments(data): medians = data.groupby(["Age_grp"]).apply( calculate_weighted_median, include_groups=False, ) return medians.reindex(indices, fill_value=0.0) # In[4]: empirical_moments = calculate_moments(scf_data) # In[5]: empirical_moments.plot() # ## 3. Calculate the covariance matrix of empirical moments # In[6]: moments_cov = em.get_moments_cov( scf_data, calculate_moments, bootstrap_kwargs={"n_draws": 5_000, "seed": 11323, "error_handling": "continue"}, ) moments_cov # ## 4. Define an agent type to simulate data # In[16]: birth_age = 25 death_age = 100 adjust_infl_to = 1992 income_calib = Cagetti_income education = "College" # Income specification income_params = parse_income_spec( age_min=birth_age, age_max=death_age, adjust_infl_to=adjust_infl_to, **income_calib[education], SabelhausSong=True, ) # Initial distribution of wealth and permanent income dist_params = income_wealth_dists_from_scf( base_year=adjust_infl_to, age=birth_age, education=education, wave=1995, ) # We need survival probabilities only up to death_age-1, because survival # probability at death_age is 1. liv_prb = parse_ssa_life_table( female=True, cross_sec=True, year=2004, min_age=birth_age, max_age=death_age - 1, ) # Parameters related to the number of periods implied by the calibration time_params = parse_time_params(age_birth=birth_age, age_death=death_age) # Update all the new parameters params = copy(init_lifecycle) params.update(time_params) params.update(dist_params) params.update(income_params) params["LivPrb"] = liv_prb params["AgentCount"] = 10_000 params["T_sim"] = 200 params["track_vars"] = ["aNrm", "bNrm", "cNrm", "pLvl", "t_age", "mNrm"] params["PermGroFacAgg"] = 1.0 params["Rfree"] = [params["Rfree"][0]] * params["T_cycle"] # In[17]: LifeCycleAgent = IndShockConsumerType(**params) LifeCycleAgent.solve() # In[18]: LifeCycleAgent.unpack("cFunc") # Plot the consumption functions print("Consumption functions") plot_funcs(LifeCycleAgent.cFunc, 0, 5) # In[19]: LifeCycleAgent.update() # Run the simulations LifeCycleAgent.initialize_sim() LifeCycleAgent.simulate() # Note: simulate() does not return history; it updates LifeCycleAgent.history history = LifeCycleAgent.history # In[20]: raw_data = { "Age": history["t_age"].flatten() + birth_age - 1, "pIncome": history["pLvl"].flatten(), "nrmM": history["mNrm"].flatten(), "nrmC": history["cNrm"].flatten(), } sim_data = pd.DataFrame(raw_data) sim_data["Cons"] = sim_data.nrmC * sim_data.pIncome sim_data["M"] = sim_data.nrmM * sim_data.pIncome # Find the mean of each variable at every age AgeMeans = sim_data.groupby(["Age"]).median().reset_index() plt.figure() plt.plot(AgeMeans.Age, AgeMeans.pIncome, label="Permanent Income") plt.plot(AgeMeans.Age, AgeMeans.M, label="Market resources") plt.plot(AgeMeans.Age, AgeMeans.Cons, label="Consumption") plt.legend() plt.xlabel("Age") plt.ylabel(f"Thousands of {adjust_infl_to} USD") plt.title("Variable Medians Conditional on Survival") plt.grid() # In[21]: age_groups = [ list(range(start, start + 5)) for start in range(birth_age + 1, 95 + 1, 5) ] # generate labels as (25,30], (30,35], ... age_labels = [f"({group[0] - 1},{group[-1]}]" for group in age_groups] # Generate mappings between the real ages in the groups and the indices of simulated data age_mapping = dict(zip(age_labels, map(np.array, age_groups))) # In[22]: age_mapping # ## 5. Define a function to calculate simulated moments # In[23]: def simulate_moments(params, agent=None): agent.assign_parameters(**params) agent.update() agent.solve() agent.initialize_sim() agent.simulate() history = agent.history raw_data = { "age": history["t_age"].flatten() + birth_age - 1, "b_nrm": history["bNrm"].flatten(), "p_lvl": history["pLvl"].flatten(), } sim_data = pd.DataFrame(raw_data) sim_data["Wealth"] = sim_data.b_nrm * sim_data.p_lvl sim_data["Age_grp"] = pd.cut( sim_data.age, bins=range(birth_age + 1, 97, 5), labels=age_labels, right=False, ) sim_data = sim_data.dropna() return sim_data.groupby("Age_grp", observed=False)["Wealth"].median() # In[24]: simulate_moments({}, agent=LifeCycleAgent).plot() empirical_moments.plot() # ## 6. Estimate the model parameters # In[25]: init_params = {"CRRA": 3.009252, "DiscFac": 1.006925} lower_bounds = {"CRRA": 1.0, "DiscFac": 0.9} upper_bounds = {"CRRA": 10.0, "DiscFac": 1.1} res = estimate_msm( LifeCycleAgent, init_params, empirical_moments, moments_cov, simulate_moments, optimize_options={ "algorithm": "scipy_lbfgsb", "error_handling": "continue", }, estimagic_options={ "lower_bounds": lower_bounds, "upper_bounds": upper_bounds, }, ) # In[26]: pd.concat(res.summary()) # In[27]: res.params # In[28]: pd.DataFrame(res.cov(method="robust")) # In[29]: res.se() # In[30]: LifeCycleAgent.assign_parameters(**res.params) LifeCycleAgent.update() LifeCycleAgent.solve() LifeCycleAgent.initialize_sim() LifeCycleAgent.simulate() history = LifeCycleAgent.history raw_data = { "Age": history["t_age"].flatten() + birth_age - 1, "pIncome": history["pLvl"].flatten(), "nrmM": history["mNrm"].flatten(), "nrmC": history["cNrm"].flatten(), } sim_data = pd.DataFrame(raw_data) sim_data["Cons"] = sim_data.nrmC * sim_data.pIncome sim_data["M"] = sim_data.nrmM * sim_data.pIncome # Find the mean of each variable at every age AgeMeans = sim_data.groupby(["Age"]).median().reset_index() plt.figure() plt.plot(AgeMeans.Age, AgeMeans.pIncome, label="Permanent Income") plt.plot(AgeMeans.Age, AgeMeans.M, label="Market resources") plt.plot(AgeMeans.Age, AgeMeans.Cons, label="Consumption") plt.legend() plt.xlabel("Age") plt.ylabel(f"Thousands of {adjust_infl_to} USD") plt.title("Variable Medians Conditional on Survival") plt.grid() # In[31]: simulate_moments(res.params, agent=LifeCycleAgent).plot() empirical_moments.plot() # In[ ]: