%pylab inline
import matplotlib.pyplot as plt
#import matplotlib
#matplotlib.style.use('ggplot') ## gnuplot style
import numpy as np
import pandas as pd
import warnings as wn
import partial_corr # found at
# https://gist.github.com/fabianp/9396204419c7b638d38f
#size
# default figure size (inches) applied to every plot in this notebook
width, height=12,8
plt.rcParams['figure.figsize'] = width, height # added (pt)
#precision in tables
# use the full option name: the short alias 'precision' was deprecated and
# then removed in pandas >= 1.4; 'display.precision' works on all versions
pd.set_option('display.precision', 2)
#rows in tables
pd.set_option('display.max_rows', 1000)
# this is used for regression below; install (via pip) statsmodels and patsy
import statsmodels.formula.api as sm
# to choose a csv file in the current folder: list the *_ts.csv files,
# show them numbered, and ask the user to pick one
import os
# endswith is stricter than the original find(...)>0: it only accepts
# files whose name actually ends with the suffix
selected = sorted(f for f in os.listdir("./") if f.endswith('_ts.csv'))
for i, fileName in enumerate(selected):
    print(i, fileName)
# prompt fixed: the original string was missing the closing ')'
num = int(input("Choose a file via its number (>=0;<=" + str(len(selected) - 1) + ") "))
try:
    # parameter-modification log produced by the model run; the file name
    # shares the first 17 characters with the chosen *_ts.csv file
    modPars_df = pd.read_csv(selected[num][:17] + '_modPars.csv')
    modPars_df.index += 1  # index from 1, like the time series
except Exception:
    # narrowed from BaseException, which also swallowed KeyboardInterrupt
    # and SystemExit; missing/unreadable file -> placeholder table
    modPars_df = pd.DataFrame([["no changes in parameters"]], columns=[" "])
    modPars_df.index += 1
firms = False
try:
    # optional per-firm data; 'firms' records whether the file exists
    firms_df = pd.read_csv(selected[num][:17] + '_firms.csv')
    # bug fix: the original incremented modPars_df.index here (copy-paste
    # from the cell above); it is firms_df whose index must start at 1
    firms_df.index += 1
    firms = True
except Exception:
    # narrowed from BaseException; missing file simply leaves firms False
    pass
# the companion files share the first 17 characters of the chosen name
base = selected[num][:17]
nameFilePar = base + '_par.csv'
par_df = pd.read_csv(nameFilePar)
par_df.index += 1
ts_df = pd.read_csv(selected[num])
# index starts from 1: data are collected at the end of each period
ts_df.index += 1
str_df = pd.read_csv(base + '_str.csv')
# index left starting from 0: data are collected at the beginning of each period
To limit the analysis to a subperiod, activate the cell below before running the whole program:
[a:b] => from a+1 to b
[:b] => from the beginning to b
[a:] => from a+1 to the end
#ts_df =ts_df [0:45]
#str_df=str_df[0:45]
*Parameters*
# show the parameters as strings; display only — the result is not
# assigned back to par_df
par_df.astype(str,errors='ignore')
*Modified parameters*
# show the modified parameters as strings; display only — the result is
# not assigned back to modPars_df
modPars_df.astype(str,errors='ignore')
*Time series, data collected at the end of each period*
# shorten the column names; the model exists in a 6-series and an
# 8-series version, distinguished by the column count
shortNames6 = ['unempl.', 'totalProfit', 'totalProd.', 'plannedP.', 'price', 'wage']
shortNames8 = ['unempl.', 'totalProfit', 'totalProd.', 'plannedP.', 'cQ', 'hPSd', 'price', 'wage']
if len(ts_df.columns) == 6:
    ts_df.columns = shortNames6
if len(ts_df.columns) == 8:
    ts_df.columns = shortNames8
# display the series, their summary statistics, and their correlations
ts_df
ts_df.describe()
ts_df.corr(method="pearson").style.format("{:.2}")
The origin of the partial_corr source is https://gist.github.com/fabianp/9396204419c7b638d38f
At http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression we have the explanation of the need of augmenting the data matrix with a 1 to allow for a constant term in the regression.
wn.filterwarnings(action="ignore")  # to silence a warning about the
# LAPACK lib
np.set_printoptions(precision=2, suppress=True)

def _pcorr_with_constant(frame):
    """Partial correlations of frame's columns; a column of ones is
    prepended so the underlying regressions include an intercept (see the
    Wikipedia reference quoted above). The constant row/column is then
    dropped from the result."""
    data = frame.values
    augmented = np.hstack((np.ones((data.shape[0], 1)), data))
    return partial_corr.partial_corr(augmented)[1:, 1:]

out1 = _pcorr_with_constant(ts_df)                                # all series
out1
out2 = _pcorr_with_constant(ts_df.drop(columns="plannedP."))      # no planned production
out2
out3 = _pcorr_with_constant(ts_df.drop(columns="totalProd."))     # no total production
out3
# work on an independent copy: the original plain assignment aliased
# ts_df, so renaming ts2_df's columns silently renamed ts_df's as well
# (affecting every later use of ts_df)
ts2_df = ts_df.copy()
# dots are not allowed in patsy formulas, hence the dot-free names
if len(ts2_df.columns) == 6:
    ts2_df.columns = ['unempl', 'totalProfit', 'totalProd', 'plannedP', 'price', 'wage']
if len(ts2_df.columns) == 8:
    ts2_df.columns = ['unempl', 'totalProfit', 'totalProd', 'plannedP', 'cQ', 'hPSd', 'price', 'wage']
result = sm.ols(formula="totalProfit ~ price + wage + totalProd + unempl",
                data=ts2_df).fit()
print(result.summary())
*Structural infos, data collected at the beginning of each period*
# display the structural data (collected at the beginning of each period)
str_df
levels of gray https://en.wikipedia.org/wiki/Shades_of_gray
# colored plot of the time series; price-like series on the right axis
# bug fix: the standard-deviation column is named 'hPSd' (see the column
# renaming above) — the original 'hPriceSd' matched no column, so the
# series never reached the secondary axis
myPlot = ts_df.plot(figsize=(11,8),secondary_y=['hPSd', 'price','wage'],marker="*",
color=["OrangeRed","LawnGreen","Blue","Violet","lightblue","Pink","Gray","Brown"])
myPlot.set_ylabel('unemployed, totalProfit, totalProduction, plannedProduction, consumptionQ')
myPlot.right_ax.set_ylabel('hPriceSd, price, wage')
myPlot.legend(loc='upper left') #, bbox_to_anchor=(-0.35, 0.5)
myPlot.axes.right_ax.legend(loc='lower right') #, bbox_to_anchor=(1.1, 0.5)
# grayscale (print-friendly) version; line styles tell the series apart
# bug fix: the standard-deviation column is named 'hPSd' (see the column
# renaming above) — the original 'hPriceSd' matched no column, so the
# series never reached the secondary axis
myPlot = ts_df.plot(figsize=(11,8),secondary_y=['hPSd', 'price','wage'],marker="",
color=["lightgray","Black","Black","Black","Gray","lightgray","lightgray","lightgray"],
style=['-', '--', '-.', ':','-', '--', '-.'],
linewidth=1.)
myPlot.set_ylabel('unemployed, totalProfit, totalProduction, plannedProduction, consumptionQ')
myPlot.right_ax.set_ylabel('hPriceSd, price, wage')
myPlot.legend(loc='upper left') #, bbox_to_anchor=(-0.35, 0.5)
myPlot.axes.right_ax.legend(loc='lower right') #, bbox_to_anchor=(1.1, 0.5)
# grayscale version with thicker lines
# bug fix: the standard-deviation column is named 'hPSd' (see the column
# renaming above) — the original 'hPriceSd' matched no column, so the
# series never reached the secondary axis
myPlot = ts_df.plot(figsize=(11,8),secondary_y=['hPSd', 'price','wage'],marker="",
color=["silver","Black","Black","Black","Gray","slategray","slategray","slategray"],
style=['-', '--', '-.', ':','-', '--', '-.'],
linewidth=2.)
myPlot.set_ylabel('unemployed, totalProfit, totalProduction, plannedProduction, consumptionQ')
myPlot.right_ax.set_ylabel('hPriceSd, price, wage')
myPlot.legend(loc='upper left') #, bbox_to_anchor=(-0.35, 0.5)
myPlot.axes.right_ax.legend(loc='lower right') #, bbox_to_anchor=(1.1, 0.5)
# structural series; 'workers' on the secondary (right) axis
# NOTE(review): presumably str_df holds a firms-count column plotted on
# the left axis — confirm against the *_str.csv layout
str_df.plot(figsize=(11,8),secondary_y='workers',marker="*",color=["r","b"])
# grayscale variant for print
str_df.plot(figsize=(11,8),secondary_y='workers',marker="*",color=["black",
"lightgrey"])
# grayscale variant with thicker lines
str_df.plot(figsize=(11,8),linewidth=2.0,secondary_y='workers',marker="*",color=["black",
"gray"])
Best solutions to produce a LaTeX table from these data (the example is related to ts_df.corr table):
corr=ts_df.corr(method='pearson')
print(corr.to_latex())
"print" to have the output nicely formatted; copy and paste it to LaTeX and the
result works.
The output is included within:
\begin{table}[htbp]
... output above ...
\label{a label}
\caption{a caption}
\end{table}
We add also size specifications (\footnotesize in this case) and the usual [htbp] specification with \begin{table}[htbp]
Other solutions:
# Pearson correlation matrix, reused by the LaTeX snippets below
corr=ts_df.corr(method='pearson')
def ff(x):
    """Format a number with two decimal places (LaTeX table cells)."""
    return format(x, '1.2f')
# LaTeX export of the correlation matrix; font size adapts to the width
if len(ts_df.columns) == 6:
    print("\\begin{table}[!htbp]\n{\\footnotesize \center")
if len(ts_df.columns) == 8:
    print("\\begin{table}[!htbp]\n{\\tiny \center")
# one formatter per column: the original hard-coded a list of 8, which
# does not match the 6-column case
print(corr.to_latex(formatters=[ff] * len(corr.columns)))
print("}\n\\caption{Correlations among the time series of the model,"
      " with xxx}")
print("\\label{correlations xxx}\n\\end{table}")
# LaTeX export of the partial correlations (all series); the header and
# the row labels depend on the 6- vs 8-series version of the model
ou = out1
if len(ts_df.columns) == 6:
    names = ['unempl.', 'totalProfit', 'totalProd.', 'plannedP.', 'price', 'wage']
    print("\\begin{table}[!htbp]\n{\\footnotesize \center")
    print("\\begin{tabular}{lrrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & totalProd. & plannedP. & price & wage \\\\"
          "\n\\midrule")
if len(ts_df.columns) == 8:
    names = ['unempl.', 'totalProfit', 'totalProd.', 'plannedP.', 'cQ', 'hPSd', 'price', 'wage']
    print("\\begin{table}[!htbp]\n{\\tiny \center")
    print("\\begin{tabular}{lrrrrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & totalProd. & plannedP. & cQ & hPSd & price & wage \\\\"
          "\n\\midrule")
# one LaTeX row per series, two decimals per cell
for label, values in zip(names, ou):
    print(label, end="")
    for v in values:
        print(" & %.2f" % v, end="")
    print(" \\\\")
print("\\bottomrule\n\\end{tabular}")
print("}\n\\caption{Partial correlations among the time series of the model,"
      " with xxx}")
print("\\label{partial correlations xxx}\n\\end{table}")
# LaTeX export of the partial correlations without plannedProduction
# (that column was dropped before computing out2, so its label is omitted)
ou = out2
if len(ts_df.columns) == 6:
    names = ['unempl.', 'totalProfit', 'totalProd.', 'price', 'wage']
if len(ts_df.columns) == 8:
    names = ['unempl.', 'totalProfit', 'totalProd.', 'cQ', 'hPSd', 'price', 'wage']
# NOTE(review): unlike the first partial-correlation table, this one
# always uses \footnotesize, even in the 8-series case — confirm intended
print("\\begin{table}[!htbp]\n{\\footnotesize \center")
if len(ts_df.columns) == 6:
    print("\\begin{tabular}{lrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & totalProd. & price & wage \\\\"
          "\n\\midrule")
if len(ts_df.columns) == 8:
    print("\\begin{tabular}{lrrrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & totalProd. & cQ & hPSd & price & wage \\\\"
          "\n\\midrule")
for label, values in zip(names, ou):
    print(label, end="")
    for v in values:
        print(" & %.2f" % v, end="")
    print(" \\\\")
print("\\bottomrule\n\\end{tabular}")
print("}\n\\caption{Partial correlations (no plannedProduction) among the time series of the model,"
      " with xxx}")
print("\\label{partial correlations (no plannedP.) xxx}\n\\end{table}")
# LaTeX export of the partial correlations without totalProduction
# (that column was dropped before computing out3, so its label is omitted)
ou = out3
if len(ts_df.columns) == 6:
    names = ['unempl.', 'totalProfit', 'plannedP.', 'price', 'wage']
if len(ts_df.columns) == 8:
    names = ['unempl.', 'totalProfit', 'plannedP.', 'cQ', 'hPSd', 'price', 'wage']
# NOTE(review): unlike the first partial-correlation table, this one
# always uses \footnotesize, even in the 8-series case — confirm intended
print("\\begin{table}[!htbp]\n{\\footnotesize \center")
if len(ts_df.columns) == 6:
    print("\\begin{tabular}{lrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & plannedP. & price & wage \\\\"
          "\n\\midrule")
if len(ts_df.columns) == 8:
    print("\\begin{tabular}{lrrrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & plannedP. & cQ & hPSd & price & wage \\\\"
          "\n\\midrule")
for label, values in zip(names, ou):
    print(label, end="")
    for v in values:
        print(" & %.2f" % v, end="")
    print(" \\\\")
print("\\bottomrule\n\\end{tabular}")
print("}\n\\caption{Partial correlations (no totalProduction) among the time series of the model,"
      " with xxx}")
print("\\label{partial correlations (no totalProd.) xxx}\n\\end{table}")
# per-firm data are available only when the *_firms.csv file was found
if firms:
    print(firms_df.describe())
else:
    print('no data for each firm in each period')
ctitle=""
if len(par_df.columns)==2: ctitle=par_df.columns[0]
if len(par_df.columns)==3: ctitle=par_df.columns[1]
if len(ts_df.columns) == 6:
parList=par_df[ctitle].tolist()
valList=par_df["Values"].tolist()
if len(ts_df.columns) == 8:
parList=par_df["Parameter internal names"].tolist()
valList=par_df["Values"].tolist()
# both parList are generated by the 'print' of parameters.py
dictionary of values
*d_val*
it comes from the file *_par.csv coming from the 'print' of parameters.py
NB the different versions of the model have different parameters output sequences; the main difference is about the 6 time series case and the 8 time series case in file *_ts.csv, emerging above
[The zip() function takes iterables (zero or more), builds an iterator that aggregates elements from the iterables passed, and returns an iterator of tuples.]
d_val=dict(zip(parList,valList))
d_val
dictionary of positions
*d_pos*
the dict of positions (file parPos.csv) comes from a manual work based on the table of parameter definition of appendix B of the book; the goal is that of retrieving the parameters of a specific experiment in dict d_val and assign their values to the correct position in the rows of the table of the values in the different experiments in the parameter value table of Appendix B
the vector (row) is pre-filled with '-' signs as values not existent in the specific experiment
the case of the par 'checkResConsUnsoldProd' is handled in a special way: the parameter 'checkResConsUnsoldProd' (not affecting the model, but only working on its output) appears 20180829 in 28ter experiment; in the first commit, of 20180830, the name is checkResCons, but quite immediately became checkResConsUnsoldProd; the commit of 20181013 signals the we have the output from parameters.py (the experiment 80 is of 20181009, so without that output); all the experiments from 28ter to 80 have implicitly 'checkResConsUnsoldProd' set to True
'w' case is corrected to 'Q'
to check the consistency of the dictionaries, we list the parameters of *d_val* that are not found when searching for values (the master dict is *d_pos*)
# map parameter names -> positions in the Appendix B table
labelsPositions_df = pd.read_csv('labelsPositions.csv')
#labelsPositions_df
parList2 = labelsPositions_df["name"].tolist()
posList = labelsPositions_df["position"].tolist()
d_pos = dict(zip(parList2, posList))
#d_pos
row = ['-'] * 53  # 52 parameters, pos. 0 is used for unuseful values
row[44] = '51'  # as default value for the par 'startHayekianMarket' for old
                # SMAC versions where it was not defined
for par in parList:
    if par == 'w':
        # the legacy 'w' parameter is stored at the position of 'Q'
        row[d_pos['Q']] = d_val[par]
    # bug fix: this must be elif — the original used an independent 'if',
    # so a handled 'w' (absent from d_pos) also fell into the else branch
    # and was wrongly reported as "not found"
    elif par in d_pos:
        row[d_pos[par]] = d_val[par]
    else:
        print('not found:', par)
the parameter checkResConsUnsoldProd (not affecting the model, but only working on its output) appears 20180829 in 28ter experiment; in the first commit, of 20180830, the name is checkResCons, but quite immediately became checkResConsUnsoldProd; the commit of 20181013 signals that we have the output from parameters.py (the experiment 80 is of 20181009, so without that output); all the experiments from 28ter to 80 have internally checkResConsUnsoldProd set to True
so from >= 20180829 to <= 20181009 the val of checkResConsUnsoldProd is True
1535414400 is equivalent to 08/28/2018 @ 12:00am (UTC) 1539129600 is equivalent to 10/10/2018 @ 12:00am (UTC)
import platform

def creation_date(path_to_file):
    """
    Return the creation time of the file at path_to_file, falling back
    to the last-modification time when the platform does not record it.
    See http://stackoverflow.com/a/39501288/1709587 for explanation.
    """
    if platform.system() == 'Windows':
        return os.path.getctime(path_to_file)
    # macOS exposes the birth time in os.stat; Linux usually does not
    file_stat = os.stat(path_to_file)
    try:
        return file_stat.st_birthtime
    except AttributeError:
        # probably Linux: no easy way to get the creation date,
        # so settle for when the content was last modified
        return file_stat.st_mtime
#converter https://www.unixtimestamp.com
fileTime = creation_date("./" + nameFilePar)
# files saved in the 2018-08-28 .. 2018-10-10 (UTC) window implicitly had
# checkResConsUnsoldProd set to True (see the note above)
if 1535414400 <= fileTime <= 1539129600:
    row[8] = 'True'
#row
#for i in range(1,len(row)-1):
#    print(row[i],"& ",end='')
#print(row[-1])
# first LaTeX row of the Appendix B table: positions 1..26
for cell in row[1:26]:
    print(cell, "& ", end='')
print(row[26])
# second LaTeX row: positions 27..last
for cell in row[27:-1]:
    print(cell, "& ", end='')
# NOTE(review): [1:5] keeps only chars 1-4 to avoid the '[ ]' output of
# list-like values — assumes those values are short; confirm
if '[' in row[-1]:
    row[-1] = row[-1][1:5]
print(row[-1])