%pylab inline
import matplotlib.pyplot as plt
#import matplotlib
#matplotlib.style.use('ggplot') ## gnuplot style
import numpy as np
import pandas as pd
import warnings as wn
import partial_corr # found at
# https://gist.github.com/fabianp/9396204419c7b638d38f
#size
# default figure size (inches) applied to every plot in this notebook
width, height=12,8
plt.rcParams['figure.figsize'] = width, height # added (pt)
#precision in tables
# use the full option name: the short alias 'precision' was deprecated and
# then removed in pandas >= 1.4; 'display.precision' works on all versions
pd.set_option('display.precision', 2)
#rows in tables
pd.set_option('display.max_rows', 1000)
# this is used for regression below; install (via pip) statsmodels and patsy
import statsmodels.formula.api as sm
# to choose a csv file in the current folder: list the *_ts.csv files,
# show them numbered, and ask the user to pick one
import os
# endswith is stricter than the original find(...)>0: it only accepts
# files whose name actually ends with the suffix
selected = sorted(f for f in os.listdir("./") if f.endswith('_ts.csv'))
for i, fileName in enumerate(selected):
    print(i, fileName)
# prompt fixed: the original string was missing the closing ')'
num = int(input("Choose a file via its number (>=0;<=" + str(len(selected) - 1) + ") "))
try:
    # parameter-modification log produced by the model run; the file name
    # shares the first 17 characters with the chosen *_ts.csv file
    modPars_df = pd.read_csv(selected[num][:17] + '_modPars.csv')
    modPars_df.index += 1  # index from 1, like the time series
except Exception:
    # narrowed from BaseException, which also swallowed KeyboardInterrupt
    # and SystemExit; missing/unreadable file -> placeholder table
    modPars_df = pd.DataFrame([["no changes in parameters"]], columns=[" "])
    modPars_df.index += 1
firms = False
try:
    # optional per-firm data; 'firms' records whether the file exists
    firms_df = pd.read_csv(selected[num][:17] + '_firms.csv')
    # bug fix: the original incremented modPars_df.index here (copy-paste
    # from the cell above); it is firms_df whose index must start at 1
    firms_df.index += 1
    firms = True
except Exception:
    # narrowed from BaseException; missing file simply leaves firms False
    pass
# the companion files share the first 17 characters of the chosen name
base = selected[num][:17]
nameFilePar = base + '_par.csv'
par_df = pd.read_csv(nameFilePar)
par_df.index += 1
ts_df = pd.read_csv(selected[num])
# index starts from 1: data are collected at the end of each period
ts_df.index += 1
str_df = pd.read_csv(base + '_str.csv')
# index left starting from 0: data are collected at the beginning of each period
To limit the analysis to a subperiod, activate the cell below before running the whole program:
[a:b] => from a+1 to b
[:b] => from the beginning to b
[a:] => from a+1 to the end
#ts_df =ts_df [0:45]
#str_df=str_df[0:45]
*Parameters*
# show the parameters as strings; display only — the result is not
# assigned back to par_df
par_df.astype(str,errors='ignore')
*Modified parameters*
# show the modified parameters as strings; display only — the result is
# not assigned back to modPars_df
modPars_df.astype(str,errors='ignore')
*Time series, data collected at the end of each period*
# shorten the column names; the model exists in a 6-series and an
# 8-series version, distinguished by the column count
shortNames6 = ['unempl.', 'totalProfit', 'totalProd.', 'plannedP.', 'price', 'wage']
shortNames8 = ['unempl.', 'totalProfit', 'totalProd.', 'plannedP.', 'cQ', 'hPSd', 'price', 'wage']
if len(ts_df.columns) == 6:
    ts_df.columns = shortNames6
if len(ts_df.columns) == 8:
    ts_df.columns = shortNames8
# display the series, their summary statistics, and their correlations
ts_df
ts_df.describe()
ts_df.corr(method="pearson").style.format("{:.2}")
The origin of the partial_corr source is https://gist.github.com/fabianp/9396204419c7b638d38f
At http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression we have the explanation of the need of augmenting the data matrix with a 1 to allow for a constant term in the regression.
wn.filterwarnings(action="ignore")  # to silence a warning about the
# LAPACK lib
np.set_printoptions(precision=2, suppress=True)

def _pcorr_with_constant(frame):
    """Partial correlations of frame's columns; a column of ones is
    prepended so the underlying regressions include an intercept (see the
    Wikipedia reference quoted above). The constant row/column is then
    dropped from the result."""
    data = frame.values
    augmented = np.hstack((np.ones((data.shape[0], 1)), data))
    return partial_corr.partial_corr(augmented)[1:, 1:]

out1 = _pcorr_with_constant(ts_df)                                # all series
out1
out2 = _pcorr_with_constant(ts_df.drop(columns="plannedP."))      # no planned production
out2
out3 = _pcorr_with_constant(ts_df.drop(columns="totalProd."))     # no total production
out3
# work on an independent copy: the original plain assignment aliased
# ts_df, so renaming ts2_df's columns silently renamed ts_df's as well
# (affecting every later use of ts_df)
ts2_df = ts_df.copy()
# dots are not allowed in patsy formulas, hence the dot-free names
if len(ts2_df.columns) == 6:
    ts2_df.columns = ['unempl', 'totalProfit', 'totalProd', 'plannedP', 'price', 'wage']
if len(ts2_df.columns) == 8:
    ts2_df.columns = ['unempl', 'totalProfit', 'totalProd', 'plannedP', 'cQ', 'hPSd', 'price', 'wage']
result = sm.ols(formula="totalProfit ~ price + wage + totalProd + unempl",
                data=ts2_df).fit()
print(result.summary())
*Structural infos, data collected at the beginning of each period*
# display the structural data (collected at the beginning of each period)
str_df
levels of gray https://en.wikipedia.org/wiki/Shades_of_gray
# colored plot of the time series; price-like series on the right axis
# bug fix: the standard-deviation column is named 'hPSd' (see the column
# renaming above) — the original 'hPriceSd' matched no column, so the
# series never reached the secondary axis
myPlot = ts_df.plot(figsize=(11,8),secondary_y=['hPSd', 'price','wage'],marker="*",
color=["OrangeRed","LawnGreen","Blue","Violet","lightblue","Pink","Gray","Brown"])
myPlot.set_ylabel('unemployed, totalProfit, totalProduction, plannedProduction, consumptionQ')
myPlot.right_ax.set_ylabel('hPriceSd, price, wage')
myPlot.legend(loc='upper left') #, bbox_to_anchor=(-0.35, 0.5)
myPlot.axes.right_ax.legend(loc='lower right') #, bbox_to_anchor=(1.1, 0.5)
# grayscale (print-friendly) version; line styles tell the series apart
# bug fix: the standard-deviation column is named 'hPSd' (see the column
# renaming above) — the original 'hPriceSd' matched no column, so the
# series never reached the secondary axis
myPlot = ts_df.plot(figsize=(11,8),secondary_y=['hPSd', 'price','wage'],marker="",
color=["lightgray","Black","Black","Black","Gray","lightgray","lightgray","lightgray"],
style=['-', '--', '-.', ':','-', '--', '-.'],
linewidth=1.)
myPlot.set_ylabel('unemployed, totalProfit, totalProduction, plannedProduction, consumptionQ')
myPlot.right_ax.set_ylabel('hPriceSd, price, wage')
myPlot.legend(loc='upper left') #, bbox_to_anchor=(-0.35, 0.5)
myPlot.axes.right_ax.legend(loc='lower right') #, bbox_to_anchor=(1.1, 0.5)
# grayscale version with thicker lines
# bug fix: the standard-deviation column is named 'hPSd' (see the column
# renaming above) — the original 'hPriceSd' matched no column, so the
# series never reached the secondary axis
myPlot = ts_df.plot(figsize=(11,8),secondary_y=['hPSd', 'price','wage'],marker="",
color=["silver","Black","Black","Black","Gray","slategray","slategray","slategray"],
style=['-', '--', '-.', ':','-', '--', '-.'],
linewidth=2.)
myPlot.set_ylabel('unemployed, totalProfit, totalProduction, plannedProduction, consumptionQ')
myPlot.right_ax.set_ylabel('hPriceSd, price, wage')
myPlot.legend(loc='upper left') #, bbox_to_anchor=(-0.35, 0.5)
myPlot.axes.right_ax.legend(loc='lower right') #, bbox_to_anchor=(1.1, 0.5)
# structural series; 'workers' on the secondary (right) axis
# NOTE(review): presumably str_df holds a firms-count column plotted on
# the left axis — confirm against the *_str.csv layout
str_df.plot(figsize=(11,8),secondary_y='workers',marker="*",color=["r","b"])
# grayscale variant for print
str_df.plot(figsize=(11,8),secondary_y='workers',marker="*",color=["black",
"lightgrey"])
# grayscale variant with thicker lines
str_df.plot(figsize=(11,8),linewidth=2.0,secondary_y='workers',marker="*",color=["black",
"gray"])
Best solutions to produce a LaTeX table from these data (the example is related to ts_df.corr table):
corr=ts_df.corr(method='pearson')
print(corr.to_latex())
"print" to have the output nicely formatted; copy and paste it to LaTeX and the
result works.
The output is included within:
\begin{table}[htbp]
... output above ...
\label{a label}
\caption{a caption}
\end{table}
We add also size specifications (\footnotesize in this case) and the usual [htbp] specification with \begin{table}[htbp]
Other solutions:
# Pearson correlation matrix, reused by the LaTeX snippets below
corr=ts_df.corr(method='pearson')
def ff(x):
    """Format a number with two decimal places (LaTeX table cells)."""
    return format(x, '1.2f')
# LaTeX export of the correlation matrix; font size adapts to the width
if len(ts_df.columns) == 6:
    print("\\begin{table}[!htbp]\n{\\footnotesize \center")
if len(ts_df.columns) == 8:
    print("\\begin{table}[!htbp]\n{\\tiny \center")
# one formatter per column: the original hard-coded a list of 8, which
# does not match the 6-column case
print(corr.to_latex(formatters=[ff] * len(corr.columns)))
print("}\n\\caption{Correlations among the time series of the model,"
      " with xxx}")
print("\\label{correlations xxx}\n\\end{table}")
# LaTeX export of the partial correlations (all series); the header and
# the row labels depend on the 6- vs 8-series version of the model
ou = out1
if len(ts_df.columns) == 6:
    names = ['unempl.', 'totalProfit', 'totalProd.', 'plannedP.', 'price', 'wage']
    print("\\begin{table}[!htbp]\n{\\footnotesize \center")
    print("\\begin{tabular}{lrrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & totalProd. & plannedP. & price & wage \\\\"
          "\n\\midrule")
if len(ts_df.columns) == 8:
    names = ['unempl.', 'totalProfit', 'totalProd.', 'plannedP.', 'cQ', 'hPSd', 'price', 'wage']
    print("\\begin{table}[!htbp]\n{\\tiny \center")
    print("\\begin{tabular}{lrrrrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & totalProd. & plannedP. & cQ & hPSd & price & wage \\\\"
          "\n\\midrule")
# one LaTeX row per series, two decimals per cell
for label, values in zip(names, ou):
    print(label, end="")
    for v in values:
        print(" & %.2f" % v, end="")
    print(" \\\\")
print("\\bottomrule\n\\end{tabular}")
print("}\n\\caption{Partial correlations among the time series of the model,"
      " with xxx}")
print("\\label{partial correlations xxx}\n\\end{table}")
# LaTeX export of the partial correlations without plannedProduction
# (that column was dropped before computing out2, so its label is omitted)
ou = out2
if len(ts_df.columns) == 6:
    names = ['unempl.', 'totalProfit', 'totalProd.', 'price', 'wage']
if len(ts_df.columns) == 8:
    names = ['unempl.', 'totalProfit', 'totalProd.', 'cQ', 'hPSd', 'price', 'wage']
# NOTE(review): unlike the first partial-correlation table, this one
# always uses \footnotesize, even in the 8-series case — confirm intended
print("\\begin{table}[!htbp]\n{\\footnotesize \center")
if len(ts_df.columns) == 6:
    print("\\begin{tabular}{lrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & totalProd. & price & wage \\\\"
          "\n\\midrule")
if len(ts_df.columns) == 8:
    print("\\begin{tabular}{lrrrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & totalProd. & cQ & hPSd & price & wage \\\\"
          "\n\\midrule")
for label, values in zip(names, ou):
    print(label, end="")
    for v in values:
        print(" & %.2f" % v, end="")
    print(" \\\\")
print("\\bottomrule\n\\end{tabular}")
print("}\n\\caption{Partial correlations (no plannedProduction) among the time series of the model,"
      " with xxx}")
print("\\label{partial correlations (no plannedP.) xxx}\n\\end{table}")
# LaTeX export of the partial correlations without totalProduction
# (that column was dropped before computing out3, so its label is omitted)
ou = out3
if len(ts_df.columns) == 6:
    names = ['unempl.', 'totalProfit', 'plannedP.', 'price', 'wage']
if len(ts_df.columns) == 8:
    names = ['unempl.', 'totalProfit', 'plannedP.', 'cQ', 'hPSd', 'price', 'wage']
# NOTE(review): unlike the first partial-correlation table, this one
# always uses \footnotesize, even in the 8-series case — confirm intended
print("\\begin{table}[!htbp]\n{\\footnotesize \center")
if len(ts_df.columns) == 6:
    print("\\begin{tabular}{lrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & plannedP. & price & wage \\\\"
          "\n\\midrule")
if len(ts_df.columns) == 8:
    print("\\begin{tabular}{lrrrrrrr}\n\\toprule\n"
          "{} & unempl. & totalProfit & plannedP. & cQ & hPSd & price & wage \\\\"
          "\n\\midrule")
for label, values in zip(names, ou):
    print(label, end="")
    for v in values:
        print(" & %.2f" % v, end="")
    print(" \\\\")
print("\\bottomrule\n\\end{tabular}")
print("}\n\\caption{Partial correlations (no totalProduction) among the time series of the model,"
      " with xxx}")
print("\\label{partial correlations (no totalProd.) xxx}\n\\end{table}")
# per-firm data are available only when the *_firms.csv file was found
if firms:
    print(firms_df.describe())
else:
    print('no data for each firm in each period')
ctitle=""
if len(par_df.columns)==2: ctitle=par_df.columns[0]
if len(par_df.columns)==3: ctitle=par_df.columns[1]
if len(ts_df.columns) == 6:
parList=par_df[ctitle].tolist()
valList=par_df["Values"].tolist()
if len(ts_df.columns) == 8:
parList=par_df["Parameter internal names"].tolist()
valList=par_df["Values"].tolist()
# both parList are generated by the 'print' of parameters.py
dictionary of values
*d_val*
it comes from the file *_par.csv coming from the 'print' of parameters.py
NB the different versions of the model have different parameters output sequences; the main difference is about the 6 time series case and the 8 time series case in file *_ts.csv, emerging above
[The zip() function takes iterables (zero or more), builds an iterator that aggregates elements from the iterables passed, and returns an iterator of tuples.]
d_val=dict(zip(parList,valList))
d_val
dictionary of positions
*d_pos*
the dict of positions (file parPos.csv) comes from a manual work based on the table of parameter definition of appendix B of the book; the goal is that of retrieving the parameters of a specific experiment in dict d_val and assign their values to the correct position in the rows of the table of the values in the different experiments in the parameter value table of Appendix B
the vector (row) is pre-filled with '-' signs as values not existent in the specific experiment
the case of the par 'checkResConsUnsoldProd' is handled in a special way: the parameter 'checkResConsUnsoldProd' (not affecting the model, but only working on its output) appears 20180829 in 28ter experiment; in the first commit, of 20180830, the name is checkResCons, but quite immediately became checkResConsUnsoldProd; the commit of 20181013 signals the we have the output from parameters.py (the experiment 80 is of 20181009, so without that output); all the experiments from 28ter to 80 have implicitly 'checkResConsUnsoldProd' set to True
'w' case is corrected to 'Q'
to check the consistency of the dictionaries, we list the parameters of *d_val* that are not found when searching for values (the master dict is *d_pos*)
# map parameter names -> positions in the Appendix B table
labelsPositions_df = pd.read_csv('labelsPositions.csv')
#labelsPositions_df
parList2 = labelsPositions_df["name"].tolist()
posList = labelsPositions_df["position"].tolist()
d_pos = dict(zip(parList2, posList))
#d_pos
row = ['-'] * 53  # 52 parameters, pos. 0 is used for unuseful values
row[44] = '51'  # as default value for the par 'startHayekianMarket' for old
                # SMAC versions where it was not defined
for par in parList:
    if par == 'w':
        # the legacy 'w' parameter is stored at the position of 'Q'
        row[d_pos['Q']] = d_val[par]
    # bug fix: this must be elif — the original used an independent 'if',
    # so a handled 'w' (absent from d_pos) also fell into the else branch
    # and was wrongly reported as "not found"
    elif par in d_pos:
        row[d_pos[par]] = d_val[par]
    else:
        print('not found:', par)
the parameter checkResConsUnsoldProd (not affecting the model, but only working on its output) appears 20180829 in 28ter experiment; in the first commit, of 20180830, the name is checkResCons, but quite immediately became checkResConsUnsoldProd; the commit of 20181013 signals that we have the output from parameters.py (the experiment 80 is of 20181009, so without that output); all the experiments from 28ter to 80 have internally checkResConsUnsoldProd set to True
so from >= 20180829 to <= 20181009 the val of checkResConsUnsoldProd is True
1535414400 is equivalent to 08/28/2018 @ 12:00am (UTC) 1539129600 is equivalent to 10/10/2018 @ 12:00am (UTC)
import platform

def creation_date(path_to_file):
    """
    Return the creation time of the file at path_to_file, falling back
    to the last-modification time when the platform does not record it.
    See http://stackoverflow.com/a/39501288/1709587 for explanation.
    """
    if platform.system() == 'Windows':
        return os.path.getctime(path_to_file)
    # macOS exposes the birth time in os.stat; Linux usually does not
    file_stat = os.stat(path_to_file)
    try:
        return file_stat.st_birthtime
    except AttributeError:
        # probably Linux: no easy way to get the creation date,
        # so settle for when the content was last modified
        return file_stat.st_mtime
#converter https://www.unixtimestamp.com
fileTime = creation_date("./" + nameFilePar)
# files saved in the 2018-08-28 .. 2018-10-10 (UTC) window implicitly had
# checkResConsUnsoldProd set to True (see the note above)
if 1535414400 <= fileTime <= 1539129600:
    row[8] = 'True'
#row
#for i in range(1,len(row)-1):
#    print(row[i],"& ",end='')
#print(row[-1])
# first LaTeX row of the Appendix B table: positions 1..26
for cell in row[1:26]:
    print(cell, "& ", end='')
print(row[26])
# second LaTeX row: positions 27..last
for cell in row[27:-1]:
    print(cell, "& ", end='')
# NOTE(review): [1:5] keeps only chars 1-4 to avoid the '[ ]' output of
# list-like values — assumes those values are short; confirm
if '[' in row[-1]:
    row[-1] = row[-1][1:5]
print(row[-1])