import os
import pandas as pd
pd.set_option('display.max_columns', 100)
import forestplot as fp
print(f"{fp.__version__=}")
import matplotlib.pyplot as plt
def save_mpl_fig(savename):
savepath = os.path.join('../docs/images', savename)
plt.savefig(f'{savepath}.png', dpi='figure', bbox_inches='tight')
df = fp.load_data("sleep")
df
fp.__version__='0.2.2'
n | r | CI95% | p-val | BF10 | power | var | hl | ll | moerror | group | label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 706 | 0.090373 | [0.02 0.16] | 1.630887e-02 | 8.390000e-01 | 0.67 | age | 0.16 | 0.02 | 0.069627 | age | in years |
1 | 706 | 0.048081 | [-0.03 0.12] | 2.019484e-01 | 1.060000e-01 | 0.25 | clerical | 0.12 | -0.03 | 0.071919 | occupation | =1 if clerical worker |
2 | 706 | 0.041229 | [-0.03 0.11] | 2.739475e-01 | 8.600000e-02 | 0.19 | construc | 0.11 | -0.03 | 0.068771 | occupation | =1 if construction worker |
3 | 706 | -0.095004 | [-0.17 -0.02] | 1.155151e-02 | 1.137000e+00 | 0.72 | educ | -0.02 | -0.17 | 0.075004 | labor factors | years of schooling |
4 | 706 | -0.102825 | [-0.18 -0.03] | 6.246660e-03 | 1.967000e+00 | 0.78 | gdhlth | -0.03 | -0.18 | 0.072825 | health factors | =1 if in good or excel. health |
5 | 706 | -0.066997 | [-0.14 0.01] | 7.524015e-02 | 2.290000e-01 | 0.43 | smsa | 0.01 | -0.14 | 0.076997 | area of residence | =1 if live in smsa |
6 | 706 | -0.035909 | [-0.11 0.04] | 3.407214e-01 | 7.400000e-02 | 0.16 | male | 0.04 | -0.11 | 0.075909 | other factors | =1 if male |
7 | 706 | 0.027147 | [-0.05 0.1 ] | 4.714176e-01 | 6.100000e-02 | 0.11 | prot | 0.10 | -0.05 | 0.072853 | other factors | =1 if Protestant |
8 | 706 | 0.001782 | [-0.07 0.08] | 9.623058e-01 | 4.700000e-02 | 0.05 | selfe | 0.08 | -0.07 | 0.078218 | labor factors | =1 if self employed |
9 | 706 | 0.078600 | [0. 0.15] | 3.679946e-02 | 4.150000e-01 | 0.55 | south | 0.15 | 0.00 | 0.071400 | area of residence | =1 if live in south |
10 | 706 | 0.007881 | [-0.07 0.08] | 8.344125e-01 | 4.800000e-02 | 0.06 | spsepay | 0.08 | -0.07 | 0.072119 | other factors | spousal wage income |
11 | 706 | -0.321384 | [-0.39 -0.25] | 1.994095e-18 | 1.961000e+15 | 1.00 | totwrk | -0.25 | -0.39 | 0.071384 | labor factors | mins worked per week |
12 | 706 | -0.013262 | [-0.09 0.06] | 7.250012e-01 | 5.000000e-02 | 0.06 | yngkid | 0.06 | -0.09 | 0.073262 | family factors | =1 if children < 3 present |
13 | 706 | 0.063997 | [-0.01 0.14] | 8.928507e-02 | 1.990000e-01 | 0.40 | yrsmarr | 0.14 | -0.01 | 0.076003 | family factors | years married |
14 | 532 | -0.049450 | [-0.13 0.04] | 2.548774e-01 | 1.040000e-01 | 0.21 | hrwage | 0.04 | -0.13 | 0.089450 | labor factors | hourly wage |
fp.forestplot(df, # the dataframe with results data
estimate="r", # col containing estimated effect size
ll="ll", hl="hl", # columns containing conf. int. lower and higher limits
varlabel="label", # column containing variable label
ylabel="Confidence interval", # y-label title
xlabel="Pearson correlation" # x-label title
)
save_mpl_fig('vanilla')
groupvar
)group_order
)sort
)capitalize
)fp.forestplot(df, # the dataframe with results data
estimate="r", # col containing estimated effect size
ll="ll", hl="hl", # columns containing conf. int. lower and higher limits
varlabel="label", # column containing variable label
capitalize="capitalize", # Capitalize labels
groupvar="group", # Add variable groupings
# group ordering
group_order=["labor factors", "occupation", "age", "health factors",
"family factors", "area of residence", "other factors"],
sort=True # sort in ascending order (sorts within group if group is specified)
)
save_mpl_fig('group-grouporder-sort')
pval
)color_alt_rows
)fp.forestplot(df, # the dataframe with results data
estimate="r", # col containing estimated effect size
ll="ll", hl="hl", # columns containing conf. int. lower and higher limits
varlabel="label", # column containing variable label
capitalize="capitalize", # Capitalize labels
groupvar="group", # Add variable groupings
# group ordering
group_order=["labor factors", "occupation", "age", "health factors",
"family factors", "area of residence", "other factors"],
sort=True, # sort in ascending order (sorts within group if group is specified)
pval="p-val", # Column of p-value to be reported on right
color_alt_rows=True, # Gray alternate rows
ylabel="Est.(95% Conf. Int.)", # ylabel to print
**{"ylabel1_size": 11} # control size of printed ylabel
)
save_mpl_fig('group-grouporder-pvalue-sort-colorrows')
annote
and rightannote
)annoteheaders
and right_annoteheaders
)table
)fp.forestplot(df, # the dataframe with results data
estimate="r", # col containing estimated effect size
ll="ll", hl="hl", # lower & higher limits of conf. int.
varlabel="label", # column containing the varlabels to be printed on far left
capitalize="capitalize", # Capitalize labels
pval="p-val", # column containing p-values to be formatted
annote=["n", "power", "est_ci"], # columns to report on left of plot
annoteheaders=["N", "Power", "Est. (95% Conf. Int.)"], # ^corresponding headers
rightannote=["formatted_pval", "group"], # columns to report on right of plot
right_annoteheaders=["P-value", "Variable group"], # ^corresponding headers
xlabel="Pearson correlation coefficient", # x-label title
table=True, # Format as a table
)
save_mpl_fig('leftannote-rightannote-table')
fp.forestplot(df, # the dataframe with results data
estimate="r", # col containing estimated effect size
ll="ll", hl="hl", # lower & higher limits of conf. int.
varlabel="label", # column containing the varlabels to be printed on far left
capitalize="capitalize", # Capitalize labels
ci_report=False, # Turn off conf. int. reporting
flush=False, # Turn off left-flush of text
**{'fontfamily': 'sans-serif'} # revert to sans-serif
)
save_mpl_fig('vcoefplot')
annote
, rightannote
)pval
)groupvar
) and adjust group order (group_order
) to reporttable
)fp.forestplot(df, # the dataframe with results data
estimate="r", # col containing estimated effect size
ll="ll", hl="hl", # lower & higher limits of conf. int.
varlabel="label", # column containing the varlabels to be printed on far left
capitalize="capitalize", # Capitalize labels
pval="p-val", # column containing p-values to be formatted
annote=["n", "power", "est_ci"], # columns to report on left of plot
annoteheaders=["N", "Power", "Est. (95% Conf. Int.)"], # ^corresponding headers
rightannote=["formatted_pval", "group"], # columns to report on right of plot
right_annoteheaders=["P-value", "Variable group"], # ^corresponding headers
groupvar="group", # column containing group labels
group_order=["labor factors", "occupation", "age", "health factors",
"family factors", "area of residence", "other factors"],
xlabel="Pearson correlation coefficient", # x-label title
xticks=[-.4,-.2,0, .2], # x-ticks to be printed
sort=True, # sort estimates in ascending order
table=True, # Format as a table
# Additional kwargs for customizations
**{"marker": "D", # set maker symbol as diamond
"markersize": 35, # adjust marker size
"xlinestyle": (0, (10, 5)), # long dash for x-reference line
"xlinecolor": ".1", # gray color for x-reference line
"xtick_size": 12, # adjust x-ticker fontsize
}
)
save_mpl_fig('main')