import os
import pandas as pd
# Raise pandas' "display.max_columns" option to 100 so wide DataFrames are not
# truncated when displayed.
pd.set_option("display.max_columns", 100)
import forestplot as fp
# Echo the forestplot version in use; the "=" f-string specifier prints the
# expression text followed by its repr.
print(f"{fp.__version__=}")
import matplotlib.pyplot as plt
def save_mpl_fig(savename):
    """Write the current matplotlib figure to ``../docs/images/<savename>.png``.

    ``savename`` is the output file name without extension. The figure is
    saved at its own DPI with a tight bounding box.
    """
    target = os.path.join("../docs/images", savename) + ".png"
    plt.savefig(target, dpi="figure", bbox_inches="tight")
# Load the "sleep" example dataset through forestplot's data loader.
df = fp.load_data("sleep")
# Bare expression: shows the DataFrame when run as a notebook cell; it has no
# visible effect in a plain script.
df
fp.__version__='0.3.1'
 | n | r | CI95% | p-val | BF10 | power | var | hl | ll | moerror | group | label |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 706 | 0.090373 | [0.02 0.16] | 1.630887e-02 | 8.390000e-01 | 0.67 | age | 0.16 | 0.02 | 0.069627 | age | in years |
1 | 706 | 0.048081 | [-0.03 0.12] | 2.019484e-01 | 1.060000e-01 | 0.25 | clerical | 0.12 | -0.03 | 0.071919 | occupation | =1 if clerical worker |
2 | 706 | 0.041229 | [-0.03 0.11] | 2.739475e-01 | 8.600000e-02 | 0.19 | construc | 0.11 | -0.03 | 0.068771 | occupation | =1 if construction worker |
3 | 706 | -0.095004 | [-0.17 -0.02] | 1.155151e-02 | 1.137000e+00 | 0.72 | educ | -0.02 | -0.17 | 0.075004 | labor factors | years of schooling |
4 | 706 | -0.102825 | [-0.18 -0.03] | 6.246660e-03 | 1.967000e+00 | 0.78 | gdhlth | -0.03 | -0.18 | 0.072825 | health factors | =1 if in good or excel. health |
5 | 706 | -0.066997 | [-0.14 0.01] | 7.524015e-02 | 2.290000e-01 | 0.43 | smsa | 0.01 | -0.14 | 0.076997 | area of residence | =1 if live in smsa |
6 | 706 | -0.035909 | [-0.11 0.04] | 3.407214e-01 | 7.400000e-02 | 0.16 | male | 0.04 | -0.11 | 0.075909 | other factors | =1 if male |
7 | 706 | 0.027147 | [-0.05 0.1 ] | 4.714176e-01 | 6.100000e-02 | 0.11 | prot | 0.10 | -0.05 | 0.072853 | other factors | =1 if Protestant |
8 | 706 | 0.001782 | [-0.07 0.08] | 9.623058e-01 | 4.700000e-02 | 0.05 | selfe | 0.08 | -0.07 | 0.078218 | labor factors | =1 if self employed |
9 | 706 | 0.078600 | [0. 0.15] | 3.679946e-02 | 4.150000e-01 | 0.55 | south | 0.15 | 0.00 | 0.071400 | area of residence | =1 if live in south |
10 | 706 | 0.007881 | [-0.07 0.08] | 8.344125e-01 | 4.800000e-02 | 0.06 | spsepay | 0.08 | -0.07 | 0.072119 | other factors | spousal wage income |
11 | 706 | -0.321384 | [-0.39 -0.25] | 1.994095e-18 | 1.961000e+15 | 1.00 | totwrk | -0.25 | -0.39 | 0.071384 | labor factors | mins worked per week |
12 | 706 | -0.013262 | [-0.09 0.06] | 7.250012e-01 | 5.000000e-02 | 0.06 | yngkid | 0.06 | -0.09 | 0.073262 | family factors | =1 if children < 3 present |
13 | 706 | 0.063997 | [-0.01 0.14] | 8.928507e-02 | 1.990000e-01 | 0.40 | yrsmarr | 0.14 | -0.01 | 0.076003 | family factors | years married |
14 | 532 | -0.049450 | [-0.13 0.04] | 2.548774e-01 | 1.040000e-01 | 0.21 | hrwage | 0.04 | -0.13 | 0.089450 | labor factors | hourly wage |
# Basic forest plot: point estimates with their confidence limits, labelled
# by the "label" column, then saved as "vanilla".
fp.forestplot(
    df,  # results table
    estimate="r",  # effect-size column
    varlabel="label",  # column holding the row labels
    ll="ll",  # lower confidence-limit column
    hl="hl",  # upper confidence-limit column
    xlabel="Pearson correlation",  # x-axis title
    ylabel="Confidence interval",  # y-axis title
)
save_mpl_fig("vanilla")
# Parameters demonstrated below: groupvar, group_order, sort, capitalize
# Grouped forest plot: rows are grouped by the "group" column, groups follow
# the explicit order in group_order, and estimates are sorted ascending.
# (A stray ")" left over from markdown extraction preceded this call and made
# it a syntax error; it has been removed.)
fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",  # col containing conf. int. lower limit
    hl="hl",  # col containing conf. int. higher limit
    varlabel="label",  # column containing variable label
    capitalize="capitalize",  # Capitalize labels
    groupvar="group",  # Add variable groupings
    # group ordering
    group_order=[
        "labor factors",
        "occupation",
        "age",
        "health factors",
        "family factors",
        "area of residence",
        "other factors",
    ],
    sort=True,  # sort in ascending order (sorts within group if group is specified)
)
save_mpl_fig("group-grouporder-sort")
# Parameters demonstrated below: pval, color_alt_rows
# Grouped, sorted forest plot that also reports p-values on the right and
# shades alternate rows.
# (A stray ")" left over from markdown extraction preceded this call and made
# it a syntax error; it has been removed.)
fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",  # col containing conf. int. lower limit
    hl="hl",  # col containing conf. int. higher limit
    varlabel="label",  # column containing variable label
    capitalize="capitalize",  # Capitalize labels
    groupvar="group",  # Add variable groupings
    # group ordering
    group_order=[
        "labor factors",
        "occupation",
        "age",
        "health factors",
        "family factors",
        "area of residence",
        "other factors",
    ],
    sort=True,  # sort in ascending order (sorts within group if group is specified)
    pval="p-val",  # Column of p-value to be reported on right
    color_alt_rows=True,  # Gray alternate rows
    ylabel="Est.(95% Conf. Int.)",  # ylabel to print
    **{"ylabel1_size": 11},  # control size of printed ylabel
)
save_mpl_fig("group-grouporder-pvalue-sort-colorrows")
# Parameters demonstrated below: annote and rightannote, annoteheaders and right_annoteheaders, table
# Table-style forest plot: extra columns are printed to the left and right of
# the plot, each with its own header.
# NOTE(review): "est_ci" and "formatted_pval" are not columns of df as loaded;
# presumably forestplot derives them from the estimate/CI and pval columns -
# confirm against the forestplot documentation.
# (A stray ")" left over from markdown extraction preceded this call and made
# it a syntax error; it has been removed.)
fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",  # lower limit of conf. int.
    hl="hl",  # higher limit of conf. int.
    varlabel="label",  # column containing the varlabels to be printed on far left
    capitalize="capitalize",  # Capitalize labels
    pval="p-val",  # column containing p-values to be formatted
    annote=["n", "power", "est_ci"],  # columns to report on left of plot
    annoteheaders=["N", "Power", "Est. (95% Conf. Int.)"],  # ^corresponding headers
    rightannote=["formatted_pval", "group"],  # columns to report on right of plot
    right_annoteheaders=["P-value", "Variable group"],  # ^corresponding headers
    xlabel="Pearson correlation coefficient",  # x-label title
    table=True,  # Format as a table
)
save_mpl_fig("leftannote-rightannote-table")
# Coefficient-plot variant: confidence-interval reporting and left-flush of
# text are both switched off, and the font family is set to sans-serif.
fp.forestplot(
    df,  # results table
    estimate="r",  # effect-size column
    varlabel="label",  # labels printed on the far left
    ll="ll",  # lower confidence-limit column
    hl="hl",  # upper confidence-limit column
    capitalize="capitalize",  # capitalize the labels
    flush=False,  # do not left-flush the text
    ci_report=False,  # do not report the confidence interval
    fontfamily="sans-serif",  # extra kwarg: revert to sans-serif
)
save_mpl_fig("vcoefplot")
# Parameters demonstrated below: annote, rightannote, pval, groupvar, and adjust group order (group_order) to report as a table (table)
# Full example: left/right annotations, formatted p-values, grouped and
# ordered rows, custom x-ticks, sorting, table layout, and marker/reference
# line styling passed as extra keyword arguments.
# NOTE(review): "est_ci" and "formatted_pval" are not columns of df as loaded;
# presumably forestplot derives them - confirm against the forestplot docs.
# (A stray ")" left over from markdown extraction preceded this call and made
# it a syntax error; it has been removed.)
fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",  # lower limit of conf. int.
    hl="hl",  # higher limit of conf. int.
    varlabel="label",  # column containing the varlabels to be printed on far left
    capitalize="capitalize",  # Capitalize labels
    pval="p-val",  # column containing p-values to be formatted
    annote=["n", "power", "est_ci"],  # columns to report on left of plot
    annoteheaders=["N", "Power", "Est. (95% Conf. Int.)"],  # ^corresponding headers
    rightannote=["formatted_pval", "group"],  # columns to report on right of plot
    right_annoteheaders=["P-value", "Variable group"],  # ^corresponding headers
    groupvar="group",  # column containing group labels
    group_order=[
        "labor factors",
        "occupation",
        "age",
        "health factors",
        "family factors",
        "area of residence",
        "other factors",
    ],
    xlabel="Pearson correlation coefficient",  # x-label title
    xticks=[-0.4, -0.2, 0, 0.2],  # x-ticks to be printed
    sort=True,  # sort estimates in ascending order
    table=True,  # Format as a table
    # Additional kwargs for customizations
    **{
        "marker": "D",  # set marker symbol as diamond
        "markersize": 35,  # adjust marker size
        "xlinestyle": (0, (10, 5)),  # long dash for x-reference line
        "xlinecolor": ".1",  # gray color for x-reference line
        "xtick_size": 12,  # adjust x-ticker fontsize
    },
)
save_mpl_fig("main")
mforestplot
# Read the multi-model results and keep only the rows whose "model" is
# "all" or "young kids".
# (The ")¶" heading residue from markdown extraction that preceded this
# assignment made it a syntax error; it has been removed.)
df_mmodel = pd.read_csv("../examples/data/sleep-mmodel.csv").query(
    "model=='all' | model=='young kids'"
)
# Bare expression: previews the first three rows when run as a notebook cell.
df_mmodel.head(3)
 | var | coef | se | T | pval | r2 | adj_r2 | ll | hl | model | group | label |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | age | 0.994889 | 1.969249 | 0.505213 | 0.613625 | 0.127289 | 0.103656 | -2.873825 | 4.863603 | all | age | in years |
3 | age | 22.634017 | 15.495320 | 1.460700 | 0.149315 | 0.178147 | -0.013619 | -8.361238 | 53.629272 | young kids | age | in years |
4 | black | -84.796612 | 82.150125 | -1.032215 | 0.302454 | 0.127289 | 0.103656 | -246.185715 | 76.592491 | all | other factors | =1 if black |
# Multi-model forest plot: one marker per model (distinguished by the "model"
# column) for every variable, drawn as a grouped table with shaded alternate
# rows, then saved as "multimodel".
fp.mforestplot(
    dataframe=df_mmodel,
    model_col="model",  # column identifying which model a row belongs to
    modellabels=["Have young kids", "Full sample"],
    mcolor=["#CC6677", "#4477AA"],
    estimate="coef",
    ll="ll",
    hl="hl",
    varlabel="label",
    capitalize="capitalize",
    groupvar="group",
    rightannote=["var", "group"],
    right_annoteheaders=["Source", "Group"],
    table=True,
    color_alt_rows=True,
    xlabel="Coefficient (95% CI)",
    xticks=[-1200, -600, 0, 600],
    # Extra keyword arguments for customization
    markersize=30,
    offset=0.35,  # override default vertical offset between models (0.0 to 1.0)
    xlinestyle=(0, (10, 5)),  # long dash for x-reference line
    xlinecolor=".8",  # gray color for x-reference line
)
save_mpl_fig("multimodel")