#!/usr/bin/env python
# coding: utf-8

# # Notebook: Readme Examples
#
# Script export of the notebook that renders the README figures. Each
# `# In[n]:` section below corresponds to one notebook cell: it draws a plot
# with `forestplot` and saves it as a PNG via `save_mpl_fig`.

# In[1]:


import os

import matplotlib.pyplot as plt
import pandas as pd

import forestplot as fp

pd.set_option("display.max_columns", 100)
print(f"{fp.__version__=}")


def save_mpl_fig(savename):
    """Save the current matplotlib figure as ``../docs/images/<savename>.png``.

    Parameters
    ----------
    savename : str
        File name without extension; ``.png`` is appended.

    Notes
    -----
    The output directory is resolved relative to the current working
    directory and is created if it does not exist yet, so a fresh checkout
    does not fail inside ``plt.savefig``.
    """
    image_dir = "../docs/images"
    # plt.savefig does not create missing parent directories.
    os.makedirs(image_dir, exist_ok=True)
    savepath = os.path.join(image_dir, savename)
    plt.savefig(f"{savepath}.png", dpi="figure", bbox_inches="tight")


df = fp.load_data("sleep")
df  # notebook display expression; no effect when run as a script


# ## Out-of-box
#
# * Quickstart example
# * Estimate and confidence interval will be printed on left

# In[2]:


fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",
    hl="hl",  # columns containing conf. int. lower and higher limits
    varlabel="label",  # column containing variable label
    ylabel="Confidence interval",  # y-label title
    xlabel="Pearson correlation",  # x-label title
)
save_mpl_fig("vanilla")


# ## Add variable groupings, add group order, and sort by estimate size
#
# * Add group labels (`groupvar`)
# * Add group ordering (this is optional, `group_order`)
# * Sort estimates (`sort`)
# * Capitalize labels (`capitalize`)

# In[3]:


fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",
    hl="hl",  # columns containing conf. int. lower and higher limits
    varlabel="label",  # column containing variable label
    capitalize="capitalize",  # Capitalize labels
    groupvar="group",  # Add variable groupings
    # group ordering
    group_order=[
        "labor factors",
        "occupation",
        "age",
        "health factors",
        "family factors",
        "area of residence",
        "other factors",
    ],
    sort=True,  # sort in ascending order (sorts within group if group is specified)
)
save_mpl_fig("group-grouporder-sort")


# ## Add P-value and color alternate rows gray
#
# * Add (formatted) p-values on the right (`pval`)
# * P-values are automatically formatted
# * Color alternate rows gray (`color_alt_rows`)

# In[4]:


fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",
    hl="hl",  # columns containing conf. int. lower and higher limits
    varlabel="label",  # column containing variable label
    capitalize="capitalize",  # Capitalize labels
    groupvar="group",  # Add variable groupings
    # group ordering
    group_order=[
        "labor factors",
        "occupation",
        "age",
        "health factors",
        "family factors",
        "area of residence",
        "other factors",
    ],
    sort=True,  # sort in ascending order (sorts within group if group is specified)
    pval="p-val",  # Column of p-value to be reported on right
    color_alt_rows=True,  # Gray alternate rows
    ylabel="Est.(95% Conf. Int.)",  # ylabel to print
    **{"ylabel1_size": 11},  # control size of printed ylabel
)
save_mpl_fig("group-grouporder-pvalue-sort-colorrows")


# ## Add custom annotations and make it a table
#
# * Add more custom annotations on left and right (`annote` and `rightannote`)
# * Add headers for the annotations (`annoteheaders` and `right_annoteheaders`)
# * Make the plot a table (`table`)

# In[5]:


fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",
    hl="hl",  # lower & higher limits of conf. int.
    varlabel="label",  # column containing the varlabels to be printed on far left
    capitalize="capitalize",  # Capitalize labels
    pval="p-val",  # column containing p-values to be formatted
    annote=["n", "power", "est_ci"],  # columns to report on left of plot
    annoteheaders=["N", "Power", "Est. (95% Conf. Int.)"],  # ^corresponding headers
    rightannote=["formatted_pval", "group"],  # columns to report on right of plot
    right_annoteheaders=["P-value", "Variable group"],  # ^corresponding headers
    xlabel="Pearson correlation coefficient",  # x-label title
    table=True,  # Format as a table
)
save_mpl_fig("leftannote-rightannote-table")


# ## Strip down all bells and whistles
#
# * Out-of-box settings but
# * Turn off reporting of confidence interval on the left
# * Turn off left-flushing of variable labels

# In[6]:


fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",
    hl="hl",  # lower & higher limits of conf. int.
    varlabel="label",  # column containing the varlabels to be printed on far left
    capitalize="capitalize",  # Capitalize labels
    ci_report=False,  # Turn off conf. int. reporting
    flush=False,  # Turn off left-flush of text
    **{"fontfamily": "sans-serif"},  # revert to sans-serif
)
save_mpl_fig("vcoefplot")


# ## Forest plot as a table, with multiple customizations
#
# * Multiple annotations on left and right of forest plot (`annote`, `rightannote`)
# * Format p-values (`pval`)
# * Add variable groupings (`groupvar`) and adjust group order (`group_order`) to report
# * Make plot a table (`table`)

# In[7]:


fp.forestplot(
    df,  # the dataframe with results data
    estimate="r",  # col containing estimated effect size
    ll="ll",
    hl="hl",  # lower & higher limits of conf. int.
    varlabel="label",  # column containing the varlabels to be printed on far left
    capitalize="capitalize",  # Capitalize labels
    pval="p-val",  # column containing p-values to be formatted
    annote=["n", "power", "est_ci"],  # columns to report on left of plot
    annoteheaders=["N", "Power", "Est. (95% Conf. Int.)"],  # ^corresponding headers
    rightannote=["formatted_pval", "group"],  # columns to report on right of plot
    right_annoteheaders=["P-value", "Variable group"],  # ^corresponding headers
    groupvar="group",  # column containing group labels
    group_order=[
        "labor factors",
        "occupation",
        "age",
        "health factors",
        "family factors",
        "area of residence",
        "other factors",
    ],
    xlabel="Pearson correlation coefficient",  # x-label title
    xticks=[-0.4, -0.2, 0, 0.2],  # x-ticks to be printed
    sort=True,  # sort estimates in ascending order
    table=True,  # Format as a table
    # Additional kwargs for customizations
    **{
        "marker": "D",  # set marker symbol as diamond
        "markersize": 35,  # adjust marker size
        "xlinestyle": (0, (10, 5)),  # long dash for x-reference line
        "xlinecolor": ".1",  # gray color for x-reference line
        "xtick_size": 12,  # adjust x-ticker fontsize
    },
)
save_mpl_fig("main")


# ## Multiple models (`mforestplot`)

# In[8]:


# Keep only the two models that are plotted below.
df_mmodel = pd.read_csv("../examples/data/sleep-mmodel.csv").query(
    "model=='all' | model=='young kids'"
)
df_mmodel.head(3)  # notebook display expression; no effect when run as a script


# In[9]:


fp.mforestplot(
    dataframe=df_mmodel,
    estimate="coef",
    ll="ll",
    hl="hl",
    varlabel="label",
    capitalize="capitalize",
    model_col="model",
    color_alt_rows=True,
    groupvar="group",
    table=True,
    rightannote=["var", "group"],
    right_annoteheaders=["Source", "Group"],
    xlabel="Coefficient (95% CI)",
    modellabels=["Have young kids", "Full sample"],
    xticks=[-1200, -600, 0, 600],
    mcolor=["#CC6677", "#4477AA"],
    # Additional kwargs for customizations
    **{
        "markersize": 30,
        # override default vertical offset between models (0.0 to 1.0)
        "offset": 0.35,
        "xlinestyle": (0, (10, 5)),  # long dash for x-reference line
        "xlinecolor": ".8",  # gray color for x-reference line
    },
)
save_mpl_fig("multimodel")


# In[ ]: