#!/usr/bin/env python
# coding: utf-8

# # Waterfall Chart

# In[1]:


from lets_plot import *
from lets_plot.bistro import *


# In[2]:


LetsPlot.setup_html()


# In[3]:


# data1: plain values, no measure column.
data1 = dict(
    x=["A", "B", "C", "D", "E"],
    y=[300, -100, -400, 300, 200],
)
# data2: measure column with only 'relative' and 'total' entries.
data2 = dict(
    x=["A", "B", "C", "D", "T1", "A", "B", "C", "D", "T2"],
    y=[100, 100, -300, 500, None, -200, 300, 100, -300, 0.0],
    m=[
        'relative', 'relative', 'relative', 'relative', 'total',
        'relative', 'relative', 'relative', 'relative', 'total',
    ],
)
# data3: measure column that also contains 'absolute' entries.
data3 = dict(
    x=["A", "B", "C", "D", "E", "A", "B", "C", "D", "T"],
    y=[100, 100, -300, 500, 300, -200, 300, 100, -300, 0.0],
    m=[
        'absolute', 'relative', 'relative', 'relative', 'absolute',
        'relative', 'relative', 'relative', 'relative', 'total',
    ],
)


# ## Default

# In[4]:


gggrid([
    waterfall_plot(data1, 'x', 'y') + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m') + ggtitle("With measure", "Without absolute"),
    waterfall_plot(data3, 'x', 'y', measure='m') + ggtitle("With measure", "With absolute"),
])


# ## Parameters

# ### Aesthetics

# In[5]:


# color
gggrid([
    waterfall_plot(data1, 'x', 'y', size=1, color="magenta") + ggtitle("Without measure"),
    waterfall_plot(data1, 'x', 'y', size=1, color='flow_type', fill="lightgrey") + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', size=1, color="magenta") + ggtitle("With measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', size=1, color='flow_type', fill="lightgrey") + ggtitle("With measure"),
], ncol=2)


# In[6]:


# fill
gggrid([
    waterfall_plot(data1, 'x', 'y', fill="magenta") + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', fill="magenta") + ggtitle("With measure"),
])


# In[7]:


# size
gggrid([
    waterfall_plot(data1, 'x', 'y', size=2) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', size=2) + ggtitle("With measure"),
])


# In[8]:


# alpha
gggrid([
    waterfall_plot(data1, 'x', 'y', alpha=.5) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', alpha=.5) + ggtitle("With measure"),
])


# In[9]:


# linetype
gggrid([
    waterfall_plot(data1, 'x', 'y', size=1, linetype='dashed') + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', size=1, linetype='dashed') + ggtitle("With measure"),
])


# In[10]:


# width
gggrid([
    waterfall_plot(data1, 'x', 'y', width=.4) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', width=.4) + ggtitle("With measure"),
])


# ### Standard parameters

# In[11]:


# show_legend
gggrid([
    waterfall_plot(data1, 'x', 'y', show_legend=True)
    + ggtitle("Without measure", "Default calc_total"),
    waterfall_plot(data1, 'x', 'y', show_legend=True, calc_total=False)
    + ggtitle("Without measure", "calc_total=False"),
    waterfall_plot(data2, 'x', 'y', measure='m', show_legend=True)
    + ggtitle("With measure", "Default calc_total, without absolute"),
    waterfall_plot(data2, 'x', 'y', measure='m', show_legend=True, calc_total=False)
    + ggtitle("With measure", "calc_total=False, without absolute"),
    waterfall_plot(data3, 'x', 'y', measure='m', show_legend=True)
    + ggtitle("With measure", "Default calc_total, with absolute"),
    waterfall_plot(data3, 'x', 'y', measure='m', show_legend=True, calc_total=False)
    + ggtitle("With measure", "calc_total=False, with absolute"),
], ncol=2)


# In[12]:


# tooltips
gggrid([
    waterfall_plot(data1, 'x', 'y', relative_tooltips='none', absolute_tooltips='none')
    + ggtitle("Without measure", "relative_tooltips='none',\nabsolute_tooltips='none'"),
    waterfall_plot(data1, 'x', 'y', relative_tooltips='detailed', absolute_tooltips='detailed')
    + ggtitle("Without measure", "relative_tooltips='detailed',\nabsolute_tooltips='detailed'"),
    waterfall_plot(
        data1, 'x', 'y',
        relative_tooltips=layer_tooltips().line("@..dy..: from @..initial.. to @..value..").disable_splitting(),
        absolute_tooltips=layer_tooltips().line("@..flow_type..: @..value..").disable_splitting(),
    )
    + ggtitle("Without measure", "Customized tooltips"),
    waterfall_plot(data2, 'x', 'y', measure='m', relative_tooltips='none', absolute_tooltips='none')
    + ggtitle("With measure", "relative_tooltips='none',\nabsolute_tooltips='none'"),
    waterfall_plot(data2, 'x', 'y', measure='m', relative_tooltips='detailed', absolute_tooltips='detailed')
    + ggtitle("With measure", "relative_tooltips='detailed',\nabsolute_tooltips='detailed'"),
    waterfall_plot(
        data2, 'x', 'y', measure='m',
        relative_tooltips=layer_tooltips().line("@..dy..: from @..initial.. to @..value..").disable_splitting(),
        absolute_tooltips=layer_tooltips().line("@..flow_type..: @..value..").disable_splitting(),
    )
    + ggtitle("With measure", "Customized tooltips"),
], ncol=3)


# ### Waterfall-specific parameters

# In[13]:


# sorted_value
gggrid([
    waterfall_plot(data1, 'x', 'y', sorted_value=True) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', sorted_value=True) + ggtitle("With measure"),
])


# In[14]:


# threshold
gggrid([
    waterfall_plot(data1, 'x', 'y', threshold=200) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', threshold=200) + ggtitle("With measure"),
])


# In[15]:


# max_values
gggrid([
    waterfall_plot(data1, 'x', 'y', max_values=2) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', max_values=2) + ggtitle("With measure"),
])


# In[16]:


# Use threshold to skip zeros
data_with_zeros = dict(
    x=['a', 'b', 'c', 'd', 't', 'a', 'b', 't'],
    y=[1, -2, 3, 0, None, 0, 2, None],
    m=['relative', 'relative', 'relative', 'relative', 'total', 'relative', 'relative', 'total'],
)

gggrid([
    waterfall_plot(data_with_zeros, 'x', 'y', measure='m'),
    waterfall_plot(data_with_zeros, 'x', 'y', measure='m', threshold=0),
])


# In[17]:


# calc_total
gggrid([
    waterfall_plot(data1, 'x', 'y') + ggtitle("Without measure", "Default"),
    waterfall_plot(data1, 'x', 'y', calc_total=False) + ggtitle("Without measure", "calc_total=False"),
    waterfall_plot(data2, 'x', 'y', measure='m') + ggtitle("With measure", "Default"),
    waterfall_plot(data2, 'x', 'y', measure='m', calc_total=False) + ggtitle("With measure", "calc_total=False"),
], ncol=2)


# In[18]:


# total_title
gggrid([
    waterfall_plot(data1, 'x', 'y', total_title="Result", show_legend=True, absolute_tooltips='detailed'),
    waterfall_plot(data2, 'x', 'y', measure='m', total_title="Result", show_legend=True, absolute_tooltips='detailed'),
])


# ### Control additional geometries

# In[19]:


# hline
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line()),
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(blank=True)),
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_blank()),
    waterfall_plot(data2, 'x', 'y', measure='m', hline='blank'),
], ncol=2)


# In[20]:


# hline_ontop
waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(), hline_ontop=False)


# In[21]:


# hline color
waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(color="magenta"))


# In[22]:


# hline size
waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(size=2))


# In[23]:


# hline linetype
waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(linetype='solid'))


# In[24]:


# connector
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line()),
    waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line(blank=True)),
    waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_blank()),
    waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector='blank'),
], ncol=2)


# In[25]:


# connector color
waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line(color="magenta"))


# In[26]:


# connector size
waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line(size=2))


# In[27]:


# connector linetype
waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line(linetype='dotted'))


# In[28]:


# label
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text()),
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(blank=True)),
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_blank()),
    waterfall_plot(data2, 'x', 'y', measure='m', label='blank'),
], ncol=2)


# In[29]:


# label color
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(color="yellow")),
    waterfall_plot(data2, 'x', 'y', measure='m', fill="lightgray", label=element_text(color='flow_type')),
])


# In[30]:


# label family
waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(family="Courier"))


# In[31]:


# label face
waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(face='bold_italic'))


# In[32]:


# label size
waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(size=10))


# In[33]:


# label angle
waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(angle=45))


# In[34]:


# label hjust/vjust
def get_waterfall_with_justified_labels(hjust, vjust):
    """Return a data2 waterfall plot whose labels use the given hjust/vjust.

    The justification values are echoed in the plot subtitle.
    """
    return (
        waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(hjust=hjust, vjust=vjust))
        + ggtitle("Justified labels", f"hjust={hjust}, vjust={vjust}")
    )


gggrid([
    get_waterfall_with_justified_labels(0, 0),
    get_waterfall_with_justified_labels(0, 1),
    get_waterfall_with_justified_labels(1, 0),
    get_waterfall_with_justified_labels(1, 1),
], ncol=2)


# In[35]:


# label_format
waterfall_plot(data2, 'x', 'y', measure='m', label_format="({.1f})")


# In[36]:


# group
data_with_groups = {
    'x': ['A', 'C', 'T1', 'A', 'B', 'C', 'T2'],
    'y': [2, -1, None, 1, 3, -2, 0.0],
    'm': ['absolute', 'relative', 'total', 'absolute', 'relative', 'relative', 'total'],
    'g': ['a', 'a', 'a', 'b', 'b', 'b', 'b'],
}

waterfall_plot(data_with_groups, 'x', 'y', measure='m', group='g') + facet_wrap(facets='g', scales='free_x')


# ## Other Customizations

# In[37]:


# fill and color
gggrid([
    waterfall_plot(
        data3, 'x', 'y', measure='m', show_legend=True, width=.7,
        size=1, color="#777777", label=element_text(color="#777777"),
    )
    + scale_fill_manual({"Increase": "white", "Decrease": "black", "Absolute": "green", "Total": "yellow"})
    + ggtitle("Custom scale_fill_manual()"),
    waterfall_plot(
        data3, 'x', 'y', measure='m', show_legend=True, width=.7,
        fill="black", label=element_text(color='flow_type'),
    )
    + scale_color_manual({"Increase": "green", "Decrease": "yellow", "Absolute": "red", "Total": "#bbbbff"})
    + ggtitle("Custom scale_color_manual()"),
    waterfall_plot(
        data3, 'x', 'y', measure='m', show_legend=True, width=.7,
        color="#777777", label=element_text(color="#777777"),
    )
    + scale_fill_manual(
        {"Increase": "green", "Decrease": "red", "Absolute": "cyan", "Total": "yellow"},
        labels=["Up", "Down", "From zero", "Result"],
    )
    + ggtitle("Custom flow type names"),
], ncol=1) + ggsize(1000, 800)


# In[38]:


# flip coordinates
waterfall_plot(data2, 'x', 'y', measure='m') + coord_flip()


# In[39]:


# custom theme
waterfall_plot(data2, 'x', 'y', measure='m') + theme_bw() + flavor_darcula()


# ## Tests

# ### Boundary Value Analysis

# In[40]:


import numpy as np


class BVATest:
    """One boundary-value case: a dataset with 'x', 'y', 'm' columns and a title.

    `show` controls whether the case is rendered when `show_all` is False.
    """

    def __init__(self, data, title, show=True):
        self.data = data
        self.title = title
        self.show = show

    def to_plot(self):
        """Return a grid with the dataset plotted for calc_total=True and calc_total=False."""
        return gggrid([
            waterfall_plot(self.data, 'x', 'y', measure='m', calc_total=True)
            + ggtitle(self.title, "calc_total=True"),
            waterfall_plot(self.data, 'x', 'y', measure='m', calc_total=False)
            + ggtitle(self.title, "calc_total=False"),
        ])


# Set to True to render every case regardless of its own `show` flag.
show_all = False

bva_tests = [
    BVATest(
        data=dict(
            x=[],
            y=[],
            m=[],
        ),
        title="Empty dataset",
    ),
    BVATest(
        data=dict(
            x=["A", "T"],
            y=[1, None],
            m=['relative', 'total'],
        ),
        title="One value dataset",
    ),
    BVATest(
        data=dict(
            x=["A", "A", "T"],
            y=[1, 2, None],
            m=['relative', 'relative', 'total'],
        ),
        title="Repeated categories",
    ),
    BVATest(
        data=dict(
            x=["A", "T"],
            y=[0, None],
            m=['relative', 'total'],
        ),
        title="Zero values",
    ),
    BVATest(
        data=dict(
            x=["A", "T"],
            y=[-1, None],
            m=['relative', 'total'],
        ),
        title="Negative values",
    ),
    BVATest(
        data=dict(
            x=[1, 0],
            y=[1, None],
            m=['relative', 'total'],
        ),
        title="Numeric x",
    ),
    BVATest(
        data=dict(
            x=["A", "B", "T"],
            y=[1, float('inf'), float('-inf')],
            m=['relative', 'relative', 'total'],
        ),
        title="Inf values",
    ),
    BVATest(
        data=dict(
            x=["A", "B", "C", None],
            y=[1, 1, None, 1],
            m=['relative', None, 'relative', 'total'],
        ),
        title="None values",
    ),
    BVATest(
        data=dict(
            x=["A", "B", "C", np.nan],
            y=[1, 1, np.nan, 1],
            m=['relative', np.nan, 'relative', 'total'],
        ),
        title="np.nan values",
    ),
    BVATest(
        data=dict(
            x=["A", "B", "T"],
            y=[1, -1, None],
            m=['relative', 'relative', 'total'],
        ),
        title="Total is zero",
    ),
]

gggrid([
    t.to_plot() for t in bva_tests if show_all or t.show
], ncol=1)


# ### Regression Testing

# In[41]:


# Total should be equal to [3, 4] (in labels and tooltips)
tdata = dict(
    x=["A", "B", "T1", "A", "B", "T2"],
    y=[1, 2, None, -1, 2, 0],
    m=['relative', 'relative', 'total'] * 2,
)

waterfall_plot(tdata, 'x', 'y', measure='m')


# In[42]:


# Changing hline properties shouldn't affect subsequent plots. The same holds for connector and label.
# Each customized hline/connector/label plot is followed by its default
# counterpart; with ncol=2 the two land on the same grid row.
element_regression_plots = [
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(color="magenta")),
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line()),
    waterfall_plot(data2, 'x', 'y', measure='m', connector=element_line(color="magenta")),
    waterfall_plot(data2, 'x', 'y', measure='m', connector=element_line()),
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(color="black")),
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text()),
]
gggrid(element_regression_plots, ncol=2)


# In[43]:


# Changing total_title shouldn't affect subsequent plots
total_title_plots = [
    waterfall_plot(data1, 'x', 'y', total_title="Result"),
    waterfall_plot(data1, 'x', 'y'),
]
gggrid(total_title_plots)


# In[44]:


# xxx_tooltips='none' should disable tooltips
no_tooltips = dict(relative_tooltips='none', absolute_tooltips='none')
gggrid([
    waterfall_plot(data1, 'x', 'y', **no_tooltips),
    waterfall_plot(data2, 'x', 'y', measure='m', **no_tooltips),
])


# ## New Features

# ### Param `base`

# In[45]:


# Default base versus base=-100, without and with a measure column.
base_plots = [
    waterfall_plot(data1, 'x', 'y') + ggtitle("Without measure", "Default"),
    waterfall_plot(data1, 'x', 'y', base=-100) + ggtitle("Without measure", "base=-100"),
    waterfall_plot(data2, 'x', 'y', measure='m') + ggtitle("With measure", "Default"),
    waterfall_plot(data2, 'x', 'y', measure='m', base=-100) + ggtitle("With measure", "base=-100"),
]
gggrid(base_plots, ncol=2)