#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete

LetsPlot.setup_html()


# In[2]:


# Quantile levels reused by most of the violin examples below.
QUANTILES = [.25, .5, .75]


# In[3]:


def plot_matrix(plots=None, width=400, height=300, columns=2):
    """Arrange plots on a regular grid inside one ``GGBunch`` and show it.

    Plots fill the grid left to right, then top to bottom: the plot at
    position ``i`` lands in column ``i % columns`` of row ``i // columns``.

    Args:
        plots: Plots to lay out, in display order. ``None`` (the default)
            is treated as an empty collection.
        width: Width of every grid cell, forwarded to ``GGBunch.add_plot``
            (presumably pixels -- confirm against the Lets-Plot docs).
        height: Height of every grid cell, forwarded the same way.
        columns: Number of cells per row.

    Returns:
        Whatever ``GGBunch.show()`` returns.

    Raises:
        ZeroDivisionError: If ``columns`` is 0 and ``plots`` is not empty.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if plots is None:
        plots = []
    bunch = GGBunch()
    for index, plot in enumerate(plots):
        # Integer divmod gives the grid cell without a float round trip.
        row, column = divmod(index, columns)
        bunch.add_plot(plot, column * width, row * height, width, height)
    return bunch.show()


# In[4]:


mpg_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv")
mpg_df.head()


# ## Minimalistic example

# In[5]:


ggplot(mpg_df, aes(y='hwy')) + geom_violin() + ggtitle("Simplest example")


# ## Comparison of geoms

# In[6]:


# Flipped, faceted density plot to set beside the violin plot of the same data.
p_d = (
    ggplot(mpg_df)
    + geom_density(aes(x='hwy', fill='drv'), color='black', alpha=.5)
    + facet_grid(x='drv')
    + coord_flip()
    + ggtitle("geom_density()")
)
p_v = (
    ggplot(mpg_df, aes(x=as_discrete('drv', order=1), y='hwy'))
    + geom_violin(aes(fill='drv'), alpha=.5)
    + ggtitle("geom_violin()")
)

plot_matrix([p_d, p_v])


# ## Original parameters

# ### `quantiles`

# In[7]:


tests = [
    {'quantiles': None},
    {'quantiles': []},
    {'quantiles': [.05, .5, .95]},
    {'quantiles': [.25]},
    {'quantiles': [0, .5, 1]},
]

plot_matrix([
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(quantiles=test['quantiles'], quantile_lines=True)
    + ggtitle("quantiles={0}".format(test['quantiles']))
    for test in tests
])


# ### `scale`

# In[8]:


tests = [
    {'scale': None},
    {'scale': 'area'},
    {'scale': 'count'},
    {'scale': 'width'},
]

plot_matrix([
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(scale=test['scale'],
                  quantiles=QUANTILES, quantile_lines=True)
    + ggtitle("scale={0}".format(test['scale']))
    for test in tests
])


# ### `trim` and `tails_cutoff`

# In[9]:


tests = [
    {'trim': True, 'tails_cutoff': None},
    {'trim': False, 'tails_cutoff': 1},
    {'trim': False, 'tails_cutoff': 2},
    {'trim': False, 'tails_cutoff': 3},
]

plot_matrix([
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(trim=test['trim'],
                  tails_cutoff=test['tails_cutoff'])
    + ggtitle("trim={0}, tails_cutoff={1}".format(test['trim'], test['tails_cutoff']))
    for test in tests
])


# ### `show_half`

# In[10]:


# Two half-violins share each x position: one layer for hwy, one for cty.
(
    ggplot(mpg_df)
    + geom_violin(aes(x='drv', y='hwy'), show_half=-1, fill="#66c2a5")
    + geom_violin(aes(x='drv', y='cty'), show_half=1, fill="#fc8d62")
    + ylab('hwy/cty')
    + ggtitle("hwy is green and cty is orange")
)


# ## Custom density parameters

# In[11]:


p = ggplot(mpg_df, aes('drv', 'hwy'))
p_default = (
    p
    + geom_violin(quantiles=QUANTILES, quantile_lines=True)
    + ggtitle("Default")
)
# NOTE: 'epanechikov' is kept exactly as in the original script.
p_kernel = (
    p
    + geom_violin(quantiles=QUANTILES, kernel='epanechikov', quantile_lines=True)
    + ggtitle("kernel='epanechikov'")
)
p_bw = (
    p
    + geom_violin(quantiles=QUANTILES, bw=.1, quantile_lines=True)
    + ggtitle("bw=0.1")
)
p_adjust = (
    p
    + geom_violin(quantiles=QUANTILES, adjust=2, quantile_lines=True)
    + ggtitle("adjust=2")
)

plot_matrix([p_default, p_kernel, p_bw, p_adjust])


# ## Grouping and tooltips

# In[12]:


(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(
        aes(group='year', fill=as_discrete('year')),
        quantiles=QUANTILES, quantile_lines=True,
        tooltips=layer_tooltips()
        .line('^x')
        .line('year|@year')
        .line('hwy|@hwy')
        .line('violinwidth|@..violinwidth..')
        .line('density|@..density..')
        .line('count|@..count..')
        .line('scaled|@..scaled..'),
    )
    + ggtitle("Grouping and tooltips")
)


# ## Facets

# In[13]:


(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(aes(fill=as_discrete('year')),
                  quantiles=QUANTILES, quantile_lines=True)
    + facet_grid(y='year')
)


# ## `coord_flip()`

# In[14]:


(
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(quantiles=QUANTILES, quantile_lines=True)
    + coord_flip()
    + ggtitle("Use coord_flip()")
)


# ## Additional layers

# In[15]:


(
    ggplot(mpg_df, aes(as_discrete('drv', order=-1), 'hwy'))
    + geom_violin(aes(color='drv', fill='drv'), alpha=.5, size=2,
                  n=8, quantiles=QUANTILES, quantile_lines=True,
                  sampling=sampling_group_systematic(2))
    + scale_y_continuous(breaks=list(range(12, 29, 2)))
    + ylim(12, 28)
    + coord_fixed(ratio=.2)
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)


# In[16]:


# Violin with a narrow boxplot layered on top of it.
quartiles = [1/4, 2/4, 3/4]
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(quantiles=quartiles, quantile_lines=True)
    + geom_boxplot(width=.1)
)