#!/usr/bin/env python
# coding: utf-8

# # Dot Plots

# ## Preparation

# In[1]:


import numpy as np
import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete

LetsPlot.setup_html()


# In[2]:


def plot_matrix(plots=None, width=500, height=375, columns=2):
    """Lay plots out on a grid inside a single ``GGBunch`` and show it.

    Plots are placed in the order given, filling each row left to right:
    the plot at position ``i`` goes to row ``i // columns`` and column
    ``i % columns``, i.e. to offset ``(column * width, row * height)``.

    Args:
        plots: Sequence of plots to arrange. ``None`` (the default) is
            treated as an empty sequence.
        width: Width given to every plot in the bunch.
        height: Height given to every plot in the bunch.
        columns: Number of plots per row.

    Returns:
        Whatever ``GGBunch.show()`` returns.
    """
    # A ``None`` sentinel avoids sharing one mutable list between calls.
    if plots is None:
        plots = []

    # NOTE(review): GGBunch appears to be deprecated in recent lets-plot
    # releases in favour of gggrid()/ggbunch() - confirm against the
    # installed version before upgrading.
    bunch = GGBunch()
    for index, plot in enumerate(plots):
        row, column = divmod(index, columns)
        bunch.add_plot(plot, column * width, row * height, width, height)
    return bunch.show()


# In[3]:


df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv")
df = df.drop(columns=["Unnamed: 0"])
# Fixed random_state keeps the 50-row sample identical between runs.
df = df.sample(n=50, random_state=42, ignore_index=True)
df.head()


# In[4]:


DEF_BIN_COUNT = 15


def get_binwidth(df, column, binwidth_ratio=1.0 / DEF_BIN_COUNT):
    """Return ``binwidth_ratio`` times the value range of ``df[column]``.

    The value range is ``df[column].max() - df[column].min()``; with the
    default ratio the result is that range divided by ``DEF_BIN_COUNT``.
    """
    value_range = df[column].max() - df[column].min()
    return binwidth_ratio * value_range


def get_bincount(df, column, binwidth):
    """Return the value range of ``df[column]`` divided by ``binwidth``.

    The quotient is rounded to the nearest integer and returned as ``int``.
    """
    value_range = df[column].max() - df[column].min()
    return int(round(value_range / binwidth))


nice_binwidth = round(get_binwidth(df, "hwy"), 3)
print(f"Nice binwidth value for examples: {nice_binwidth}")


# ## Default

# In[5]:


ggplot(df, aes(x="hwy")) + geom_dotplot() + ggtitle("geom_dotplot() example")


# In[6]:


ggplot(df, aes(y="hwy")) + geom_ydotplot() + ggtitle("geom_ydotplot() example")


# ## Comparison of stats and geoms

# In[7]:


(
    ggplot(df, aes(x="hwy"))
    + geom_area(stat='density')
    + geom_dotplot(binwidth=nice_binwidth, color='black', fill='white')
    + ggtitle("density + dotplot (method='dotdensity')")
)


# In[8]:


(
    ggplot(df, aes(x="hwy"))
    + geom_histogram(binwidth=nice_binwidth, color='white')
    + geom_dotplot(binwidth=nice_binwidth, method='histodot',
                   color='black', fill='white')
    + coord_fixed(ratio=nice_binwidth)
    + ggtitle("histogram + dotplot (method='histodot')")
)


# In[9]:


(
    ggplot(df, aes("drv", "hwy"))
    + geom_violin()
    + geom_ydotplot(binwidth=nice_binwidth, stackratio=.5,
                    color='black', fill='white')
    + ggtitle("violin + ydotplot (method='dotdensity')")
)


# ## Parameters

# ### `binwidth`

# In[10]:


p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot() + ggtitle("Default binwidth")
p2 = p + geom_dotplot(binwidth=nice_binwidth) + ggtitle(f"binwidth={nice_binwidth}")

plot_matrix([p1, p2])


# In[11]:


p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot() + ggtitle("Default binwidth")
p2 = p + geom_ydotplot(binwidth=nice_binwidth) + ggtitle(f"binwidth={nice_binwidth}")

plot_matrix([p1, p2])


# ### `stackdir`

# In[12]:


p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot(stackdir='up') + ggtitle("stackdir='up' (default)")
p2 = p + geom_dotplot(stackdir='down') + ggtitle("stackdir='down'")
p3 = p + geom_dotplot(stackdir='center') + ggtitle("stackdir='center'")
p4 = p + geom_dotplot(stackdir='centerwhole') + ggtitle("stackdir='centerwhole'")

plot_matrix([p1, p2, p3, p4])


# In[13]:


p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot(stackdir='left') + ggtitle("stackdir='left'")
p2 = p + geom_ydotplot(stackdir='right') + ggtitle("stackdir='right'")
p3 = p + geom_ydotplot(stackdir='center') + ggtitle("stackdir='center' (default)")
p4 = p + geom_ydotplot(stackdir='centerwhole') + ggtitle("stackdir='centerwhole'")

plot_matrix([p1, p2, p3, p4])


# ### `stackratio`

# In[14]:


p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot() + ggtitle("stackratio=1.0 (default)")
p2 = p + geom_dotplot(stackratio=0.5) + ggtitle("stackratio=0.5")
p3 = p + geom_dotplot(stackratio=1.5) + ggtitle("stackratio=1.5")

plot_matrix([p1, p2, p3])


# In[15]:


p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot() + ggtitle("stackratio=1.0 (default)")
p2 = p + geom_ydotplot(stackratio=0.5) + ggtitle("stackratio=0.5")
p3 = p + geom_ydotplot(stackratio=1.5) + ggtitle("stackratio=1.5")

plot_matrix([p1, p2, p3])


# ### `dotsize`

# In[16]:


p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot() + ggtitle("dotsize=1.0 (default)")
p2 = p + geom_dotplot(dotsize=0.5) + ggtitle("dotsize=0.5")
p3 = p + geom_dotplot(dotsize=1.5) + ggtitle("dotsize=1.5")

plot_matrix([p1, p2, p3])


# In[17]:


p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot() + ggtitle("dotsize=1.0 (default)")
p2 = p + geom_ydotplot(dotsize=0.5) + ggtitle("dotsize=0.5")
p3 = p + geom_ydotplot(dotsize=1.5) + ggtitle("dotsize=1.5")

plot_matrix([p1, p2, p3])


# ### `center`

# In[18]:


p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_dotplot(method='histodot', center=12.0) + ggtitle("center=12.0")

plot_matrix([p1, p2])


# In[19]:


p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_ydotplot(method='histodot', center=12.0) + ggtitle("center=12.0")

plot_matrix([p1, p2])


# ### `boundary`

# In[20]:


p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_dotplot(method='histodot', boundary=11.0) + ggtitle("boundary=11.0")

plot_matrix([p1, p2])


# In[21]:


p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_ydotplot(method='histodot', boundary=11.0) + ggtitle("boundary=11.0")

plot_matrix([p1, p2])


# ### `bins`

# In[22]:


p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_dotplot(method='histodot', bins=20) + ggtitle("bins=20")

plot_matrix([p1, p2])


# In[23]:


p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_ydotplot(method='histodot', bins=20) + ggtitle("bins=20")

plot_matrix([p1, p2])


# ## Grouping

# In[24]:


(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(aes(fill=as_discrete("drv")), color='black')
    + ggtitle("method='dotdensity'")
)


# In[25]:


p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot(aes(fill=as_discrete("drv")), method='histodot', color='black')
    + ggtitle("method='histodot', stackgroups=False (default)")
)
p2 = (
    p
    + geom_dotplot(aes(fill=as_discrete("drv")), method='histodot',
                   stackgroups=True, color='black')
    + ggtitle("method='histodot', stackgroups=True")
)

plot_matrix([p1, p2])


# In[26]:


p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(aes(fill=as_discrete("year")), color='black')
    + ggtitle("method='dotdensity', stackgroups=False (default)")
)
p2 = (
    p
    + geom_ydotplot(aes(fill=as_discrete("year")), stackgroups=True, color='black')
    + ggtitle("method='dotdensity', stackgroups=True")
)

plot_matrix([p1, p2])


# In[27]:


p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(aes(fill=as_discrete("year")),
                    method='histodot', color='black')
    + ggtitle("method='histodot', stackgroups=False (default)")
)
p2 = (
    p
    + geom_ydotplot(aes(fill=as_discrete("year")),
                    method='histodot', stackgroups=True, color='black')
    + ggtitle("method='histodot', stackgroups=True")
)

plot_matrix([p1, p2])


# ## Tooltips

# In[28]:


(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(
        tooltips=layer_tooltips()
        .line("Stack center|^x")
        .line("Number of dots in stack|@..count..")
        .format('@..binwidth..', ".3f")
        .line("Width of the bin|@..binwidth..")
    )
    + ggtitle("With tooltips")
)


# In[29]:


(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(
        tooltips=layer_tooltips()
        .line("^x")
        .line("Stack center|^y")
        .line("Number of dots in stack|@..count..")
        .format('@..binwidth..', ".3f")
        .line("Width of the bin|@..binwidth..")
    )
    + ggtitle("With tooltips")
)


# ## Facetting

# In[30]:


(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(aes(fill=as_discrete("year")), color='black')
    + facet_grid(x="year")
    + ggtitle("facet_grid()")
)


# In[31]:


(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(aes(fill="drv"), color='black')
    + facet_grid(x="year")
    + ggtitle("facet_grid()")
)


# ## Flip coordinates

# In[32]:


(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot()
    + coord_flip()
    + ggtitle("Flip coordinates")
)


# In[33]:


(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(aes(fill="drv"), color='black')
    + coord_flip()
    + ggtitle("Flip coordinates")
)


# ## Other layers

# In[34]:


(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(aes(fill=as_discrete("year")), method='histodot',
                   bins=15, stackdir='centerwhole',
                   stackratio=.75, dotsize=.75,
                   color='black', alpha=.5, size=1)
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)


# In[35]:


(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(aes(fill=as_discrete("drv")), method='histodot',
                    bins=15, stackdir='center', stackratio=.75,
                    color='black', alpha=.5, size=1)
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)