#!/usr/bin/env python
# coding: utf-8

# # Handling overplotting on a scatter plot: `geom_count()`/`stat_sum()`
#
# `geom_count()` counts the number of observations at each location.
#
# Computed variables:
#
# - `..n..` - number of observations at location
# - `..prop..` - value in range 0..1 : share of observations at location
# - `..proppct..` - value in range 0..100 : % of observations at location

# In[1]:


import pandas as pd

from lets_plot import *


# In[2]:


LetsPlot.setup_html()


# In[3]:


# The "mpg" dataset is loaded straight from the lets-plot-docs repository.
mpg_url = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv"
mpg_df = pd.read_csv(mpg_url)
mpg_df.head()


# In[4]:


# Both axes are mapped as discrete variables with `order=1`;
# every plot below reuses this base plot `p`.
class_by_drv = aes(
    x=as_discrete('class', order=1),
    y=as_discrete('drv', order=1),
)
p = ggplot(mpg_df, class_by_drv)


# #### 1. Plot the Observation Count by Location

# In[5]:


p + geom_count()


# In[6]:


p + stat_sum()


# #### 2. Plot the Share of Observations by Location

# In[7]:


p + geom_count(aes(size='..prop..'))


# #### 3. Plot the Share of Observations by Drivetrain Type within each Vehicle "class"
#
# Note: group by "class".

# In[8]:


p + geom_count(aes(size='..prop..', group='class'))