#!/usr/bin/env python
# coding: utf-8

# # Handling overplotting on a scatter plot: `geom_count()`/`stat_sum()`
# 
# `geom_count()` counts the number of observations at each location.
# 
# Computed variables:
# 
# - `..n..` - number of observations at location
# - `..prop..` - value in range 0..1 : share of observations at location
# - `..proppct..` - value in range 0..100 : % of observations at location

# In[1]:


import pandas as pd

from lets_plot import *


# In[2]:


# Initialize lets-plot's HTML output before any plot is rendered.
LetsPlot.setup_html()


# In[3]:


# Read the `mpg` sample dataset (CSV hosted in the lets-plot docs repository)
# and preview its first rows.
mpg_df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv",
)
mpg_df.head()


# In[4]:


# Base plot reused by every example below: vehicle "class" on x and
# drivetrain ("drv") on y, both mapped as discrete variables.
p = ggplot(
    data=mpg_df,
    mapping=aes(
        x=as_discrete('class', order=1),
        y=as_discrete('drv', order=1),
    ),
)


# #### 1. Plot an Observation Count by Location

# In[5]:


# No explicit size mapping: geom_count() counts the observations at each
# (class, drv) location, as described in the notes at the top of this notebook.
p + geom_count()


# In[6]:


# Same base plot using stat_sum(), which the notebook title pairs with geom_count().
# NOTE(review): expected to render the same counts as the previous cell — confirm.
p + stat_sum()


# #### 2. Plot the Share of Observations by Location

# In[7]:


# Size each point by `..prop..`, the share (0..1) of observations at its location.
p + geom_count(
    mapping=aes(size='..prop..'),
)


# #### 3. Plot the Share of Observations by Drivetrain Type within Each Vehicle "class"
# 
# Note: shares are computed per group, so group the observations by "class".

# In[8]:


# Size points by `..prop..` while grouping by vehicle "class", so the plot
# shows the drivetrain share within each class.
p + geom_count(
    mapping=aes(
        size='..prop..',
        group='class',
    ),
)