#!/usr/bin/env python
# coding: utf-8
# # Lets-Plot Cheatbook
# ## Contents
#
# - [Preparation](#preparation)
# - [Imports](#preparation-imports)
# - [Data](#preparation-data)
# - [Basics](#basics)
# - [Features](#features)
# - [Interactive Maps](#features-interactive_maps)
# - [Customizable Tooltips](#features-customizable_tooltips)
# - [Formatting](#features-formatting)
# - [Sampling](#features-sampling)
# - [Images](#features-images)
# - [Correlation Plot](#features-correlation_plot)
# - [Joint Plot](#features-joint_plot)
# - [Residual Plot](#features-residual_plot)
# - [Geoms](#geoms)
# - [Graphical Primitives](#geoms-graphical_primitives)
# - [Line Segments](#geoms-graphical_primitives-line_segments)
# - [One Variable](#geoms-one_variable)
# - [Continuous](#geoms-one_variable-continuous)
# - [Discrete](#geoms-one_variable-discrete)
# - [Two Variables](#geoms-two_variables)
# - [Both Continuous](#geoms-two_variables-both_continuous)
# - [One Discrete, One Continuous](#geoms-two_variables-one_discrete_one_continuous)
# - [Both Discrete](#geoms-two_variables-both_discrete)
# - [Continuous Bivariate Distribution](#geoms-two_variables-continuous_bivariate_distribution)
# - [Continuous Function](#geoms-two_variables-continuous_function)
# - [Visualizing Error](#geoms-two_variables-visualizing_error)
# - [Maps](#geoms-two_variables-maps)
# - [Three Variables](#geoms-three_variables)
# - [Stats](#stats)
# - [Identity](#stats-identity)
# - [One Variable](#stats-one_variable)
# - [Continuous](#stats-one_variable-continuous)
# - [Discrete](#stats-one_variable-discrete)
# - [Two Variables](#stats-two_variables)
# - [Both Continuous](#stats-two_variables-both_continuous)
# - [One Discrete, One Continuous](#stats-two_variables-one_discrete_one_continuous)
# - [Both Discrete](#stats-two_variables-both_discrete)
# - [Continuous Bivariate Distribution](#stats-two_variables-continuous_bivariate_distribution)
# - [Three Variables](#stats-three_variables)
# - [Scales](#scales)
# - [General Purpose Scales](#scales-general_purpose_scales)
# - [X & Y Location Scales](#scales-x_and_y_location_scales)
# - [Color & Fill Scales](#scales-color_and_fill_scales)
# - [Continuous](#scales-color_and_fill_scales-continuous)
# - [Discrete](#scales-color_and_fill_scales-discrete)
# - [Size & Shape Scales](#scales-size_and_shape_scales)
# - [Coordinate Systems](#coordinate_systems)
# - [Position Adjustments](#position_adjustments)
# - [Themes](#themes)
# - [Flavors](#flavors)
# - [Faceting](#faceting)
# - [Labels & Legends](#labels_and_legends)
# - [Zooming](#zooming)
# ## Preparation
# ### Imports
# In[1]:
from io import BytesIO
import requests
import numpy as np
import pandas as pd
from PIL import Image
from scipy.stats import multivariate_normal
from lets_plot import *
from lets_plot.mapping import as_discrete
from lets_plot.bistro import *
from lets_plot.geo_data import *
LetsPlot.setup_html()
# ### Data
# In[2]:
# Fuel-economy table; most examples below plot columns of this frame.
mpg_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv'
)
mpg_df.head(3)
# In[3]:
# Per-class aggregates of `hwy`: min, median, max and row count.
class_df = (
    mpg_df
    .groupby('class')['hwy']
    .agg(['min', 'median', 'max', 'count'])
    .reset_index()
)
class_df.head(3)
# In[4]:
# Mode of `fl` for every (cty, hwy) pair, flattened back to plain columns.
fl_df = (
    mpg_df
    .groupby(['cty', 'hwy'])['fl']
    .agg(pd.Series.mode)
    .to_frame('fl')
    .reset_index()
)
fl_df.head(3)
# In[5]:
# Economics time series; the `date` column is parsed as datetimes on load.
economics_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/economics.csv',
    parse_dates=['date'],
)
economics_df.head(3)
# In[6]:
midwest_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/midwest.csv'
)
midwest_df.head()
# In[7]:
# Total population per state, summed from `poptotal`.
pop_df = (
    midwest_df
    .groupby('state')['poptotal']
    .sum()
    .to_frame('population')
    .reset_index()
)
pop_df.head(3)
# In[8]:
# Geocode the states found in pop_df (US scope) and fetch their boundaries.
states_df = geocode('state', pop_df['state'], scope='US').get_boundaries(9)
states_df.head(3)
# In[9]:
def generate_random_data(size=50, mean=(0, 0), cov=((1, .5), (.5, 1)), seed=42):
    """Evaluate a bivariate normal density on a regular square grid.

    The grid spans [-1, 1] on both axes with `size` points per axis, so the
    returned frame has `size * size` rows.

    Parameters
    ----------
    size : int
        Number of grid points along each axis.
    mean : sequence of two numbers
        Mean vector passed to `scipy.stats.multivariate_normal`.
    cov : 2x2 nested sequence
        Covariance matrix passed to `scipy.stats.multivariate_normal`.
    seed : int
        Value used to seed NumPy's global random generator (see note below).

    Returns
    -------
    pandas.DataFrame
        Columns 'x' and 'y' hold the grid coordinates, 'z' the density value.
    """
    # Kept for backward compatibility only: this resets NumPy's global RNG,
    # but nothing below draws random numbers, so the output does not depend
    # on `seed`.
    np.random.seed(seed)
    axis = np.linspace(-1, 1, size)
    X, Y = np.meshgrid(axis, axis)
    Z = multivariate_normal(mean, cov).pdf(np.dstack((X, Y)))
    # np.ravel also copes with the scalar that pdf() returns for a 1x1 grid.
    return pd.DataFrame({'x': np.ravel(X), 'y': np.ravel(Y), 'z': np.ravel(Z)})


random_df = generate_random_data()
random_df.head(3)
# In[10]:
# Download the sample image. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() surfaces HTTP errors here rather
# than letting PIL fail on an error page.
response = requests.get(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/source/examples/cookbook/images/fisher_boat.png',
    timeout=30,
)
response.raise_for_status()
img = Image.open(BytesIO(response.content))
img
# ## Basics
# In[11]:
# Scatter of cty vs hwy coloured by cyl, with a linear-model smoother on top.
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(aes(color='cyl'))
    + geom_smooth(method='lm')
    + scale_color_brewer(type='div', palette='Spectral')
    + theme_classic()
    + ggtitle("Simple linear smoothing")
)
# ## Features
# ### Interactive Maps
# In[12]:
# State boundaries joined to pop_df on 'state', drawn over a live map and
# coloured/filled by population.
(
    ggplot()
    + geom_livemap()
    + geom_map(
        aes(color='population', fill='population'),
        data=pop_df, map=states_df, map_join='state', size=1, alpha=.3,
    )
    + scale_color_gradient(low='#1a9641', high='#d7191c')
    + scale_fill_gradient(low='#1a9641', high='#d7191c')
)
# ### Customizable Tooltips
# In[13]:
# Bar chart with a three-line custom tooltip.
(
    ggplot(mpg_df, aes(x='fl', fill=as_discrete('year')))
    + geom_bar(
        tooltips=(
            layer_tooltips()
            .line('fl|^x')
            .line('@|@year')
            .line('count|@..count..')
        )
    )
)
# ### Formatting
# In[14]:
# Area chart with an explicit date format on the x axis.
(
    ggplot(economics_df, aes('date', 'unemploy'))
    + geom_area(color='#253494', fill='#41b6c4')
    + scale_x_datetime(format='%e %b %Y')
)
# ### Sampling
# In[15]:
# Points sampled with sampling_group_random(2, seed=42).
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(
        aes(color=as_discrete('cyl')),
        sampling=sampling_group_random(2, seed=42),
    )
)
# ### Images
# In[16]:
# Show the downloaded image as a plot layer with the void theme.
(
    ggplot()
    + geom_imshow(np.asarray(img))
    + theme_void()
)
# ### Correlation Plot
# In[17]:
# Correlation plot over the numeric columns of mpg_df.
(
    corr_plot(data=mpg_df.select_dtypes(include=np.number), threshold=.5)
    .points()
    .labels()
    .palette_gradient(low='#d7191c', mid='#ffffbf', high='#1a9641')
    .build()
    + ggsize(400, 400)
)
# ### Joint Plot
# In[18]:
joint_plot(data=mpg_df, x='cty', y='hwy')
# ### Residual Plot
# In[19]:
residual_plot(
    data=mpg_df, x='cty', y='hwy',
    size=5, alpha=.5, color_by='drv', marginal="dens:tr",
)
# ## Geoms
# ### Graphical Primitives
# In[20]:
# Path through the unemployment series on a datetime x axis.
(
    ggplot(economics_df, aes('date', 'unemploy'))
    + scale_x_datetime()
    + geom_path()
)
# In[21]:
ggplot() + geom_polygon(data=states_df)
# In[22]:
# Unit rectangle given by constant corner coordinates.
ggplot() + geom_rect(xmin=0, xmax=1, ymin=0, ymax=1)
# In[23]:
# Ribbon spanning 900 below to 900 above the unemployment series.
(
    ggplot(economics_df, aes('date', 'unemploy'))
    + scale_x_datetime()
    + geom_ribbon(
        aes(
            ymin=economics_df['unemploy'] - 900,
            ymax=economics_df['unemploy'] + 900,
        )
    )
)
# #### Line Segments
# In[24]:
ggplot() + geom_abline(slope=.5)
# In[25]:
ggplot() + geom_hline(yintercept=0)
# In[26]:
ggplot() + geom_vline(xintercept=0)
# In[27]:
# Straight segment from (0, 0) to (1, 1) with an arrow.
ggplot() + geom_segment(x=0, y=0, xend=1, yend=1, arrow=arrow())
# In[28]:
# Same endpoints, drawn as a curve.
ggplot() + geom_curve(x=0, y=0, xend=1, yend=1, curvature=0.3, arrow=arrow())
# In[29]:
ggplot() + geom_spoke(x=0, y=0, angle=0.64, radius=5)
# ### One Variable
# #### Continuous
# In[30]:
# Each cell below maps a single column and adds one geom to it.
ggplot(mpg_df, aes(x='hwy')) + geom_area(stat='bin')
# In[31]:
ggplot(mpg_df, aes(x='hwy')) + geom_density()
# In[32]:
ggplot(mpg_df, aes(x='hwy')) + geom_freqpoly()
# In[33]:
ggplot(mpg_df, aes(x='hwy')) + geom_histogram()
# In[34]:
ggplot(mpg_df, aes(x='hwy')) + geom_dotplot(stackratio=.5)
# In[35]:
# Q-Q points plus the Q-Q reference line for `hwy`.
(
    ggplot(mpg_df, aes(sample='hwy'))
    + geom_qq()
    + geom_qq_line()
)
# #### Discrete
# In[36]:
ggplot(mpg_df, aes(x='fl')) + geom_bar()
# In[37]:
ggplot(mpg_df) + geom_pie(aes(fill='fl'))
# In[38]:
# Square-root function evaluated over the `hwy` values.
(
    ggplot()
    + geom_function(aes(x='hwy'), data=mpg_df, fun=lambda t: t**.5)
)
# ### Two Variables
# #### Both Continuous
# In[39]:
# Two mapped columns per plot; each cell adds one geom (or a geom pair).
ggplot(mpg_df, aes('cty', 'hwy')) + geom_point()
# In[40]:
ggplot(mpg_df, aes('cty', 'hwy')) + geom_smooth()
# In[41]:
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_qq2()
    + geom_qq2_line()
)
# In[42]:
# Text at each (cty, hwy) pair, labelled with the `fl` column of fl_df.
ggplot(fl_df, aes('cty', 'hwy')) + geom_text(aes(label='fl'))
# In[43]:
ggplot(fl_df, aes('cty', 'hwy')) + geom_label(aes(label='fl'))
# #### One Discrete, One Continuous
# In[44]:
ggplot(mpg_df, aes('class', 'hwy')) + geom_boxplot()
# In[45]:
# Note the swapped mapping: hwy on x, class on y.
ggplot(mpg_df, aes('hwy', 'class')) + geom_area_ridges()
# In[46]:
ggplot(mpg_df, aes('class', 'hwy')) + geom_violin()
# In[47]:
ggplot(mpg_df, aes('class', 'hwy')) + geom_ydotplot(stackratio=.5)
# In[48]:
# Bars use the precomputed `count` column directly (identity stat).
ggplot(class_df, aes('class', 'count')) + geom_bar(stat='identity')
# #### Both Discrete
# In[49]:
ggplot(mpg_df, aes('fl', 'drv')) + geom_count()
# In[50]:
ggplot(mpg_df, aes('fl', 'drv')) + geom_jitter()
# #### Continuous Bivariate Distribution
# In[51]:
ggplot(mpg_df, aes('cty', 'hwy')) + geom_bin2d()
# In[52]:
# Contour colour is mapped to the stat-computed `..group..` variable.
ggplot(mpg_df, aes('cty', 'hwy')) + geom_density2d(aes(color='..group..'))
# In[53]:
ggplot(mpg_df, aes('cty', 'hwy')) + geom_density2df(aes(fill='..group..'))
# #### Continuous Function
# In[54]:
(
    ggplot(economics_df, aes('date', 'unemploy'))
    + scale_x_datetime()
    + geom_area()
)
# In[55]:
(
    ggplot(economics_df, aes('date', 'unemploy'))
    + scale_x_datetime()
    + geom_line()
)
# In[56]:
(
    ggplot(economics_df, aes('date', 'unemploy'))
    + scale_x_datetime()
    + geom_step()
)
# #### Visualizing Error
# In[57]:
# The class_df aggregates feed the range aesthetics: min / median / max.
(
    ggplot(class_df, aes(x='class'))
    + geom_crossbar(aes(ymin='min', y='median', ymax='max'))
)
# In[58]:
(
    ggplot(class_df, aes(x='class'))
    + geom_errorbar(aes(ymin='min', ymax='max'))
)
# In[59]:
(
    ggplot(class_df, aes(x='class'))
    + geom_linerange(aes(ymin='min', ymax='max'))
)
# In[60]:
(
    ggplot(class_df, aes(x='class'))
    + geom_pointrange(aes(ymin='min', y='median', ymax='max'))
)
# #### Maps
# In[61]:
ggplot() + geom_map(data=states_df)
# ### Three Variables
# In[62]:
# random_df supplies the x/y grid and the z value used by each geom below.
ggplot(random_df, aes('x', 'y')) + geom_contour(aes(z='z'))
# In[63]:
ggplot(random_df, aes('x', 'y')) + geom_contourf(aes(z='z'))
# In[64]:
ggplot(random_df, aes('x', 'y')) + geom_raster(aes(fill='z'))
# In[65]:
ggplot(random_df, aes('x', 'y')) + geom_tile(aes(fill='z'))
# ## Stats
# ### Identity
# In[66]:
# Each comparison pairs a geom using its default stat with another geom
# that is given the same stat explicitly.
p_bunch_1 = (
    ggplot(mpg_df, aes('class', 'hwy'))
    + geom_bar()
    + ggtitle("Bar geom, default stat")
)
p_bunch_2 = (
    ggplot(class_df, aes('class', 'count'))
    + geom_bar(stat='identity')
    + ggtitle("Bar geom, identity stat")
)
gggrid([p_bunch_1, p_bunch_2])
# ### One Variable
# #### Continuous
# In[67]:
ggplot(mpg_df, aes(x='hwy')) + stat_ecdf()
# #### Discrete
# In[68]:
p_bunch_1 = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_bar()
    + ggtitle("Bar geom, default stat")
)
p_bunch_2 = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_lollipop(aes(y='..count..'), stat='count')
    + ggtitle("Lollipop geom, count stat")
)
gggrid([p_bunch_1, p_bunch_2])
# In[69]:
p_bunch_1 = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_histogram()
    + ggtitle("Histogram geom, default stat")
)
p_bunch_2 = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_step(aes(y='..count..'), stat='bin')
    + ggtitle("Step geom, bin stat")
)
gggrid([p_bunch_1, p_bunch_2])
# In[70]:
p_bunch_1 = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_density()
    + ggtitle("Density geom, default stat")
)
p_bunch_2 = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_point(stat='density')
    + ggtitle("Point geom, density stat")
)
gggrid([p_bunch_1, p_bunch_2])
# ### Two Variables
# #### Both Continuous
# In[71]:
ggplot(mpg_df, aes('cty', 'hwy')) + stat_summary_bin()
# In[72]:
p_bunch_1 = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_smooth()
    + ggtitle("Smooth geom, default stat")
)
p_bunch_2 = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_crossbar(
        aes(y='hwy', ymin='..ymin..', ymax='..ymax..'),
        stat='smooth',
    )
    + ggtitle("Crossbar geom, smooth stat")
)
gggrid([p_bunch_1, p_bunch_2])
# #### One Discrete, One Continuous
# In[73]:
ggplot(mpg_df, aes('class', 'hwy')) + stat_summary()
# In[74]:
# Both plots share the same y limits (10..50).
p_bunch_1 = (
    ggplot(mpg_df, aes('class', 'hwy'))
    + geom_boxplot()
    + ylim(10, 50)
    + ggtitle("Boxplot geom, default stat")
)
p_bunch_2 = (
    ggplot(mpg_df, aes('class', 'hwy'))
    + geom_linerange(
        aes(ymin='..ymin..', ymax='..ymax..'),
        stat='boxplot', color='black',
    )
    + geom_errorbar(
        aes(ymin='..lower..', ymax='..upper..'),
        stat='boxplot', width=.9,
    )
    + ylim(10, 50)
    + ggtitle("Linerange and errorbar geoms, boxplot stat")
)
gggrid([p_bunch_1, p_bunch_2])
# #### Both Discrete
# In[75]:
ggplot(mpg_df, aes('fl', 'drv')) + stat_sum()
# #### Continuous Bivariate Distribution
# In[76]:
p_bunch_1 = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_bin2d()
    + ggtitle("Bin2d geom, default stat")
)
p_bunch_2 = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(aes(color='..count..'), stat='bin2d')
    + ggtitle("Point geom, bin2d stat")
)
gggrid([p_bunch_1, p_bunch_2])
# In[77]:
p_bunch_1 = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_density2d(aes(color='..group..'))
    + ggtitle("Density2d geom, default stat")
)
p_bunch_2 = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_tile(aes(color='..group..'), stat='density2d', size=.5)
    + ggtitle("Tile geom, density2d stat")
)
gggrid([p_bunch_1, p_bunch_2])
# ### Three Variables
# In[78]:
p_bunch_1 = (
    ggplot(random_df, aes('x', 'y'))
    + geom_contour(aes(z='z'))
    + ggtitle("Contour geom, default stat")
)
p_bunch_2 = (
    ggplot(random_df, aes('x', 'y'))
    + geom_path(aes(z='z'), stat='contour')
    + ggtitle("Path geom, contour stat")
)
gggrid([p_bunch_1, p_bunch_2])
# ## Scales
# ### General Purpose Scales
# In[79]:
# Pattern for this section: build a shared base plot, then show it once
# unchanged and once with an explicit scale added.
p_common = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_bar(aes(fill='fl'))
)
p_bunch_1 = p_common + ggtitle("Bar geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_continuous()
    + ggtitle("Bar geom, continuous fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[80]:
p_common = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_histogram(aes(fill='hwy'))
)
p_bunch_1 = p_common + ggtitle("Histogram geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_discrete(guide='none')
    + ggtitle("Histogram geom, discrete fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[81]:
p_common = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_bar(aes(alpha='fl'), color='#0c2c84', fill='#0c2c84')
)
p_bunch_1 = p_common + ggtitle("Bar geom, default alpha scale")
p_bunch_2 = (
    p_common
    + scale_alpha_manual(values=[.4, .1, .8, .85, .9])
    + ggtitle("Bar geom, manual alpha scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[82]:
p_common = (
    ggplot(economics_df, aes('date', 'unemploy'))
    + scale_x_datetime()
    + geom_point(aes(size='psavert'), shape=21, alpha=.3, show_legend=False)
)
p_bunch_1 = p_common + ggtitle("Point geom, default size scale")
p_bunch_2 = (
    p_common
    + scale_size_identity()
    + ggtitle("Point geom, identity size scale")
)
gggrid([p_bunch_1, p_bunch_2])
# ### X & Y Location Scales
# In[83]:
# Breaks at the earliest, median and latest dates; each label is the text
# before the first '-' in the break's string form.
breaks = [
    economics_df['date'].min(),
    economics_df['date'].median(),
    economics_df['date'].max(),
]
labels = [str(stamp).partition('-')[0] for stamp in breaks]
p_common = ggplot(economics_df, aes('date', 'pce')) + geom_line()
p_bunch_1 = p_common + ggtitle("Line geom, default x scale")
p_bunch_2 = (
    p_common
    + scale_x_datetime()
    + ggtitle("Line geom, datetime x scale")
)
p_bunch_3 = (
    p_common
    + scale_x_time(breaks=breaks, labels=labels)
    + ggtitle("Line geom, time x scale")
)
gggrid([p_bunch_1, p_bunch_2, p_bunch_3])
# In[84]:
p_common = (
    ggplot(midwest_df, aes('state', 'poptotal'))
    + geom_jitter(aes(color='state'))
    + coord_flip()
)
p_bunch_1 = p_common + ggtitle("Jitter geom, default y scale")
p_bunch_2 = (
    p_common
    + scale_y_log10()
    + ggtitle("Jitter geom, log10 y scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[85]:
p_common = (
    ggplot(economics_df, aes('date', 'pce'))
    + geom_line()
)
p_bunch_1 = p_common + ggtitle("Line geom, default x scale")
p_bunch_2 = (
    p_common
    + scale_x_reverse()
    + ggtitle("Line geom, reversed x scale")
)
gggrid([p_bunch_1, p_bunch_2])
# ### Color & Fill Scales
# #### Continuous
# In[86]:
# Every cell rebuilds its own base plot so it can be run independently,
# then contrasts the default fill scale with one explicit fill scale.
p_common = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_histogram(aes(fill='hwy'))
)
p_bunch_1 = p_common + ggtitle("Histogram geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_grey()
    + ggtitle("Histogram geom, grey fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[87]:
p_common = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_histogram(aes(fill='hwy'))
)
p_bunch_1 = p_common + ggtitle("Histogram geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_gradient(low='#006d2c', high='#edf8e9')
    + ggtitle("Histogram geom, gradient fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[88]:
p_common = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_histogram(aes(fill='hwy'))
)
p_bunch_1 = p_common + ggtitle("Histogram geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_brewer(type='seq', palette='GnBu', direction=-1)
    + ggtitle("Histogram geom, brewer fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[89]:
p_common = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_histogram(aes(fill='hwy'))
)
p_bunch_1 = p_common + ggtitle("Histogram geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_viridis(option='inferno')
    + ggtitle("Histogram geom, viridis fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[90]:
p_common = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_histogram(aes(fill='hwy'))
)
p_bunch_1 = p_common + ggtitle("Histogram geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_hue(l=80, c=150)
    + ggtitle("Histogram geom, hue fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[91]:
# This cell uses random_df (fill mapped to 'x', 7 bins) instead of mpg_df.
p_common = (
    ggplot(random_df, aes('x', 'y'))
    + geom_histogram(aes(fill='x'), bins=7)
)
p_bunch_1 = p_common + ggtitle("Histogram geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_gradient2(low='#4575b4', mid='#ffffbf', high='#d73027')
    + ggtitle("Histogram geom, gradient2 fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# #### Discrete
# In[92]:
p_common = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_bar(aes(fill='fl'))
)
p_bunch_1 = p_common + ggtitle("Bar geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_brewer(type='qual', palette='Set1')
    + ggtitle("Bar geom, brewer fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[93]:
p_common = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_bar(aes(fill='fl'))
)
p_bunch_1 = p_common + ggtitle("Bar geom, default fill scale")
p_bunch_2 = (
    p_common
    + scale_fill_manual(
        values=['#fbb4ae', '#b3cde3', '#ccebc5', '#decbe4', '#fed9a6']
    )
    + ggtitle("Bar geom, manual fill scale")
)
gggrid([p_bunch_1, p_bunch_2])
# ### Size & Shape Scales
# In[94]:
# Default scale on the left, explicit size/linewidth/stroke/shape scale on
# the right, for each aesthetic in turn.
p_common = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(aes(size='cyl'), shape=21, alpha=.2)
)
p_bunch_1 = p_common + ggtitle("Point geom, default size scale")
p_bunch_2 = (
    p_common
    + scale_size_area()
    + ggtitle("Point geom, area size scale")
)
gggrid([p_bunch_1, p_bunch_2])
# In[95]:
p_common = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(aes(size='cyl'), shape=21, alpha=.2)
)
p_bunch_1 = p_common + ggtitle("Point geom, default size scale")
p_bunch_2 = (
    p_common
    + scale_size(range=[3, 6])
    + ggtitle("Point geom, size scale in range 3..6")
)
gggrid([p_bunch_1, p_bunch_2])
# In[96]:
# Only rows with fl == "p" are plotted here.
p_common = (
    ggplot(mpg_df[mpg_df["fl"] == "p"], aes('hwy', 'cty'))
    + geom_lollipop(aes(linewidth='cyl'), slope=.7, intercept=.8, dir='s')
    + coord_fixed()
)
p_bunch_1 = p_common + ggtitle("Lollipop geom, default linewidth scale")
p_bunch_2 = (
    p_common
    + scale_linewidth(range=[.5, 2])
    + ggtitle("Lollipop geom, scaled linewidth")
)
gggrid([p_bunch_1, p_bunch_2])
# In[97]:
p_common = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(aes(stroke='cyl'), shape=1, alpha=.2)
)
p_bunch_1 = p_common + ggtitle("Point geom, default stroke scale")
p_bunch_2 = (
    p_common
    + scale_stroke(range=[.5, 2])
    + ggtitle("Point geom, scaled stroke")
)
gggrid([p_bunch_1, p_bunch_2])
# In[98]:
p_common = (
    ggplot(fl_df, aes('cty', 'hwy'))
    + geom_point(aes(shape='fl'))
)
p_bunch_1 = p_common + ggtitle("Point geom, default shape scale")
p_bunch_2 = (
    p_common
    + scale_shape(solid=False)
    + ggtitle("Point geom, shape scale with solid=False")
)
gggrid([p_bunch_1, p_bunch_2])
# In[99]:
p_common = (
    ggplot(fl_df, aes('cty', 'hwy'))
    + geom_point(aes(shape='fl'))
)
p_bunch_1 = p_common + ggtitle("Point geom, default shape scale")
p_bunch_2 = (
    p_common
    + scale_shape_manual(values=[0, 12, 1, 10, 3, 13, 2, 4])
    + ggtitle("Point geom, manual shape scale")
)
gggrid([p_bunch_1, p_bunch_2])
# ## Coordinate Systems
# In[100]:
# Same base plot shown with the default coordinates and with one explicit
# coord_* function applied.
p_common = ggplot(mpg_df, aes(x='fl')) + geom_bar()
p_bunch_1 = p_common + ggtitle("Bar geom, default coordinate system")
p_bunch_2 = (
    p_common
    + coord_cartesian(ylim=[0, 250])
    + ggtitle("Bar geom, cartesian coordinate system")
)
gggrid([p_bunch_1, p_bunch_2])
# In[101]:
p_common = ggplot(mpg_df, aes('cty', 'hwy')) + geom_point()
p_bunch_1 = p_common + ggtitle("Point geom, default coordinate system")
p_bunch_2 = (
    p_common
    + coord_polar()
    + ggtitle("Point geom, polar coordinate system")
)
gggrid([p_bunch_1, p_bunch_2])
# In[102]:
p_common = ggplot(mpg_df, aes('cty', 'hwy')) + geom_point()
p_bunch_1 = p_common + ggtitle("Point geom, default coordinate system")
p_bunch_2 = (
    p_common
    + coord_fixed()
    + ggtitle("Point geom, fixed coordinate system")
)
gggrid([p_bunch_1, p_bunch_2])
# In[103]:
p_common = ggplot() + geom_polygon(data=states_df)
p_bunch_1 = p_common + ggtitle("Polygon geom, default coordinate system")
p_bunch_2 = (
    p_common
    + coord_map()
    + ggtitle("Polygon geom, map coordinate system")
)
gggrid([p_bunch_1, p_bunch_2])
# In[104]:
p_common = ggplot(mpg_df, aes(x='fl')) + geom_bar()
p_bunch_1 = p_common + ggtitle("Bar geom, default coordinate system")
p_bunch_2 = (
    p_common
    + coord_flip()
    + ggtitle("Bar geom, flipped coordinates")
)
gggrid([p_bunch_1, p_bunch_2])
# ## Position Adjustments
# In[105]:
# Each grid contrasts a layer's default position with an explicit one.
p_bunch_1 = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_bar(aes(fill='drv'))
    + ggtitle("Bar geom, default position")
)
p_bunch_2 = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_bar(aes(fill='drv'), position='dodge')
    + ggtitle("Bar geom, dodge position")
)
gggrid([p_bunch_1, p_bunch_2])
# In[106]:
p_bunch_1 = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_density(aes(fill='drv'), color="black")
    + ggtitle("Density geom, default position")
)
p_bunch_2 = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_density(aes(fill='drv'), color="black", position='stack')
    + ggtitle("Density geom, stack position")
)
p_bunch_3 = (
    ggplot(mpg_df, aes(x='hwy'))
    + geom_density(aes(fill='drv'), color="black", position='fill')
    + ggtitle("Density geom, fill position")
)
gggrid([p_bunch_1, p_bunch_2, p_bunch_3])
# In[107]:
p_bunch_1 = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point()
    + ggtitle("Point geom, default position")
)
p_bunch_2 = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(position='jitter')
    + ggtitle("Point geom, jitter position")
)
gggrid([p_bunch_1, p_bunch_2])
# In[108]:
# Points are layered over boxplots; only the point position differs.
p_common = ggplot(mpg_df, aes('cyl', 'hwy', fill='drv'))
p_bunch_1 = (
    p_common
    + geom_boxplot()
    + geom_point(color='black', shape=21)
    + ggtitle("Point geom, default position")
)
p_bunch_2 = (
    p_common
    + geom_boxplot()
    + geom_point(position='jitterdodge', color='black', shape=21)
    + ggtitle("Point geom, jitterdodge position")
)
gggrid([p_bunch_1, p_bunch_2])
# In[109]:
# Count labels over bars; the second variant nudges them by y=-2.5 and
# draws them in white.
p_common = ggplot(mpg_df, aes('class', 'hwy')) + geom_bar()
p_bunch_1 = (
    p_common
    + geom_text(aes(label='..count..'), stat='count')
    + ggtitle("Text geom, default position")
)
p_bunch_2 = (
    p_common
    + geom_text(
        aes(label='..count..'), stat='count', color='white',
        position=position_nudge(y=-2.5),
    )
    + ggtitle("Text geom, nudge position")
)
gggrid([p_bunch_1, p_bunch_2])
# ## Themes
# In[110]:
# One scatter plot rendered with each built-in theme, two per grid row.
p_common = ggplot(mpg_df, aes('cty', 'hwy')) + geom_point()
p_bunch = [
    p_common + ggtitle("Default theme"),
    p_common + theme_none() + ggtitle("Empty theme"),
    p_common + theme_void() + ggtitle("Void theme"),
    p_common + theme_minimal() + ggtitle("Minimalistic theme"),
    p_common + theme_classic() + ggtitle("Classic theme"),
    p_common + theme_grey() + ggtitle("Grey theme"),
    p_common + theme_light() + ggtitle("Light theme"),
    p_common + theme_bw() + ggtitle("Dark-on-light theme"),
]
gggrid(p_bunch, ncol=2)
# In[111]:
# Colours shared by the custom theme elements below.
background_color_light = '#ffffe5'
main_color_dark = '#00441b'
main_color_normal = '#238b45'
main_color_light = '#f7fcf5'
custom_theme = theme(
    line=element_line(color=main_color_normal, size=2),
    rect=element_rect(color=main_color_normal, fill=main_color_light, size=2),
    text=element_text(color=main_color_dark, family="Courier", face="bold"),
    geom=element_geom(pen=main_color_normal),
    axis_ontop=True,
    axis_ticks=element_line(color=main_color_normal, size=1),
    axis_ticks_length=7,
    legend_background=element_rect(size=1),
    legend_position='bottom',
    panel_grid_major=element_line(color=main_color_normal, size=.5),
    panel_grid_minor='blank',
    plot_background=element_rect(fill=background_color_light, size=1),
    axis_tooltip=element_rect(color=main_color_dark),
)
# The custom theme is added after theme_none().
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point()
    + theme_none()
    + custom_theme
    + ggtitle("Custom theme")
)
# ## Flavors
# In[112]:
# Grey-themed scatter plot rendered without a flavor and with each flavor.
p_common = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point()
    + theme_grey()
)
p_bunch = [
    p_common + ggtitle("Without flavor"),
    p_common + flavor_darcula() + ggtitle("flavor_darcula()"),
    p_common + flavor_solarized_light() + ggtitle("flavor_solarized_light()"),
    p_common + flavor_solarized_dark() + ggtitle("flavor_solarized_dark()"),
    p_common + flavor_high_contrast_light() + ggtitle("flavor_high_contrast_light()"),
    p_common + flavor_high_contrast_dark() + ggtitle("flavor_high_contrast_dark()"),
]
gggrid(p_bunch, ncol=2)
# ## Faceting
# In[113]:
# Scatter plot split into a grid of panels: fl across, year down.
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point()
    + facet_grid(x='fl', y='year')
)
# In[114]:
# Same scatter plot wrapped by fl into three columns.
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point()
    + facet_wrap(facets='fl', ncol=3)
)
# ## Labels & Legends
# In[115]:
p_common = ggplot(mpg_df, aes('cty', 'hwy')) + geom_point()
p_bunch_1 = p_common + ggtitle("Default plot")
p_bunch_2 = (
    p_common
    + labs(x='City miles per gallon', y='Highway miles per gallon')
    + ggtitle("Use labs()")
)
gggrid([p_bunch_1, p_bunch_2])
# In[116]:
p_common = ggplot(mpg_df, aes('cty', 'hwy')) + geom_point()
p_bunch_1 = p_common + ggtitle("Default plot")
p_bunch_2 = (
    p_common
    + xlab('City miles per gallon')
    + ylab('Highway miles per gallon')
    + ggtitle("Use xlab() and ylab()")
)
gggrid([p_bunch_1, p_bunch_2])
# In[117]:
p_common = ggplot(mpg_df, aes(x='fl')) + geom_bar(aes(fill='fl'))
p_bunch_1 = p_common + ggtitle("Default plot")
p_bunch_2 = (
    p_common
    + theme(legend_position='top')
    + ggtitle("Use legend_position='top'")
)
gggrid([p_bunch_1, p_bunch_2])
# In[118]:
p_common = (
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(aes(color='hwy'))
)
p_bunch_1 = p_common + ggtitle("Default plot")
p_bunch_2 = (
    p_common
    + scale_color_gradient(
        guide=guide_colorbar(nbin=40, barwidth=10, barheight=200)
    )
    + ggtitle("Use guide_colorbar()")
)
gggrid([p_bunch_1, p_bunch_2])
# In[119]:
p_common = (
    ggplot(mpg_df, aes(x='fl'))
    + geom_bar(aes(fill='manufacturer'))
    + theme(legend_position='bottom')
)
p_bunch_1 = p_common + ggtitle("Default plot")
p_bunch_2 = (
    p_common
    + scale_fill_discrete(guide=guide_legend(nrow=3))
    + ggtitle("Use guide_legend()")
)
gggrid([p_bunch_1, p_bunch_2])
# ## Zooming
# In[120]:
# Two ways to restrict the view to x in [-92, -82] and y in [36, 43]:
# scale limits versus coord_map limits.
p_common = (
    ggplot()
    + geom_map(data=states_df)
    + theme_classic()
)
p_bunch_1 = p_common + ggtitle("Default plot")
p_bunch_2 = (
    p_common
    + scale_x_continuous(limits=[-92, -82])
    + ylim(36, 43)
    + ggtitle("Zoom with clipping")
)
p_bunch_3 = (
    p_common
    + coord_map(xlim=[-92, -82], ylim=[36, 43])
    + ggtitle("Zoom without clipping")
)
gggrid([p_bunch_1, p_bunch_2, p_bunch_3])