Lets-Plot Cheatbook

Preparation

Imports

In [1]:
from datetime import datetime
from io import BytesIO
import requests

import numpy as np
import pandas as pd
from PIL import Image
from scipy.stats import multivariate_normal

from lets_plot import *
from lets_plot.mapping import as_discrete
from lets_plot.bistro.corr import *
from lets_plot.geo_data import *
LetsPlot.setup_html()
The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).

Functions

In [2]:
def plot_matrix(plots=[], width=400, height=300, columns=2):
    bunch = GGBunch()
    for i in range(len(plots)):
        row = int(i / columns)
        column = i % columns
        bunch.add_plot(plots[i], column * width, row * height, width, height)
    return bunch.show()

Data

In [3]:
mpg_df = pd.read_csv('https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv')
mpg_df.head(3)
Out[3]:
Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy fl class
0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
In [4]:
class_df = mpg_df.groupby('class').hwy.agg(['min', 'median', 'max', 'count']).reset_index()
class_df.head(3)
Out[4]:
class min median max count
0 2seater 23 25.0 26 5
1 compact 23 27.0 44 47
2 midsize 23 27.0 32 41
In [5]:
fl_df = mpg_df.groupby(['cty', 'hwy']).fl.agg(pd.Series.mode).to_frame('fl').reset_index()
fl_df.head(3)
Out[5]:
cty hwy fl
0 9 12 e
1 11 14 [e, p]
2 11 15 r
In [6]:
economics_df = pd.read_csv('https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/economics.csv', \
                           parse_dates=['date'])
economics_df = economics_df[economics_df.date > datetime(2000, 1, 1)]
economics_df.head(3)
Out[6]:
Unnamed: 0 date pce pop psavert uempmed unemploy
391 392 2000-02-01 6619.7 281190.0 4.8 6.1 5858
392 393 2000-03-01 6685.8 281409.0 4.5 6.0 5733
393 394 2000-04-01 6671.1 281653.0 5.0 6.1 5481
In [7]:
midwest_df = pd.read_csv('https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/midwest.csv')
midwest_df.head()
Out[7]:
Unnamed: 0 PID county state area poptotal popdensity popwhite popblack popamerindian ... percollege percprof poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty inmetro category
0 1 561 ADAMS IL 0.052 66090 1270.961540 63917 1702 98 ... 19.631392 4.355859 63628 96.274777 13.151443 18.011717 11.009776 12.443812 0 AAR
1 2 562 ALEXANDER IL 0.014 10626 759.000000 7054 3496 19 ... 11.243308 2.870315 10529 99.087145 32.244278 45.826514 27.385647 25.228976 0 LHR
2 3 563 BOND IL 0.022 14991 681.409091 14477 429 35 ... 17.033819 4.488572 14235 94.956974 12.068844 14.036061 10.852090 12.697410 0 AAR
3 4 564 BOONE IL 0.017 30806 1812.117650 29344 127 46 ... 17.278954 4.197800 30337 98.477569 7.209019 11.179536 5.536013 6.217047 1 ALU
4 5 565 BROWN IL 0.018 5836 324.222222 5264 547 14 ... 14.475999 3.367680 4815 82.505140 13.520249 13.022889 11.143211 19.200000 0 AAR

5 rows × 29 columns

In [8]:
pop_df = midwest_df.groupby('state').poptotal.sum().to_frame('population').reset_index()
pop_df.head(3)
Out[8]:
state population
0 IL 11430602
1 IN 5544159
2 MI 9295297
In [9]:
states_df = geocode('state', pop_df.state, scope='US').get_boundaries(9)
states_df.head(3)
Out[9]:
state found name geometry
0 IL Illinois MULTIPOLYGON (((-89.00848 37.22158, -89.00574 ...
1 IN Indiana MULTIPOLYGON (((-86.07513 37.99400, -86.07788 ...
2 MI Michigan MULTIPOLYGON (((-90.31860 46.54564, -90.32135 ...
In [10]:
def generate_random_data(size=50, mean=[0, 0], cov=[[1, .5], [.5, 1]], seed=42):
    np.random.seed(seed)
    x = np.linspace(-1, 1, size)
    y = np.linspace(-1, 1, size)
    X, Y = np.meshgrid(x, y)
    Z = multivariate_normal(mean, cov).pdf(np.dstack((X, Y)))
    return pd.DataFrame({'x': X.flatten(), 'y': Y.flatten(), 'z': Z.flatten()})

random_df = generate_random_data()
random_df.head(3)
Out[10]:
x y z
0 -1.000000 -1.0 0.094354
1 -0.959184 -1.0 0.096849
2 -0.918367 -1.0 0.099189
In [11]:
response = requests.get('https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/source/examples/cookbook/images/fisher_boat.png')
img = Image.open(BytesIO(response.content))
img
Out[11]:

Plots

In [12]:
p1 = ggplot(economics_df, aes('date', 'unemploy')) + scale_x_datetime()
p2 = ggplot(mpg_df, aes(x='hwy'))
p3 = ggplot(mpg_df, aes(x='fl'))
p4 = ggplot(mpg_df, aes('cty', 'hwy'))
p5 = ggplot(mpg_df, aes('class', 'hwy'))
p6 = ggplot(class_df, aes(x='class'))
p7 = ggplot(random_df, aes('x', 'y'))

Basics

In [13]:
ggplot(mpg_df, aes('cty', 'hwy')) + \
    geom_point(aes(color='cyl')) + \
    geom_smooth(method='lm') + \
    scale_color_brewer(type='div', palette='Spectral') + \
    theme_classic() + \
    ggtitle("Simple linear smoothing")
Out[13]:

Features

Interactive Maps

In [14]:
ggplot() + \
    geom_livemap() + \
    geom_map(aes(color='population', fill='population'), \
             data=pop_df, map=states_df, map_join='state', size=1, alpha=.3) + \
    scale_color_gradient(low='#1a9641', high='#d7191c') + \
    scale_fill_gradient(low='#1a9641', high='#d7191c')
Out[14]:

Customizable Tooltips

In [15]:
ggplot(mpg_df, aes(x='fl', fill=as_discrete('year'))) + \
    geom_bar(tooltips=layer_tooltips().line('fl|^x')
                                      .line('@|@year')
                                      .line('count|@..count..'))
Out[15]:

Formatting

In [16]:
ggplot(economics_df, aes('date', 'unemploy')) + \
    geom_area(color='#253494', fill='#41b6c4') + \
    scale_x_datetime(format='%e %b %Y')
Out[16]:

Sampling

In [17]:
ggplot(mpg_df, aes('cty', 'hwy')) + \
    geom_point(aes(color=as_discrete('cyl')), sampling=sampling_group_random(2, seed=42))
Out[17]:

Images

In [18]:
ggplot() + \
    geom_imshow(np.asarray(img)) + \
    theme_classic()
Out[18]:

Correlation Plot

In [19]:
corr_plot(data=mpg_df, threshold=.5)\
    .points().labels()\
    .palette_gradient(low='#d7191c', mid='#ffffbf', high='#1a9641')\
    .build() + \
    ggsize(400, 400)
D:\anaconda3\envs\lets-plot-docs\lib\site-packages\lets_plot\bistro\corr.py:422: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  data = data.corr()
Out[19]:

Geoms

Graphical Primitives

For p1 value see the Plots section.

In [20]:
p1 + geom_path()
Out[20]:
In [21]:
ggplot() + geom_polygon(data=states_df)
Out[21]: