from datetime import datetime
from io import BytesIO
import requests
import numpy as np
import pandas as pd
from PIL import Image
from scipy.stats import multivariate_normal
from lets_plot import *
from lets_plot.mapping import as_discrete
from lets_plot.bistro import *
from lets_plot.geo_data import *
LetsPlot.setup_html()
The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).
# Load the mpg sample dataset (CSV hosted in the lets-plot-docs repository)
# and preview its first three rows.
mpg_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv'
)
mpg_df.head(n=3)
| | Unnamed: 0 | manufacturer | model | displ | year | cyl | trans | drv | cty | hwy | fl | class |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | audi | a4 | 1.8 | 1999 | 4 | auto(l5) | f | 18 | 29 | p | compact |
1 | 2 | audi | a4 | 1.8 | 1999 | 4 | manual(m5) | f | 21 | 29 | p | compact |
2 | 3 | audi | a4 | 2.0 | 2008 | 4 | manual(m6) | f | 20 | 31 | p | compact |
# Per-class summary of the `hwy` column: minimum, median, maximum and row count.
class_df = (
    mpg_df.groupby('class')['hwy']
    .agg(['min', 'median', 'max', 'count'])
    .reset_index()
)
class_df.head(n=3)
| | class | min | median | max | count |
---|---|---|---|---|---|
0 | 2seater | 23 | 25.0 | 26 | 5 |
1 | compact | 23 | 27.0 | 44 | 47 |
2 | midsize | 23 | 27.0 | 32 | 41 |
# Most frequent `fl` value for every (cty, hwy) pair. When several values tie,
# `pd.Series.mode` returns all of them, so a cell may hold an array of modes.
fl_df = (
    mpg_df.groupby(['cty', 'hwy'])['fl']
    .agg(pd.Series.mode)
    .to_frame('fl')
    .reset_index()
)
fl_df.head(n=3)
| | cty | hwy | fl |
---|---|---|---|
0 | 9 | 12 | e |
1 | 11 | 14 | [e, p] |
2 | 11 | 15 | r |
# Load the economics sample dataset, parsing `date` as datetimes, then keep
# only the rows dated strictly after 2000-01-01.
economics_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/economics.csv',
    parse_dates=['date'],
)
economics_df = economics_df.loc[economics_df['date'] > datetime(2000, 1, 1)]
economics_df.head(n=3)
| | Unnamed: 0 | date | pce | pop | psavert | uempmed | unemploy |
---|---|---|---|---|---|---|---|
391 | 392 | 2000-02-01 | 6619.7 | 281190.0 | 4.8 | 6.1 | 5858 |
392 | 393 | 2000-03-01 | 6685.8 | 281409.0 | 4.5 | 6.0 | 5733 |
393 | 394 | 2000-04-01 | 6671.1 | 281653.0 | 5.0 | 6.1 | 5481 |
# Load the midwest sample dataset (CSV hosted in the lets-plot-docs repository)
# and preview it with the default number of rows.
midwest_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/midwest.csv'
)
midwest_df.head()
| | Unnamed: 0 | PID | county | state | area | poptotal | popdensity | popwhite | popblack | popamerindian | ... | percollege | percprof | poppovertyknown | percpovertyknown | percbelowpoverty | percchildbelowpovert | percadultpoverty | percelderlypoverty | inmetro | category |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 561 | ADAMS | IL | 0.052 | 66090 | 1270.961540 | 63917 | 1702 | 98 | ... | 19.631392 | 4.355859 | 63628 | 96.274777 | 13.151443 | 18.011717 | 11.009776 | 12.443812 | 0 | AAR |
1 | 2 | 562 | ALEXANDER | IL | 0.014 | 10626 | 759.000000 | 7054 | 3496 | 19 | ... | 11.243308 | 2.870315 | 10529 | 99.087145 | 32.244278 | 45.826514 | 27.385647 | 25.228976 | 0 | LHR |
2 | 3 | 563 | BOND | IL | 0.022 | 14991 | 681.409091 | 14477 | 429 | 35 | ... | 17.033819 | 4.488572 | 14235 | 94.956974 | 12.068844 | 14.036061 | 10.852090 | 12.697410 | 0 | AAR |
3 | 4 | 564 | BOONE | IL | 0.017 | 30806 | 1812.117650 | 29344 | 127 | 46 | ... | 17.278954 | 4.197800 | 30337 | 98.477569 | 7.209019 | 11.179536 | 5.536013 | 6.217047 | 1 | ALU |
4 | 5 | 565 | BROWN | IL | 0.018 | 5836 | 324.222222 | 5264 | 547 | 14 | ... | 14.475999 | 3.367680 | 4815 | 82.505140 | 13.520249 | 13.022889 | 11.143211 | 19.200000 | 0 | AAR |
5 rows × 29 columns
# Total population per state: sum `poptotal` over all rows sharing a `state`.
pop_df = (
    midwest_df.groupby('state')['poptotal']
    .sum()
    .to_frame('population')
    .reset_index()
)
pop_df.head(n=3)
| | state | population |
---|---|---|
0 | IL | 11430602 |
1 | IN | 5544159 |
2 | MI | 9295297 |
# Geocode the states listed in `pop_df` (search scoped to the US) and fetch
# their boundary geometries; 9 is the argument passed to `get_boundaries`.
states_df = (
    geocode('state', pop_df['state'], scope='US')
    .get_boundaries(9)
)
states_df.head(n=3)
| | state | found name | geometry |
---|---|---|---|
0 | IL | Illinois | MULTIPOLYGON (((-89.00848 37.22158, -89.00574 ... |
1 | IN | Indiana | MULTIPOLYGON (((-86.07513 37.99400, -86.07788 ... |
2 | MI | Michigan | MULTIPOLYGON (((-90.31860 46.54564, -90.32135 ... |
def generate_random_data(size=50, mean=(0, 0), cov=((1, .5), (.5, 1)), seed=42):
    """Evaluate a bivariate normal density on a regular square grid.

    Builds a ``size`` x ``size`` grid of points evenly spaced over
    ``[-1, 1]`` on both axes and computes the probability density of the
    multivariate normal distribution with the given ``mean`` and ``cov``
    at each grid point.

    Args:
        size: Number of grid points along each axis.
        mean: Two-element mean vector of the distribution.
        cov: 2x2 covariance matrix of the distribution.
        seed: Value passed to ``np.random.seed``.

    Returns:
        A ``pandas.DataFrame`` with ``size * size`` rows and the columns
        ``'x'``, ``'y'`` (grid coordinates) and ``'z'`` (density value).
    """
    # Immutable tuple defaults replace the original list defaults so the
    # default objects can never be mutated and shared between calls.

    # NOTE: the density evaluation below is deterministic and draws no random
    # numbers, so the seed does not influence the returned data. The call is
    # kept because it resets NumPy's global RNG state, which code running
    # after this function may rely on.
    np.random.seed(seed)

    axis = np.linspace(-1, 1, size)
    X, Y = np.meshgrid(axis, axis)
    # dstack pairs the coordinates into an array of (x, y) points, which is
    # the layout `pdf` expects for evaluating many points at once.
    Z = multivariate_normal(mean, cov).pdf(np.dstack((X, Y)))
    return pd.DataFrame({'x': X.flatten(), 'y': Y.flatten(), 'z': Z.flatten()})
# Build the density grid with the function's default arguments and preview
# the first three rows.
random_df = generate_random_data()
random_df.head(n=3)
| | x | y | z |
---|---|---|---|
0 | -1.000000 | -1.0 | 0.094354 |
1 | -0.959184 | -1.0 | 0.096849 |
2 | -0.918367 | -1.0 | 0.099189 |
# Download the sample PNG and decode it with PIL.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP failure directly instead of letting PIL
# fail later with an unrelated "cannot identify image" error on an error page.
response = requests.get(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/source/examples/cookbook/images/fisher_boat.png',
    timeout=30,
)
response.raise_for_status()
img = Image.open(BytesIO(response.content))
img
# Scatter plot of `cty` against `hwy`, points coloured by `cyl`, with a
# linear-model ('lm') smoothing layer drawn on top.
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(aes(color='cyl'))
    + geom_smooth(method='lm')
    + scale_color_brewer(type='div', palette='Spectral')
    + theme_classic()
    + ggtitle("Simple linear smoothing")
)
# Interactive base map with the state boundaries drawn on top. `pop_df` is
# joined to the `states_df` geometries on 'state', and both outline and fill
# follow `population` along the same '#1a9641' -> '#d7191c' gradient.
(
    ggplot()
    + geom_livemap()
    + geom_map(
        aes(color='population', fill='population'),
        data=pop_df,
        map=states_df,
        map_join='state',
        size=1,
        alpha=.3,
    )
    + scale_color_gradient(low='#1a9641', high='#d7191c')
    + scale_fill_gradient(low='#1a9641', high='#d7191c')
)