from datetime import datetime
from io import BytesIO
import requests
import numpy as np
import pandas as pd
from PIL import Image
from scipy.stats import multivariate_normal
from lets_plot import *
from lets_plot.mapping import as_discrete
from lets_plot.bistro.corr import *
from lets_plot.geo_data import *
# Initialise Lets-Plot's HTML output once, before any plot is built.
# NOTE(review): presumably required for plots to render inline in a notebook — confirm against the Lets-Plot docs.
LetsPlot.setup_html()
def plot_matrix(plots=None, width=400, height=300, columns=2):
    """Arrange plots on a regular grid and show them as one bunch.

    Plots are placed left to right, top to bottom: the plot at index ``i``
    lands in row ``i // columns`` and column ``i % columns``. Every cell is
    ``width`` x ``height`` and cells are packed with no gap between them.

    Args:
        plots: Iterable of plot objects to lay out. ``None`` (the default)
            is treated as "no plots".
        width: Width of a single grid cell.
        height: Height of a single grid cell.
        columns: Number of cells per row; must be at least 1.

    Returns:
        The result of ``GGBunch.show()`` for the assembled bunch.

    Raises:
        ValueError: If ``columns`` is smaller than 1.
    """
    if columns < 1:
        # Fail early with a clear message instead of a ZeroDivisionError
        # (columns == 0) or a mirrored layout (columns < 0).
        raise ValueError("columns must be a positive integer")

    bunch = GGBunch()
    for index, plot in enumerate(plots if plots is not None else ()):
        row, column = divmod(index, columns)
        bunch.add_plot(plot, column * width, row * height, width, height)
    return bunch.show()
# Load the mpg dataset that most of the examples below are built on.
mpg_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv'
)
mpg_df.head(3)

# One row per `class`: min / median / max / count of the `hwy` column.
class_df = (
    mpg_df.groupby('class')['hwy']
    .agg(['min', 'median', 'max', 'count'])
    .reset_index()
)
class_df.head(3)

# One row per (`cty`, `hwy`) pair holding the mode of the `fl` column.
fl_df = (
    mpg_df.groupby(['cty', 'hwy'])['fl']
    .agg(pd.Series.mode)
    .to_frame('fl')
    .reset_index()
)
fl_df.head(3)
# Load the economics dataset, parsing the `date` column as datetimes.
economics_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/economics.csv',
    parse_dates=['date'],
)
# Keep only the rows dated strictly after 1 January 2000.
economics_df = economics_df[economics_df['date'] > datetime(2000, 1, 1)]
economics_df.head(3)
# Load the midwest dataset.
midwest_df = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/midwest.csv'
)
midwest_df.head()

# Sum `poptotal` per state into a two-column frame: state, population.
pop_df = (
    midwest_df.groupby('state')['poptotal']
    .sum()
    .to_frame('population')
    .reset_index()
)
pop_df.head(3)

# Geocode the states within the US scope and fetch their boundaries
# (resolution argument 9).
states_df = geocode('state', pop_df['state'], scope='US').get_boundaries(9)
states_df.head(3)
def generate_random_data(size=50, mean=(0, 0), cov=((1, .5), (.5, 1)), seed=42):
    """Evaluate a bivariate normal density on a square grid over [-1, 1]^2.

    Despite the name, the returned values are fully deterministic: the
    density of ``multivariate_normal(mean, cov)`` is evaluated at every node
    of a ``size`` x ``size`` grid; nothing is sampled.

    Args:
        size: Number of grid points along each axis. The result has
            ``size * size`` rows.
        mean: Mean of the distribution (two components).
        cov: 2 x 2 covariance matrix of the distribution.
        seed: Value passed to ``np.random.seed``. It does not influence the
            returned frame.

    Returns:
        A ``pandas.DataFrame`` with columns ``x``, ``y`` and ``z``, one row
        per grid node in row-major order (``x`` varies fastest), where ``z``
        is the density at ``(x, y)``.
    """
    # Kept only for backward compatibility: callers may rely on the global
    # NumPy RNG being seeded as a side effect of this call.
    np.random.seed(seed)

    # Both axes use the same grid, so build it once.
    axis = np.linspace(-1, 1, size)
    X, Y = np.meshgrid(axis, axis)

    # dstack yields a (size, size, 2) array of (x, y) points for pdf().
    Z = multivariate_normal(mean, cov).pdf(np.dstack((X, Y)))

    return pd.DataFrame({'x': np.ravel(X), 'y': np.ravel(Y), 'z': np.ravel(Z)})
# Grid data with default settings; backs the p7 base plot.
random_df = generate_random_data()
random_df.head(3)

# Download the sample picture. The timeout keeps a stalled connection from
# hanging the script, and raise_for_status() reports an HTTP failure directly
# instead of letting PIL fail later on an error page it cannot decode.
response = requests.get(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/source/examples/cookbook/images/fisher_boat.png',
    timeout=30,
)
response.raise_for_status()
img = Image.open(BytesIO(response.content))
img
# Base plots (data + aesthetics only, no layers yet) to which layers are added later.
p1 = ggplot(economics_df, aes(x='date', y='unemploy')) + scale_x_datetime()
p2 = ggplot(mpg_df, aes(x='hwy'))
p3 = ggplot(mpg_df, aes(x='fl'))
p4 = ggplot(mpg_df, aes(x='cty', y='hwy'))
p5 = ggplot(mpg_df, aes(x='class', y='hwy'))
p6 = ggplot(class_df, aes(x='class'))
p7 = ggplot(random_df, aes(x='x', y='y'))
# Scatter of cty vs hwy coloured by cyl, with a linear-model smoothing line.
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(aes(color='cyl'))
    + geom_smooth(method='lm')
    + scale_color_brewer(type='div', palette='Spectral')
    + theme_classic()
    + ggtitle("Simple linear smoothing")
)
# Live map with state shapes coloured and filled by population;
# pop_df is joined to the states_df boundaries on 'state'.
(
    ggplot()
    + geom_livemap()
    + geom_map(
        aes(color='population', fill='population'),
        data=pop_df,
        map=states_df,
        map_join='state',
        size=1,
        alpha=.3,
    )
    + scale_color_gradient(low='#1a9641', high='#d7191c')
    + scale_fill_gradient(low='#1a9641', high='#d7191c')
)
# Bar chart of fl, filled by year (treated as discrete), with a custom
# three-line tooltip.
(
    ggplot(mpg_df, aes(x='fl', fill=as_discrete('year')))
    + geom_bar(
        tooltips=(
            layer_tooltips()
            .line('fl|^x')
            .line('@|@year')
            .line('count|@..count..')
        )
    )
)
# Area chart of unemploy over date with a custom datetime axis format.
(
    ggplot(economics_df, aes('date', 'unemploy'))
    + geom_area(color='#253494', fill='#41b6c4')
    + scale_x_datetime(format='%e %b %Y')
)
# Scatter coloured by discrete cyl, with seeded random group sampling
# applied to the point layer.
(
    ggplot(mpg_df, aes('cty', 'hwy'))
    + geom_point(
        aes(color=as_discrete('cyl')),
        sampling=sampling_group_random(2, seed=42),
    )
)
# Render the downloaded picture (converted to a NumPy array) as an image layer.
(
    ggplot()
    + geom_image(np.asarray(img))
    + theme_classic()
)
# Correlation plot of mpg_df (threshold .5) drawn with points and labels on
# a three-colour gradient, sized 400 x 400.
(
    corr_plot(data=mpg_df, threshold=.5)
    .points()
    .labels()
    .palette_gradient(low='#d7191c', mid='#ffffbf', high='#1a9641')
    .build()
    + ggsize(400, 400)
)
# Add a path layer to the p1 base plot (its aesthetics and data come from p1).
p1 + geom_path()
# Draw the boundaries held in states_df as polygons on an otherwise empty plot.
ggplot() + geom_polygon(data=states_df)