import numpy as np
import pandas as pd
from lets_plot import *
from lets_plot.mapping import as_discrete
# Initialise Lets-Plot for HTML (notebook) output.
# NOTE(review): presumably this has to run before any plot below is displayed — confirm against the Lets-Plot docs.
LetsPlot.setup_html()
def plot_matrix(plots=None, width=500, height=375, columns=2):
    """Lay plots out on a regular grid inside one ``GGBunch`` and show it.

    Plots are placed row by row: plot ``i`` goes to row ``i // columns`` and
    column ``i % columns``, i.e. at pixel offset
    ``(column * width, row * height)``.

    Args:
        plots: Sequence of plot objects to arrange. ``None`` (the default)
            means "no plots" and yields an empty bunch.
        width: Width given to every cell.
        height: Height given to every cell.
        columns: Number of cells per row.

    Returns:
        Whatever ``GGBunch.show()`` returns.
    """
    # A fresh list per call: a mutable ``[]`` default would be shared
    # between all calls of the function.
    if plots is None:
        plots = []

    bunch = GGBunch()
    for index, plot in enumerate(plots):
        row, column = divmod(index, columns)
        bunch.add_plot(plot, column * width, row * height, width, height)
    return bunch.show()
# Load the mpg dataset, drop the "Unnamed: 0" column and keep a 50-row
# sample drawn with a fixed seed (random_state=42) and a fresh 0..49 index.
df = (
    pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv")
    .drop(columns=["Unnamed: 0"])
    .sample(n=50, random_state=42, ignore_index=True)
)
df.head()
manufacturer | model | displ | year | cyl | trans | drv | cty | hwy | fl | class | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | dodge | ram 1500 pickup 4wd | 4.7 | 2008 | 8 | manual(m6) | 4 | 9 | 12 | e | pickup |
1 | toyota | toyota tacoma 4wd | 4.0 | 2008 | 6 | auto(l5) | 4 | 16 | 20 | r | pickup |
2 | toyota | camry | 2.2 | 1999 | 4 | auto(l4) | f | 21 | 27 | r | midsize |
3 | audi | a4 quattro | 2.0 | 2008 | 4 | manual(m6) | 4 | 20 | 28 | p | compact |
4 | jeep | grand cherokee 4wd | 4.7 | 2008 | 8 | auto(l5) | 4 | 14 | 19 | r | suv |
# Number of bins the default bin width is derived from.
DEF_BIN_COUNT = 15


def get_binwidth(df, column, binwidth_ratio=1.0/DEF_BIN_COUNT):
    """Return a bin width expressed as a fraction of a column's value range.

    Args:
        df: Data frame holding the data.
        column: Name of the column whose ``max() - min()`` span is used.
        binwidth_ratio: Fraction of the span taken as the bin width; the
            default, ``1 / DEF_BIN_COUNT``, splits the span into
            ``DEF_BIN_COUNT`` equal bins.

    Returns:
        ``binwidth_ratio`` multiplied by the span of ``df[column]``.
    """
    values = df[column]
    span = values.max() - values.min()
    return binwidth_ratio * span
def get_bincount(df, column, binwidth):
    """Return how many bins of width ``binwidth`` cover a column's value range.

    The count is the column span (``max() - min()``) divided by ``binwidth``,
    rounded to the nearest integer and never less than 1, so the result can
    always be passed as a ``bins`` count (e.g. to ``np.histogram``), even
    when the column is constant or its span is smaller than half a bin.

    Args:
        df: Data frame holding the data.
        column: Name of the column to measure.
        binwidth: Width of a single bin; must be positive.

    Returns:
        The number of bins as a plain ``int`` (>= 1).

    Raises:
        ValueError: If ``binwidth`` is not positive.
    """
    if binwidth <= 0:
        raise ValueError("binwidth must be positive, got {0}".format(binwidth))

    values = df[column]
    span = values.max() - values.min()
    # A zero count is not a usable number of bins, so fall back to one bin.
    return max(1, int(round(span / binwidth)))
# Default bin width for "hwy", rounded to three decimals; reused by the
# examples below and embedded in some plot titles.
nice_binwidth = round(get_binwidth(df, column="hwy"), ndigits=3)
print(
    "Nice binwidth value for examples: {0}".format(nice_binwidth)
)
Nice binwidth value for examples: 1.667
# Minimal examples: both geoms are called without any arguments.
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot()
    + ggtitle("geom_dotplot() example")
)
(
    ggplot(df, aes(y="hwy"))
    + geom_ydotplot()
    + ggtitle("geom_ydotplot() example")
)
# Accent colour shared by the combined-layer examples below.
PACIFIC_BLUE = '#118ed8'

# Density area with a dotplot layered on top.
(
    ggplot(df, aes(x="hwy"))
    + geom_area(stat='density', fill=PACIFIC_BLUE)
    + geom_dotplot(binwidth=nice_binwidth, color=PACIFIC_BLUE, fill='white')
    + ggtitle("density + dotplot (method='dotdensity')")
)

# Histogram and a 'histodot' dotplot that use the same bin width.
(
    ggplot(df, aes(x="hwy"))
    + geom_histogram(binwidth=nice_binwidth, color='white')
    + geom_dotplot(
        binwidth=nice_binwidth,
        method='histodot',
        color=PACIFIC_BLUE,
        fill='white',
    )
    + coord_fixed(ratio=nice_binwidth)
    + ggtitle("histogram + dotplot (method='histodot')")
)

# Violin per "drv" value with a ydotplot layered on top.
(
    ggplot(df, aes("drv", "hwy"))
    + geom_violin(fill=PACIFIC_BLUE, size=0)
    + geom_ydotplot(
        binwidth=nice_binwidth,
        stackratio=.5,
        color=PACIFIC_BLUE,
        fill='white',
    )
    + ggtitle("violin + ydotplot (method='dotdensity')")
)
# --- binwidth ---
# Default bin width next to an explicit `binwidth`, first for geom_dotplot
# and then for geom_ydotplot.
p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot() + ggtitle("Default binwidth")
p2 = p + geom_dotplot(binwidth=nice_binwidth) + ggtitle("binwidth={0}".format(nice_binwidth))
plot_matrix([p1, p2])

p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot() + ggtitle("Default binwidth")
p2 = p + geom_ydotplot(binwidth=nice_binwidth) + ggtitle("binwidth={0}".format(nice_binwidth))
plot_matrix([p1, p2])
# --- stackdir ---
# One plot per `stackdir` value, first for geom_dotplot and then for
# geom_ydotplot.
p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot(stackdir='up') + ggtitle("stackdir='up' (default)")
p2 = p + geom_dotplot(stackdir='down') + ggtitle("stackdir='down'")
p3 = p + geom_dotplot(stackdir='center') + ggtitle("stackdir='center'")
p4 = p + geom_dotplot(stackdir='centerwhole') + ggtitle("stackdir='centerwhole'")
plot_matrix([p1, p2, p3, p4])

p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot(stackdir='left') + \
    ggtitle("stackdir='left'")
p2 = p + geom_ydotplot(stackdir='right') + \
    ggtitle("stackdir='right'")
p3 = p + geom_ydotplot(stackdir='center') + \
    ggtitle("stackdir='center' (default)")
p4 = p + geom_ydotplot(stackdir='centerwhole') + \
    ggtitle("stackdir='centerwhole'")
plot_matrix([p1, p2, p3, p4])
# --- stackratio ---
# Default `stackratio` next to 0.5 and 1.5, first for geom_dotplot and then
# for geom_ydotplot.
p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot() + ggtitle("stackratio=1.0 (default)")
p2 = p + geom_dotplot(stackratio=0.5) + ggtitle("stackratio=0.5")
p3 = p + geom_dotplot(stackratio=1.5) + ggtitle("stackratio=1.5")
plot_matrix([p1, p2, p3])

p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot() + ggtitle("stackratio=1.0 (default)")
p2 = p + geom_ydotplot(stackratio=0.5) + ggtitle("stackratio=0.5")
p3 = p + geom_ydotplot(stackratio=1.5) + ggtitle("stackratio=1.5")
plot_matrix([p1, p2, p3])
# --- dotsize ---
# Default `dotsize` next to 0.5 and 1.5, first for geom_dotplot and then for
# geom_ydotplot.
p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot() + ggtitle("dotsize=1.0 (default)")
p2 = p + geom_dotplot(dotsize=0.5) + ggtitle("dotsize=0.5")
p3 = p + geom_dotplot(dotsize=1.5) + ggtitle("dotsize=1.5")
plot_matrix([p1, p2, p3])

p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot() + ggtitle("dotsize=1.0 (default)")
p2 = p + geom_ydotplot(dotsize=0.5) + ggtitle("dotsize=0.5")
p3 = p + geom_ydotplot(dotsize=1.5) + ggtitle("dotsize=1.5")
plot_matrix([p1, p2, p3])
# --- center ---
# method='histodot' without and with an explicit `center`, first for
# geom_dotplot and then for geom_ydotplot.
p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_dotplot(method='histodot', center=12.0) + ggtitle("center=12.0")
plot_matrix([p1, p2])

p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_ydotplot(method='histodot', center=12.0) + ggtitle("center=12.0")
plot_matrix([p1, p2])
# --- boundary ---
# method='histodot' without and with an explicit `boundary`, first for
# geom_dotplot and then for geom_ydotplot.
p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_dotplot(method='histodot', boundary=11.0) + ggtitle("boundary=11.0")
plot_matrix([p1, p2])

p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_ydotplot(method='histodot', boundary=11.0) + ggtitle("boundary=11.0")
plot_matrix([p1, p2])
# --- bins ---
# method='histodot' without and with an explicit `bins` count, first for
# geom_dotplot and then for geom_ydotplot.
p = ggplot(df, aes(x="hwy"))
p1 = p + geom_dotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_dotplot(method='histodot', bins=20) + ggtitle("bins=20")
plot_matrix([p1, p2])

p = ggplot(df, aes("drv", "hwy"))
p1 = p + geom_ydotplot(method='histodot') + ggtitle("Default")
p2 = p + geom_ydotplot(method='histodot', bins=20) + ggtitle("bins=20")
plot_matrix([p1, p2])
# Dots filled by a discrete variable, with the default method.
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(aes(fill=as_discrete("drv")), color='black')
    + ggtitle("method='dotdensity'")
)

# geom_dotplot, method='histodot': stackgroups left at default vs. True.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot(aes(fill=as_discrete("drv")), method='histodot', color='black')
    + ggtitle("method='histodot', stackgroups=False (default)")
)
p2 = (
    p
    + geom_dotplot(
        aes(fill=as_discrete("drv")),
        method='histodot',
        stackgroups=True,
        color='black',
    )
    + ggtitle("method='histodot', stackgroups=True")
)
plot_matrix([p1, p2])

# geom_ydotplot, default method: stackgroups left at default vs. True.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(aes(fill=as_discrete("year")), color='black')
    + ggtitle("method='dotdensity', stackgroups=False (default)")
)
p2 = (
    p
    + geom_ydotplot(aes(fill=as_discrete("year")), stackgroups=True, color='black')
    + ggtitle("method='dotdensity', stackgroups=True")
)
plot_matrix([p1, p2])

# geom_ydotplot, method='histodot': stackgroups left at default vs. True.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        method='histodot',
        color='black',
    )
    + ggtitle("method='histodot', stackgroups=False (default)")
)
p2 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        method='histodot',
        stackgroups=True,
        color='black',
    )
    + ggtitle("method='histodot', stackgroups=True")
)
plot_matrix([p1, p2])
# Custom tooltips for geom_dotplot; the binwidth value is formatted with ".3f".
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(
        tooltips=(
            layer_tooltips()
            .line("Stack center|^x")
            .line("Number of dots in stack|@..count..")
            .format('@..binwidth..', ".3f")
            .line("Width of the bin|@..binwidth..")
        )
    )
    + ggtitle("With tooltips")
)

# Same tooltip lines for geom_ydotplot, preceded by a line showing "^x".
(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(
        tooltips=(
            layer_tooltips()
            .line("^x")
            .line("Stack center|^y")
            .line("Number of dots in stack|@..count..")
            .format('@..binwidth..', ".3f")
            .line("Width of the bin|@..binwidth..")
        )
    )
    + ggtitle("With tooltips")
)
# Both dotplot geoms combined with facet_grid() over "year".
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(aes(fill=as_discrete("year")), color='black')
    + facet_grid(x="year")
    + ggtitle("facet_grid()")
)
(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(aes(fill="drv"), color='black')
    + facet_grid(x="year")
    + ggtitle("facet_grid()")
)
# Both dotplot geoms combined with coord_flip().
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot()
    + coord_flip()
    + ggtitle("Flip coordinates")
)
(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(aes(fill="drv"), color='black')
    + coord_flip()
    + ggtitle("Flip coordinates")
)
# Pre-binned data for stat='identity': np.histogram returns (counts, edges),
# and zip() pairs every count with the edge at the same position, dropping
# the one extra trailing edge. Every row gets the same `binwidth` value.
dp_identity_df = pd.DataFrame(
    list(zip(*np.histogram(df.hwy, bins=DEF_BIN_COUNT))),
    columns=["count", "hwy"],
).assign(binwidth=nice_binwidth)
dp_identity_df.head()
count | hwy | binwidth | |
---|---|---|---|
0 | 1 | 12.000000 | 1.667 |
1 | 2 | 13.666667 | 1.667 |
2 | 1 | 15.333333 | 1.667 |
3 | 8 | 17.000000 | 1.667 |
4 | 7 | 18.666667 | 1.667 |
# Dotplot drawn from the pre-binned frame: stack sizes and bin widths are
# read from the "count" and "binwidth" columns.
(
    ggplot(dp_identity_df, aes(x="hwy", stacksize="count", binwidth="binwidth"))
    + geom_dotplot(stat='identity')
    + ggtitle("stat='identity'")
)
# Pre-binned data per "drv" value for stat='identity'.
# Each group is histogrammed separately, with a bin count derived from
# `nice_binwidth`; zip() pairs every count with the edge at the same position.
# The `for group in [...]` clause binds the filtered subset once per drv
# value, so it is not re-filtered for every use; `drv` and `binwidth` are
# broadcast to all rows of the group's frame by assign().
ydp_identity_df = pd.concat([
    pd.DataFrame(
        list(zip(*np.histogram(group.hwy, bins=get_bincount(group, "hwy", nice_binwidth)))),
        columns=["count", "hwy"],
    ).assign(
        drv=drv,
        binwidth=nice_binwidth,
    )
    for drv in df.drv.unique()
    for group in [df[df.drv == drv]]
])
ydp_identity_df.head()
count | hwy | drv | binwidth | |
---|---|---|---|---|
0 | 1 | 12.0 | 4 | 1.667 |
1 | 1 | 13.6 | 4 | 1.667 |
2 | 1 | 15.2 | 4 | 1.667 |
3 | 8 | 16.8 | 4 | 1.667 |
4 | 2 | 18.4 | 4 | 1.667 |
# Ydotplot drawn from the per-"drv" pre-binned frame: stack sizes and bin
# widths are read from the "count" and "binwidth" columns.
(
    ggplot(ydp_identity_df, aes("drv", "hwy", stacksize="count", binwidth="binwidth"))
    + geom_ydotplot(aes(fill="drv"), stat='identity', stackratio=.75, color="black")
    + ggtitle("stat='identity'")
)
# geom_dotplot with several parameters and aesthetics set at once, plus a
# brewer fill scale and the grey theme.
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(
        aes(fill=as_discrete("year")),
        method='histodot',
        bins=15,
        stackdir='centerwhole',
        stackratio=.75,
        dotsize=.75,
        color='black',
        alpha=.5,
        size=1,
    )
    + scale_fill_brewer(type='qual', palette='Set1')
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)

# The geom_ydotplot counterpart, filled by "drv".
(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(
        aes(fill=as_discrete("drv")),
        method='histodot',
        bins=15,
        stackdir='center',
        stackratio=.75,
        color='black',
        alpha=.5,
        size=1,
    )
    + scale_fill_brewer(type='qual', palette='Set1')
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)