import pandas as pd
from lets_plot import *
# Configure lets-plot for HTML output before any plot is built.
LetsPlot.setup_html()

# Download the mpg CSV from the lets-plot-docs repository and preview
# its first three rows.
mpg = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv"
)
mpg.head(3)
 | Unnamed: 0 | manufacturer | model | displ | year | cyl | trans | drv | cty | hwy | fl | class |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | audi | a4 | 1.8 | 1999 | 4 | auto(l5) | f | 18 | 29 | p | compact |
1 | 2 | audi | a4 | 1.8 | 1999 | 4 | manual(m5) | f | 21 | 29 | p | compact |
2 | 3 | audi | a4 | 2.0 | 2008 | 4 | manual(m6) | f | 20 | 31 | p | compact |
# Subset of rows whose manufacturer is 'subaru'; used as a highlight layer.
subaru = mpg.loc[mpg['manufacturer'].eq('subaru')]

# Base scatter plot of 'hwy' against 'displ'. The large orange markers for
# the subaru rows are added first, so the regular points are layered on top.
# `p` is reused by the annotated variants that follow.
p = (
    ggplot(mpg, aes('displ', 'hwy'))
    + geom_point(data=subaru, color='orange', size=5)
    + geom_point()
)
p
# Variant 1: text label at (5.05, 35) and an arrow-tipped curve running
# from (5, 35) to (2.62, 27) with a positive curvature of 0.2.
(
    p
    + geom_text(x=5.05, y=35, label='subaru', hjust='left', color='#d76e00', size=10)
    + geom_curve(
        x=5, y=35, xend=2.62, yend=27,
        curvature=0.2, arrow=arrow(length=6),
        color='#d76e00',
    )
)
# Variant 2: text label at (4.2, 25); the curve starts at (4.5, 26.2), ends
# at (2.62, 27) and additionally sets `angle=60` with a curvature of 0.5.
(
    p
    + geom_text(x=4.2, y=25, label='subaru', hjust='left', color='#d76e00', size=10)
    + geom_curve(
        x=4.5, y=26.2, xend=2.62, yend=27,
        curvature=0.5, angle=60, arrow=arrow(length=6),
        color='#d76e00',
    )
)
# Variant 3: text label at (3, 12); the curve runs from (2.95, 12) to
# (2.5, 22) and uses a negative curvature (-0.3), the opposite sign to
# the previous variants.
(
    p
    + geom_text(x=3, y=12, label='subaru', hjust='left', color='#d76e00', size=10)
    + geom_curve(
        x=2.95, y=12, xend=2.5, yend=22,
        curvature=-0.3, arrow=arrow(length=6),
        color='#d76e00',
    )
)
# Rows where the 'cyl' column equals 5; drawn as a larger pink highlight layer.
mpg_cyl5 = mpg.loc[mpg['cyl'].eq(5)]

# Scatter plot with the highlighted rows, a text label at (4, 37) and one
# arrow-tipped curve with fixed endpoints: (3.95, 37) -> (2.6, 29).
(
    ggplot(mpg, aes('displ', 'hwy'))
    + geom_point(data=mpg_cyl5, color='#de77ae', size=5)
    + geom_point()
    + geom_text(label="Five-cylinder engine", x=4, y=37, hjust=0, color='#c51b7d', size=10)
    + geom_curve(
        x=3.95, y=37, xend=2.6, yend=29,
        curvature=0.1, arrow=arrow(length=6),
        color='#c51b7d',
    )
)
# Same plot as the fixed-endpoint version, but the curve layer is fed the
# `mpg_cyl5` rows and only `xend`/`yend` are given as constants, so the
# start of each curve comes from the plot-level ('displ', 'hwy') mapping.
# `ends='first'` puts the arrow head on the first end of each curve.
# `size_start=5` mirrors the size of the highlighted points
# (presumably so the curve stops at the marker edge - confirm in the docs).
(
    ggplot(mpg, aes('displ', 'hwy'))
    + geom_point(data=mpg_cyl5, color='#de77ae', size=5)
    + geom_point()
    + geom_text(label="Five-cylinder engine", x=4, y=37, hjust=0, color='#c51b7d', size=10)
    + geom_curve(
        data=mpg_cyl5, xend=3.95, yend=37,
        size_start=5,
        curvature=0.1, arrow=arrow(length=6, ends='first'),
        color='#c51b7d',
    )
)
# Count how many rows each manufacturer contributes to the dataset.
(
    mpg['manufacturer']
    .value_counts()
)
dodge         37
toyota        34
volkswagen    27
ford          25
chevrolet     19
audi          18
hyundai       14
subaru        14
nissan        13
honda          9
jeep           8
pontiac        5
land rover     4
mercury        4
lincoln        3
Name: manufacturer, dtype: int64
# Manufacturer to highlight; the same value is used as the text label below.
brand = 'pontiac'
brand_df = mpg.loc[mpg['manufacturer'].eq(brand)]

# Highlight the selected brand's rows, label them at (6, 37), and draw one
# curve per `brand_df` row ending at the constant point (5.95, 35).
# The x axis is limited to the range 3..6.
(
    ggplot(mpg, aes('displ', 'hwy'))
    + geom_point(data=brand_df, color='#bd423f', size=5)
    + geom_point()
    + geom_text(label=brand, x=6, y=37, hjust=1, color='#bd423f', size=10)
    + geom_curve(
        data=brand_df, xend=5.95, yend=35,
        size_start=5,
        curvature=-0.1, arrow=arrow(length=6, ends='first'),
        color='#bd423f',
    )
    + xlim(3, 6)
)