#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import random

from lets_plot import *

LetsPlot.setup_html()


# In[2]:


# This example was found at: www.cookbook-r.com/Graphs/Scatterplots_(ggplot2)


def _noisy_ramp(count):
    """Return 0..count-1 with Gaussian noise (mean 0, sigma 3) added to each value."""
    return [i + random.normalvariate(0, 3) for i in range(count)]


def _snap(values, step=5):
    """Round every value to the nearest multiple of `step`."""
    return [round(v / step) * step for v in values]


# Fixed seed so the generated sample is reproducible.
random.seed(123)

# Twenty observations: the first ten labelled 'A', the last ten 'B'.
# 'xvar' is drawn before 'yvar', which fixes the order of the random draws.
data = {
    'cond': np.repeat(['A', 'B'], 10),
    'xvar': _noisy_ramp(20),
    'yvar': _noisy_ramp(20),
}


# #### Basic scatter-plot

# In[3]:


p = ggplot(data, aes(x='xvar', y='yvar')) + ggsize(300, 250)
p + geom_point(shape=1)


# #### Add regression line

# In[4]:


p + geom_point(shape=1) + geom_smooth()


# In[5]:


# Same plot, but with the standard error band switched off.
p + geom_point(shape=1) + geom_smooth(se=False)


# #### Split dataset by the `cond` variable

# In[6]:


p1 = ggplot(data, aes(x='xvar', y='yvar', color='cond')) + ggsize(500, 250)
p1 + geom_point(shape=1) + geom_smooth(se=False)


# In[7]:


# Additionally map `shape` to the `cond` variable.
p2 = p1 + geom_point(aes(shape='cond'), size=5)
p2


# In[8]:


# Pick the shapes explicitly with `scale_shape_manual`:
# 1 - hollow circle
# 2 - hollow triangle
p2 + scale_shape_manual(values=[1, 2])


# #### Handling overplotting

# In[9]:


# Snap both coordinates to multiples of 5 so that points overlap.
data['xrnd'] = _snap(data['xvar'])
data['yrnd'] = _snap(data['yvar'])


# In[10]:


p3 = ggplot(data, aes(x='xrnd', y='yrnd')) + ggsize(500, 250)

# Semi-transparent points: use `alpha` to show overplotting.
p3 + geom_point(alpha=0.3, size=5)


# In[11]:


# `jitter` the points to show overplotting in another way.
p3 + geom_point(shape=1, position=position_jitter(width=0.2, height=0.5))