#!/usr/bin/env python
# coding: utf-8

# ## Systematic Sampling

# In[1]:


import numpy as np

from lets_plot import *

LetsPlot.setup_html()


# In[2]:


# Seed NumPy's global generator first so the noise below is reproducible.
np.random.seed(1)

N = 10000
X = np.arange(N)
Y = np.random.normal(loc=0, scale=1, size=N)

# Base plot shared by every example below; layers are added per cell.
p = ggplot({'x': X, 'y': Y}, aes(x='x', y='y'))


# In[3]:


# The linear regression is computed from the entire data set, so the
# smoothing line is the same on every plot regardless of the sampling
# applied to the 'lines' layer.
(
    p
    + geom_line()
    + geom_smooth()
)


# In[4]:


# Systematic sampling often looks better than 'random' sampling.
(
    p
    + geom_line(sampling=sampling_systematic(50))
    + geom_smooth()
)


# In[5]:


# The same plot with random sampling (fixed seed), for comparison.
(
    p
    + geom_line(sampling=sampling_random(50, seed=33))
    + geom_smooth()
)