Systematic Sampling

In [1]:
import numpy as np
from lets_plot import *

# Configure Lets-Plot for HTML output; run once before any plot is displayed in the notebook.
LetsPlot.setup_html()
In [2]:
# Synthetic dataset: N normally distributed values (mean 0, std 1) plotted against their index.
N = 10000
np.random.seed(1)  # fixed seed so the generated noise is the same on every run
X = np.arange(N)
Y = np.random.normal(loc=0, scale=1, size=N)

# Base plot reused by the cells below; each of them adds its own layers to it.
p = ggplot({'x': X, 'y': Y}, aes(x='x', y='y'))
In [3]:
# The linear regression is computed from the entire dataset,
# so it is the same on every plot regardless of the sampling applied to the line layer.
p + geom_line() + geom_smooth()
Out[3]:
In [4]:
# Systematic sampling often looks better than random sampling.
# Only the line layer is sampled here; the smoothing layer is left as is.
p + geom_line(sampling=sampling_systematic(50)) + geom_smooth()
Out[4]:
In [5]:
# Same plot with random sampling on the line layer, for comparison;
# the seed is fixed so that the same sample is drawn on every run.
p + geom_line(sampling=sampling_random(50, seed=33)) + geom_smooth()
Out[5]: