#!/usr/bin/env python
# coding: utf-8
"""Minimal re-implementation of a shuffle-split cross-validator.

When run as a script, the index arrays produced by ``ShuffleSplit`` are
compared against ``sklearn.model_selection.ShuffleSplit`` for the same seed.
"""

# In[1]:

from math import ceil, floor, isclose

import numpy as np


# In[2]:

class ShuffleSplit():
    """Random-permutation train/test index generator.

    Parameters
    ----------
    n_splits : int, default=10
        Number of (train, test) index pairs to generate.
    train_size : float, default=0.9
        Fraction of the samples used for training (rounded down).
    test_size : float, default=0.1
        Fraction of the samples used for testing (rounded up).
    random_state : int, default=0
        Seed passed to ``numpy.random.RandomState``.
    """

    def __init__(self, n_splits=10, train_size=0.9, test_size=0.1,
                 random_state=0):
        self.n_splits = n_splits
        self.train_size = train_size
        self.test_size = test_size
        self.random_state = random_state

    def split(self, X, y=None):
        """Yield ``n_splits`` pairs of ``(train_indices, test_indices)``.

        Parameters
        ----------
        X : array-like
            Data to split; only its number of rows is used.
        y : object, optional
            Ignored. Accepted so the call signature matches scikit-learn
            splitters.

        Yields
        ------
        train, test : numpy.ndarray
            Index arrays taken from one random permutation per split.

        Raises
        ------
        ValueError
            If ``train_size + test_size`` exceeds 1. Raised when iteration
            starts, because this method is a generator.
        """
        requested = self.train_size + self.test_size
        # Tolerate float noise such as 0.7 + 0.3, but reject requests that
        # would otherwise silently shrink the training set.
        if requested > 1 and not isclose(requested, 1):
            raise ValueError(
                "train_size + test_size = {} exceeds 1; reduce one of "
                "them.".format(requested)
            )

        n_samples = X.shape[0] if hasattr(X, "shape") else len(X)
        n_train = floor(self.train_size * n_samples)
        n_test = ceil(self.test_size * n_samples)
        # NOTE: float rounding can still make n_train + n_test exceed
        # n_samples by one (e.g. 0.9/0.1 on 30 rows); the train slice below
        # is then simply clipped at the end of the permutation.
        stop = n_test + n_train

        # A single generator is shared by all splits, so each permutation
        # depends on the ones drawn before it.
        rng = np.random.RandomState(self.random_state)
        for _ in range(self.n_splits):
            order = rng.permutation(n_samples)
            # Test indices come first in the permutation, train indices next.
            yield order[n_test:stop], order[:n_test]


# In[3]:

def _load_data():
    """Return ``(X, y)`` used for the comparison against scikit-learn."""
    try:
        from sklearn.datasets import load_boston
    except ImportError:
        # load_boston is unavailable in scikit-learn >= 1.2. The split only
        # depends on the number of rows, so a placeholder with the Boston
        # dataset's dimensions (506 x 13) gives the same index arrays.
        return np.zeros((506, 13)), np.zeros(506)
    return load_boston(return_X_y=True)


def _check_against_sklearn(X, y, **params):
    """Assert that our splits equal scikit-learn's for the same ``params``."""
    from sklearn.model_selection import ShuffleSplit as skShuffleSplit

    cv1 = ShuffleSplit(**params)
    cv2 = skShuffleSplit(**params)
    for (train1, test1), (train2, test2) in zip(cv1.split(X, y),
                                                cv2.split(X, y)):
        assert np.array_equal(train1, train2)
        assert np.array_equal(test1, test2)


if __name__ == "__main__":
    X, y = _load_data()

    # Default sizes (90% train / 10% test).
    _check_against_sklearn(X, y, n_splits=5, random_state=0)

    # In[4]:

    # Explicit sizes that leave part of the data unused.
    _check_against_sklearn(X, y, n_splits=5, train_size=0.5, test_size=0.2,
                           random_state=0)