#!/usr/bin/env python
# coding: utf-8

# In[1]:

import numpy as np
from sklearn.base import clone, is_regressor
from sklearn.datasets import load_diabetes, load_iris
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import learning_curve as sklearning_curve


# In[2]:

def learning_curve(estimator, X, y, train_sizes, random_state=0):
    """Compute cross-validated train/test scores for growing training subsets.

    A minimal re-implementation of ``sklearn.model_selection.learning_curve``
    (with ``shuffle=True``) that the cells below compare against the
    scikit-learn original.

    Parameters
    ----------
    estimator
        Unfitted scikit-learn estimator. It is cloned before every fit, so the
        object passed in is never fitted itself.
    X, y
        Samples and targets. Both are indexed with integer index arrays
        (``X[idx]``), so they must support NumPy-style fancy indexing.
    train_sizes
        Sequence of multipliers applied to the size of the first fold's
        training split; each product is truncated to an integer sample count.
    random_state
        Seed for the ``RandomState`` that shuffles each fold's training
        indices before prefixes are taken from them.

    Returns
    -------
    train_sizes_abs : ndarray of int
        Absolute number of training samples used for each requested size.
    train_scores : ndarray, shape (len(train_sizes), n_splits)
        ``estimator.score`` evaluated on the samples each model was fitted on.
    test_scores : ndarray, shape (len(train_sizes), n_splits)
        ``estimator.score`` evaluated on the held-out fold.
    """
    # Regressors get plain K-fold, everything else stratified K-fold.  The
    # public ``is_regressor`` helper replaces a direct read of the private
    # ``_estimator_type`` attribute, which raises on estimators lacking it.
    cv = KFold() if is_regressor(estimator) else StratifiedKFold()
    cv_iter = list(cv.split(X, y))

    train_scores = np.zeros((len(train_sizes), cv.n_splits))
    test_scores = np.zeros((len(train_sizes), cv.n_splits))

    # Sizes are expressed relative to the first fold's training split.
    n_max_train = len(cv_iter[0][0])
    train_sizes_abs = (n_max_train * np.array(train_sizes)).astype(int)

    # Shuffle every fold's training indices once, drawing from a single RNG in
    # fold order, so that each prefix below is a random subset of that fold.
    rng = np.random.RandomState(random_state)
    cv_iter = [(rng.permutation(train), test) for train, test in cv_iter]

    for i, train_size in enumerate(train_sizes_abs):
        for j, (train, test) in enumerate(cv_iter):
            # Truncate the index array first, then index the data once; the
            # same subset is used for fitting and for the training score.
            subset = train[:train_size]
            X_subset, y_subset = X[subset], y[subset]

            est = clone(estimator)
            est.fit(X_subset, y_subset)
            train_scores[i, j] = est.score(X_subset, y_subset)
            test_scores[i, j] = est.score(X[test], y[test])

    return train_sizes_abs, train_scores, test_scores


# In[3]:

# Fractions of the available training data evaluated in both checks below.
TRAIN_SIZES = [0.2, 0.4, 0.6, 0.8, 1]

# regression
# NOTE: ``load_boston`` was removed in scikit-learn 1.2; ``load_diabetes`` is
# a bundled regression dataset that needs no download.
X, y = load_diabetes(return_X_y=True)
clf = RandomForestRegressor(random_state=0)
ans1 = learning_curve(clf, X, y, train_sizes=TRAIN_SIZES, random_state=0)
ans2 = sklearning_curve(
    clf, X, y, train_sizes=TRAIN_SIZES, shuffle=True, random_state=0
)
assert np.array_equal(ans1[0], ans2[0])
assert np.allclose(ans1[1], ans2[1])
assert np.allclose(ans1[2], ans2[2])


# In[4]:

# classification
X, y = load_iris(return_X_y=True)
clf = RandomForestClassifier(random_state=0)
ans1 = learning_curve(clf, X, y, train_sizes=TRAIN_SIZES, random_state=0)
ans2 = sklearning_curve(
    clf, X, y, train_sizes=TRAIN_SIZES, shuffle=True, random_state=0
)
assert np.array_equal(ans1[0], ans2[0])
assert np.allclose(ans1[1], ans2[1])
assert np.allclose(ans1[2], ans2[2])