#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
from itertools import product
from sklearn.base import clone
from sklearn.datasets import load_boston, load_iris
from sklearn.svm import SVC, SVR
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import GridSearchCV as skGridSearchCV


# In[2]:


class GridSearchCV():
    def __init__(self, estimator, param_grid):
        self.estimator = estimator
        self.param_grid = param_grid

    def generate_grid(self):
        # enumerate every combination of parameter values, iterating the
        # param_grid keys in sorted order
        items = sorted(self.param_grid.items())
        keys, values = zip(*items)
        for v in product(*values):
            params = dict(zip(keys, v))
            yield params

    def fit(self, X, y):
        # default cross-validation strategy: KFold for regressors,
        # StratifiedKFold for classifiers
        if self.estimator._estimator_type == "regressor":
            cv = KFold()
        else:  # estimator._estimator_type == "classifier"
            cv = StratifiedKFold()
        train_scores, test_scores = [], []
        params = []
        for i, cur_param in enumerate(self.generate_grid()):
            cur_train_score, cur_test_score = [], []
            for j, (train, test) in enumerate(cv.split(X, y)):
                # fit a fresh clone of the estimator on each training fold
                est = clone(self.estimator)
                est.set_params(**cur_param)
                est.fit(X[train], y[train])
                cur_train_score.append(est.score(X[train], y[train]))
                cur_test_score.append(est.score(X[test], y[test]))
            params.append(cur_param)
            train_scores.append(cur_train_score)
            test_scores.append(cur_test_score)
        train_scores = np.array(train_scores)
        test_scores = np.array(test_scores)
        # assemble a cv_results_ dict with the same keys sklearn reports
        cv_results = {}
        for i in range(cv.n_splits):
            cv_results["split" + str(i) + "_train_score"] = train_scores[:, i]
            cv_results["split" + str(i) + "_test_score"] = test_scores[:, i]
        cv_results["mean_train_score"] = np.mean(train_scores, axis=1)
        cv_results["std_train_score"] = np.std(train_scores, axis=1)
        cv_results["mean_test_score"] = np.mean(test_scores, axis=1)
        cv_results["std_test_score"] = np.std(test_scores, axis=1)
        cv_results['params'] = params
        self.cv_results_ = cv_results
        # keep the parameter setting with the best mean test score and
        # refit it on the full data set
        self.best_params_ = cv_results['params'][np.argmax(cv_results['mean_test_score'])]
        self.best_estimator_ = clone(self.estimator)
        self.best_estimator_.set_params(**self.best_params_)
        self.best_estimator_.fit(X, y)
        return self

    def decision_function(self, X):
        return self.best_estimator_.decision_function(X)

    def predict(self, X):
        return self.best_estimator_.predict(X)


# In[3]:


# regression
# note: load_boston was removed in scikit-learn 1.2, so this cell assumes an older release
X, y = load_boston(return_X_y=True)
param_grid = {"C": [0.1, 1, 10], "gamma": [0.1, 1, 10]}
clf1 = GridSearchCV(SVR(), param_grid).fit(X, y)
clf2 = skGridSearchCV(SVR(), param_grid, return_train_score=True).fit(X, y)
for i in range(5):
    assert np.allclose(clf1.cv_results_["split" + str(i) + "_train_score"],
                       clf2.cv_results_["split" + str(i) + "_train_score"])
    assert np.allclose(clf1.cv_results_["split" + str(i) + "_test_score"],
                       clf2.cv_results_["split" + str(i) + "_test_score"])
assert np.allclose(clf1.cv_results_["mean_train_score"], clf2.cv_results_["mean_train_score"])
assert np.allclose(clf1.cv_results_["std_train_score"], clf2.cv_results_["std_train_score"])
assert np.allclose(clf1.cv_results_["mean_test_score"], clf2.cv_results_["mean_test_score"])
assert np.allclose(clf1.cv_results_["std_test_score"], clf2.cv_results_["std_test_score"])
assert np.allclose(clf1.best_params_["C"], clf2.best_params_["C"])
assert np.allclose(clf1.best_params_["gamma"], clf2.best_params_["gamma"])
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.allclose(pred1, pred2)


# In[4]:


# classification
X, y = load_iris(return_X_y=True)
param_grid = {"C": [0.1, 1, 10], "gamma": [0.1, 1, 10]}
clf1 = GridSearchCV(SVC(random_state=0), param_grid).fit(X, y)
clf2 = skGridSearchCV(SVC(random_state=0), param_grid, return_train_score=True).fit(X, y)
for i in range(5):
    assert np.allclose(clf1.cv_results_["split" + str(i) + "_train_score"],
                       clf2.cv_results_["split" + str(i) + "_train_score"])
    assert np.allclose(clf1.cv_results_["split" + str(i) + "_test_score"],
                       clf2.cv_results_["split" + str(i) + "_test_score"])
assert np.allclose(clf1.cv_results_["mean_train_score"], clf2.cv_results_["mean_train_score"])
assert np.allclose(clf1.cv_results_["std_train_score"], clf2.cv_results_["std_train_score"])
assert np.allclose(clf1.cv_results_["mean_test_score"], clf2.cv_results_["mean_test_score"])
assert np.allclose(clf1.cv_results_["std_test_score"], clf2.cv_results_["std_test_score"])
assert np.allclose(clf1.best_params_["C"], clf2.best_params_["C"])
assert np.allclose(clf1.best_params_["gamma"], clf2.best_params_["gamma"])
prob1 = clf1.decision_function(X)
prob2 = clf2.decision_function(X)
assert np.allclose(prob1, prob2)
pred1 = clf1.predict(X)
pred2 = clf2.predict(X)
assert np.array_equal(pred1, pred2)
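
# In[5]:


# A minimal sanity check of generate_grid, assuming sklearn's ParameterGrid is
# available from sklearn.model_selection: both iterate the sorted keys of
# param_grid and take the Cartesian product of their values, so the two
# listings of candidate parameter settings should agree.
from sklearn.model_selection import ParameterGrid

param_grid = {"C": [0.1, 1, 10], "gamma": [0.1, 1, 10]}
grid1 = list(GridSearchCV(SVC(), param_grid).generate_grid())
grid2 = list(ParameterGrid(param_grid))
assert grid1 == grid2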