This notebook is an example of using the bootstrap and the .632 bootstrap to estimate classifier performance.

References:

Efron, Bradley. 1983. "Estimating the Error Rate of a Prediction Rule: Improvement on Cross-Validation." Journal of the American Statistical Association 78 (382): 316–331. doi:10.2307/2288636.

Efron, Bradley, and Robert Tibshirani. 1997. "Improvements on Cross-Validation: The .632+ Bootstrap Method." Journal of the American Statistical Association 92 (438): 548–560. doi:10.2307/2965703.
!pip install git+https://github.com/pattersonconsulting/ml_tools.git
Collecting git+https://github.com/pattersonconsulting/ml_tools.git Cloning https://github.com/pattersonconsulting/ml_tools.git to /tmp/pip-req-build-t751ukqr Running command git clone -q https://github.com/pattersonconsulting/ml_tools.git /tmp/pip-req-build-t751ukqr Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from ml-valuation==0.0.1) (1.1.5) Requirement already satisfied: sklearn in /usr/local/lib/python3.7/dist-packages (from ml-valuation==0.0.1) (0.0) Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from ml-valuation==0.0.1) (3.2.2) Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from ml-valuation==0.0.1) (1.19.5) Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->ml-valuation==0.0.1) (2.8.2) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->ml-valuation==0.0.1) (0.10.0) Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->ml-valuation==0.0.1) (1.3.2) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->ml-valuation==0.0.1) (2.4.7) Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from cycler>=0.10->matplotlib->ml-valuation==0.0.1) (1.15.0) Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->ml-valuation==0.0.1) (2018.9) Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (from sklearn->ml-valuation==0.0.1) (0.22.2.post1) Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->sklearn->ml-valuation==0.0.1) (1.4.1) Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->sklearn->ml-valuation==0.0.1) (1.0.1)
# Imports — third-party first, then the local ml_valuation package.
# Duplicates removed: train_test_split was imported three times,
# confusion_matrix three times, and roc_curve/auc twice in the original.
import numpy as np
import pandas as pd
from matplotlib import pyplot

from mlxtend.evaluate import BootstrapOutOfBag, bootstrap_point632_score

from sklearn.datasets import load_breast_cancer, make_classification
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    auc,
    average_precision_score,
    classification_report,
    confusion_matrix,
    plot_precision_recall_curve,
    precision_recall_curve,
    roc_curve,
)
from sklearn.model_selection import GridSearchCV, KFold, cross_val_score, train_test_split
from sklearn.utils import resample

import ml_valuation
from ml_valuation import model_valuation, model_visualization
# Load the breast-cancer dataset as plain numpy arrays
# (arr_X: feature matrix, arr_y: binary labels).
# Dead commented-out train/test-split lines removed.
arr_X, arr_y = load_breast_cancer(return_X_y=True)
print("X: " + str(arr_X.shape))
X: (569, 30)
# Preview only the first few rows instead of dumping the whole array.
print(arr_X[:5])
[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01] [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02] [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02] ... [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02] [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01] [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
# Class balance: number of samples per label value.
label_values, label_counts = np.unique(arr_y, return_counts=True)
dict(zip(label_values, label_counts))
{0: 212, 1: 357}
# Alias the arrays to the conventional X / y names used by the cells below.
X, y = arr_X, arr_y
for arr in (X, y):
    print(arr.shape)
(569, 30) (569,)
# Fit a logistic-regression model and estimate its accuracy with the
# .632 bootstrap (bootstrap_point632_score defaults to method='.632').
# max_iter raised from the default 100: newton-cg failed to converge on
# this data at the default (see the ConvergenceWarning spam in prior runs).
model = LogisticRegression(solver='newton-cg', max_iter=1000)

# random_seed pins the bootstrap resampling so the reported accuracy and
# confidence interval are reproducible across kernel restarts.
scores = bootstrap_point632_score(model, X, y, random_seed=42)
acc = np.mean(scores)
print('Accuracy: %.2f%%' % (100*acc))

# Empirical 95% confidence interval from the bootstrap score distribution.
lower = np.percentile(scores, 2.5)
upper = np.percentile(scores, 97.5)
print('95%% Confidence interval: [%.2f, %.2f]' % (100*lower, 100*upper))
/usr/local/lib/python3.7/dist-packages/sklearn/utils/optimize.py:212: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations. "number of iterations.", ConvergenceWarning) /usr/local/lib/python3.7/dist-packages/sklearn/utils/optimize.py:212: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations. "number of iterations.", ConvergenceWarning) /usr/local/lib/python3.7/dist-packages/sklearn/utils/optimize.py:212: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations. "number of iterations.", ConvergenceWarning) /usr/local/lib/python3.7/dist-packages/sklearn/utils/optimize.py:212: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations. "number of iterations.", ConvergenceWarning) /usr/local/lib/python3.7/dist-packages/sklearn/utils/optimize.py:212: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations. "number of iterations.", ConvergenceWarning) /usr/local/lib/python3.7/dist-packages/sklearn/utils/optimize.py:212: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations. "number of iterations.", ConvergenceWarning) /usr/local/lib/python3.7/dist-packages/sklearn/utils/optimize.py:212: ConvergenceWarning: newton-cg failed to converge. Increase the number of iterations. "number of iterations.", ConvergenceWarning)
Accuracy: 95.67% 95% Confidence interval: [94.14, 97.09]