이 노트북의 코드에 대한 설명은 다중 평가 지표: cross_validate() 글을 참고하세요.
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score
# Load the digits dataset and recast it as a binary problem: the target is
# True for samples labelled 9 and False for every other digit.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target == 9,
    random_state=42,  # fixed seed so the split is reproducible
)
from sklearn.svm import SVC
# No `scoring` argument is passed, so the estimator's default scorer is used.
# The recorded per-fold scores are the same as those of the explicit
# scoring='accuracy' run that follows.
cross_val_score(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
)
array([0.90200445, 0.90200445, 0.90200445])
# Same 3-fold evaluation, this time naming the metric explicitly.
cross_val_score(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
    scoring='accuracy',
)
array([0.90200445, 0.90200445, 0.90200445])
from sklearn.model_selection import cross_validate
# Evaluate several metrics in a single cross-validation run.
# A list of scorer names produces one `test_<name>` array per metric;
# return_train_score=True adds the matching `train_<name>` arrays.
# The returned dict also carries per-fold `fit_time` and `score_time`.
cross_validate(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
    scoring=['accuracy', 'roc_auc'],
    return_train_score=True,
)
{'fit_time': array([0.07618928, 0.07637691, 0.07586432]), 'score_time': array([0.0641861 , 0.06401324, 0.0637908 ]), 'test_accuracy': array([0.90200445, 0.90200445, 0.90200445]), 'test_roc_auc': array([0.99657688, 0.99814815, 0.99943883]), 'train_accuracy': array([1., 1., 1.]), 'train_roc_auc': array([1., 1., 1.])}
# A one-element scoring list still yields a dict keyed by `test_<name>`;
# index it to get just the per-fold test accuracy array.
cross_validate(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
    scoring=['accuracy'],
    return_train_score=False,  # skip scoring on the training folds
)['test_accuracy']
array([0.90200445, 0.90200445, 0.90200445])
# A dict for `scoring` maps custom short names to scorer names; the short
# names are what show up in the result keys (`test_acc`, `test_ra`).
cross_validate(
    SVC(gamma='auto'),
    X_train,
    y_train,
    cv=3,
    scoring={'acc': 'accuracy', 'ra': 'roc_auc'},
    return_train_score=False,
)
{'fit_time': array([0.07812428, 0.07830691, 0.077842 ]), 'score_time': array([0.06565356, 0.06545377, 0.06555676]), 'test_acc': array([0.90200445, 0.90200445, 0.90200445]), 'test_ra': array([0.99657688, 0.99814815, 0.99943883])}
from sklearn.model_selection import GridSearchCV
# Candidate values of the SVC `gamma` parameter to search over.
param_grid = {'gamma': [0.0001, 0.01, 0.1, 1, 10]}

# Grid search scored on accuracy only. Because `scoring` is given in
# list form, `refit` names the metric that selects the best parameters.
grid = GridSearchCV(
    SVC(),
    param_grid=param_grid,
    cv=3,
    scoring=['accuracy'],
    refit='accuracy',
    return_train_score=True,
)
grid.fit(X_train, y_train)
GridSearchCV(cv=3, error_score='raise-deprecating', estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto_deprecated', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False), fit_params=None, iid='warn', n_jobs=None, param_grid={'gamma': [0.0001, 0.01, 0.1, 1, 10]}, pre_dispatch='2*n_jobs', refit='accuracy', return_train_score=True, scoring=['accuracy'], verbose=0)
# Parameter setting that ranked first on the refit metric (accuracy here).
grid.best_params_
{'gamma': 0.0001}
# Mean cross-validated test score of the best setting, measured with the
# refit metric (accuracy here).
grid.best_score_
0.9651076466221232
# Show the full search results as a table, transposed so that each
# candidate parameter setting is a column and each statistic is a row.
pd.DataFrame(grid.cv_results_).T
 | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
mean_fit_time | 0.015072 | 0.074425 | 0.07734 | 0.0700471 | 0.0694924 |
mean_score_time | 0.00654006 | 0.031659 | 0.0317933 | 0.0279772 | 0.0277294 |
mean_test_accuracy | 0.965108 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
mean_train_accuracy | 0.970676 | 1 | 1 | 1 | 1 |
param_gamma | 0.0001 | 0.01 | 0.1 | 1 | 10 |
params | {'gamma': 0.0001} | {'gamma': 0.01} | {'gamma': 0.1} | {'gamma': 1} | {'gamma': 10} |
rank_test_accuracy | 1 | 2 | 2 | 2 | 2 |
split0_test_accuracy | 0.966592 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split0_train_accuracy | 0.975501 | 1 | 1 | 1 | 1 |
split1_test_accuracy | 0.96882 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split1_train_accuracy | 0.962138 | 1 | 1 | 1 | 1 |
split2_test_accuracy | 0.959911 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split2_train_accuracy | 0.974388 | 1 | 1 | 1 | 1 |
std_fit_time | 0.000167006 | 0.000345247 | 4.47831e-05 | 2.40184e-05 | 3.97682e-05 |
std_score_time | 0.000106334 | 2.31535e-05 | 9.92971e-05 | 1.07896e-05 | 2.01933e-05 |
std_test_accuracy | 0.00378546 | 0 | 0 | 0 | 0 |
std_train_accuracy | 0.00605401 | 0 | 0 | 0 | 0 |
# Grid search that records two metrics under the short names 'acc' and
# 'ra'. refit='ra' makes ROC AUC the metric that decides the best
# parameters; accuracy is still reported in cv_results_.
grid = GridSearchCV(
    SVC(),
    param_grid=param_grid,
    cv=3,
    scoring={'acc': 'accuracy', 'ra': 'roc_auc'},
    refit='ra',
    return_train_score=True,
)
grid.fit(X_train, y_train)
GridSearchCV(cv=3, error_score='raise-deprecating', estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto_deprecated', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False), fit_params=None, iid='warn', n_jobs=None, param_grid={'gamma': [0.0001, 0.01, 0.1, 1, 10]}, pre_dispatch='2*n_jobs', refit='ra', return_train_score=True, scoring={'ra': 'roc_auc', 'acc': 'accuracy'}, verbose=0)
# Parameter setting that ranked first on the refit metric ('ra', ROC AUC).
grid.best_params_
{'gamma': 0.01}
# Mean cross-validated test score of the best setting, measured with the
# refit metric ('ra', ROC AUC).
grid.best_score_
0.9983352038907595
# Show the full search results as a table, transposed so that each
# candidate parameter setting is a column and each statistic is a row.
# Both metrics appear, suffixed with their short names (`_acc`, `_ra`).
pd.DataFrame(grid.cv_results_).T
 | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
mean_fit_time | 0.0148106 | 0.0743373 | 0.0773802 | 0.0702082 | 0.0701415 |
mean_score_time | 0.0133291 | 0.0638885 | 0.0638817 | 0.0564213 | 0.056355 |
mean_test_acc | 0.965108 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
mean_test_ra | 0.988403 | 0.998335 | 0.918986 | 0.5 | 0.5 |
mean_train_acc | 0.970676 | 1 | 1 | 1 | 1 |
mean_train_ra | 0.992134 | 1 | 1 | 1 | 1 |
param_gamma | 0.0001 | 0.01 | 0.1 | 1 | 10 |
params | {'gamma': 0.0001} | {'gamma': 0.01} | {'gamma': 0.1} | {'gamma': 1} | {'gamma': 10} |
rank_test_acc | 1 | 2 | 2 | 2 | 2 |
rank_test_ra | 2 | 1 | 3 | 4 | 4 |
split0_test_acc | 0.966592 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split0_test_ra | 0.98367 | 0.997419 | 0.934007 | 0.5 | 0.5 |
split0_train_acc | 0.975501 | 1 | 1 | 1 | 1 |
split0_train_ra | 0.992017 | 1 | 1 | 1 | 1 |
split1_test_acc | 0.96882 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split1_test_ra | 0.987149 | 0.998148 | 0.912458 | 0.5 | 0.5 |
split1_train_acc | 0.962138 | 1 | 1 | 1 | 1 |
split1_train_ra | 0.994935 | 1 | 1 | 1 | 1 |
split2_test_acc | 0.959911 | 0.902004 | 0.902004 | 0.902004 | 0.902004 |
split2_test_ra | 0.994388 | 0.999439 | 0.910494 | 0.5 | 0.5 |
split2_train_acc | 0.974388 | 1 | 1 | 1 | 1 |
split2_train_ra | 0.98945 | 1 | 1 | 1 | 1 |
std_fit_time | 0.000175309 | 0.000157003 | 2.14285e-05 | 0.000141432 | 0.000852465 |
std_score_time | 7.33891e-05 | 0.000202336 | 8.94391e-05 | 2.56253e-05 | 0.00055562 |
std_test_acc | 0.00378546 | 0 | 0 | 0 | 0 |
std_test_ra | 0.00446456 | 0.000835283 | 0.0106514 | 0 | 0 |
std_train_acc | 0.00605401 | 0 | 0 | 0 | 0 |
std_train_ra | 0.00224093 | 0 | 6.40988e-17 | 0 | 0 |
# Estimator configured with the winning parameters, made available by
# GridSearchCV because `refit` is set.
grid.best_estimator_
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)