Hyperparameter Tuning of 4.2 Decision Tree and 4.3 Random Forest in Main Notebook
By Czarina Luna
%store -r X_train
%store -r y_train
from sklearn.model_selection import GridSearchCV
def grid_search(regressor, params):
    '''Run a 5-fold grid search for the given model and print the best result.

    The search is fitted on the notebook-level ``X_train`` / ``y_train``
    variables. The best cross-validation score (formatted as a percentage)
    and the parameter combination that produced it are printed; nothing is
    returned.

    Parameters
    ----------
    regressor : estimator passed to ``GridSearchCV`` as ``estimator``
    params : dict passed to ``GridSearchCV`` as ``param_grid``
    '''
    search = GridSearchCV(regressor, params, cv=5)
    search.fit(X_train, y_train)

    print(f'Best cv score: {search.best_score_ :.2%}')
    print(f'Best parameters: {search.best_params_}')
import warnings
# Install a blanket filter that ignores every warning category from here on.
# NOTE(review): this presumably also hides scikit-learn's fit-failure and
# deprecation warnings raised during the grid searches below, so an invalid
# grid value can go unnoticed — consider narrowing the filter.
warnings.filterwarnings('ignore')
Decision Tree
%store -r dt_pipe
# Decision tree, round 1: coarse search over the split criterion, the maximum
# depth and the minimum number of samples required to split a node.
grid = {
    'decision_tree__criterion': ['squared_error', 'friedman_mse', 'absolute_error'],
    'decision_tree__max_depth': [None, 1, 2],
    'decision_tree__min_samples_split': [2, 3, 5],
}
grid_search(regressor=dt_pipe, params=grid)
Best cv score: 73.24% Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_split': 5}
# Decision tree, round 2: criterion and depth are held at single values while
# larger min_samples_split values and min_samples_leaf are explored.
grid = {
    'decision_tree__criterion': ['friedman_mse'],
    'decision_tree__max_depth': [None],
    'decision_tree__min_samples_split': [5, 10],
    'decision_tree__min_samples_leaf': [1, 2, 5],
}
grid_search(regressor=dt_pipe, params=grid)
Best cv score: 76.86% Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_leaf': 5, 'decision_tree__min_samples_split': 5}
# Decision tree, round 3: push min_samples_split and min_samples_leaf towards
# larger values; criterion and depth stay fixed.
grid = {
    'decision_tree__criterion': ['friedman_mse'],
    'decision_tree__max_depth': [None],
    'decision_tree__min_samples_split': [10, 15, 20],
    'decision_tree__min_samples_leaf': [5, 10],
}
grid_search(regressor=dt_pipe, params=grid)
Best cv score: 78.44% Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_leaf': 5, 'decision_tree__min_samples_split': 20}
# Decision tree, round 4: only min_samples_split varies (20, 30, 50); every
# other parameter is held at a single value.
grid = {
    'decision_tree__criterion': ['friedman_mse'],
    'decision_tree__max_depth': [None],
    'decision_tree__min_samples_split': [20, 30, 50],
    'decision_tree__min_samples_leaf': [5],
}
grid_search(regressor=dt_pipe, params=grid)
Best cv score: 78.95% Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_leaf': 5, 'decision_tree__min_samples_split': 30}
# Decision tree, round 5: finer sweep of min_samples_split (25, 30, 45) with
# all other parameters held at a single value.
grid = {
    'decision_tree__criterion': ['friedman_mse'],
    'decision_tree__max_depth': [None],
    'decision_tree__min_samples_split': [25, 30, 45],
    'decision_tree__min_samples_leaf': [5],
}
grid_search(regressor=dt_pipe, params=grid)
Best cv score: 79.00% Best parameters: {'decision_tree__criterion': 'friedman_mse', 'decision_tree__max_depth': None, 'decision_tree__min_samples_leaf': 5, 'decision_tree__min_samples_split': 45}
Random Forest
%store -r rf_pipe
# Random forest, round 1: coarse search over the split criterion, the maximum
# depth and the minimum number of samples required to split a node.
grid = {
    'random_forest__criterion': ['squared_error', 'friedman_mse', 'absolute_error'],
    'random_forest__max_depth': [None, 1, 2],
    'random_forest__min_samples_split': [2, 3, 5],
}
grid_search(regressor=rf_pipe, params=grid)
Best cv score: 86.22% Best parameters: {'random_forest__criterion': 'friedman_mse', 'random_forest__max_depth': None, 'random_forest__min_samples_split': 2}
# Random forest, round 2: criterion and depth are held at single values while
# min_samples_split and min_samples_leaf are explored.
grid = {
    'random_forest__criterion': ['friedman_mse'],
    'random_forest__max_depth': [None],
    'random_forest__min_samples_split': [2, 3, 10],
    'random_forest__min_samples_leaf': [1, 2, 5],
}
grid_search(regressor=rf_pipe, params=grid)
Best cv score: 86.14% Best parameters: {'random_forest__criterion': 'friedman_mse', 'random_forest__max_depth': None, 'random_forest__min_samples_leaf': 1, 'random_forest__min_samples_split': 3}
# Random forest, round 3: criterion, depth and min_samples_leaf are held at
# single values while min_samples_split and the number of trees are varied.
#
# min_samples_split=1 is intentionally not part of the grid: scikit-learn only
# accepts an integer >= 2 (or a float fraction) for this parameter, so every
# fit using 1 fails. Because warnings are silenced in this notebook, those
# failures would go unreported and only add NaN-scored candidates and wasted
# fits to the search.
grid = {
    'random_forest__criterion': ['friedman_mse'],
    'random_forest__max_depth': [None],
    'random_forest__min_samples_split': [2, 3],
    'random_forest__min_samples_leaf': [1],
    'random_forest__n_estimators': [100, 150, 200],
}
grid_search(rf_pipe, grid)
Best cv score: 86.25% Best parameters: {'random_forest__criterion': 'friedman_mse', 'random_forest__max_depth': None, 'random_forest__min_samples_leaf': 1, 'random_forest__min_samples_split': 2, 'random_forest__n_estimators': 150}
# Random forest, round 4: everything is held at a single value except
# max_features, the number of features considered at each split.
#
# * 1.0 replaces the former 'auto' option. For forest regressors 'auto' meant
#   "use all features"; it was deprecated in scikit-learn 1.1 and removed in
#   1.3, where it makes every fit fail. 1.0 (all features) is the documented
#   equivalent and is accepted by older and newer versions alike.
# * n_jobs is no longer searched. It only sets how many workers build the
#   trees in parallel and does not change the fitted model, so including it
#   tripled the number of fits and reported a "best" value driven purely by
#   run-to-run noise. Set n_jobs on the pipeline itself if faster fits are
#   wanted.
grid = {
    'random_forest__criterion': ['friedman_mse'],
    'random_forest__max_depth': [None],
    'random_forest__min_samples_split': [2],
    'random_forest__min_samples_leaf': [1],
    'random_forest__n_estimators': [150],
    'random_forest__max_features': [1.0, 'sqrt', 'log2'],
}
grid_search(rf_pipe, grid)
Best cv score: 86.29% Best parameters: {'random_forest__criterion': 'friedman_mse', 'random_forest__max_depth': None, 'random_forest__max_features': 'sqrt', 'random_forest__min_samples_leaf': 1, 'random_forest__min_samples_split': 2, 'random_forest__n_estimators': 150, 'random_forest__n_jobs': 2}