# Compare two encodings of the categorical columns of the adult census data
# (ordinal vs. one-hot) in front of a logistic regression, and contrast both
# with a most-frequent-class baseline, using cross-validated accuracy.

import pandas as pd
from sklearn.compose import make_column_selector as selector
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder


def _print_mean_accuracy(test_scores):
    """Print the mean and standard deviation of cross-validation test scores.

    Parameters
    ----------
    test_scores : array-like exposing ``mean()`` and ``std()``
        Per-fold scores, e.g. ``cross_validate(...)["test_score"]``.
    """
    print(
        "The mean cross-validation accuracy is: "
        f"{test_scores.mean():.3f} ± {test_scores.std():.3f}"
    )


# --- Load the dataset and separate the target from the features -------------
adult_census = pd.read_csv("../datasets/adult-census.csv")

target_name = "class"
target = adult_census[target_name]
# "education-num" is dropped together with the target column.
data = adult_census.drop(columns=[target_name, "education-num"])

# --- Keep only the object-dtype (string-valued) columns ---------------------
categorical_columns_selector = selector(dtype_include=object)
categorical_columns = categorical_columns_selector(data)
data_categorical = data[categorical_columns]

# --- Ordinal encoding followed by logistic regression -----------------------
# solution
model = make_pipeline(
    # Categories not seen while fitting a fold are mapped to -1 instead of
    # raising an error at prediction time.
    OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1),
    # Iteration cap set explicitly to 500 for the solver.
    LogisticRegression(max_iter=500),
)

# solution
cv_results = cross_validate(model, data_categorical, target)
scores = cv_results["test_score"]
_print_mean_accuracy(scores)

# --- Baseline: always predict the most frequent class -----------------------
cv_results = cross_validate(
    DummyClassifier(strategy="most_frequent"), data_categorical, target
)
scores = cv_results["test_score"]
_print_mean_accuracy(scores)

# --- One-hot encoding followed by logistic regression -----------------------
# solution
model = make_pipeline(
    # Unknown categories at prediction time are ignored rather than raising.
    OneHotEncoder(handle_unknown="ignore"),
    LogisticRegression(max_iter=500),
)
cv_results = cross_validate(model, data_categorical, target)
scores = cv_results["test_score"]
_print_mean_accuracy(scores)