# Build three synthetic binary-classification datasets (moons, Gaussian
# quantiles, XOR) and compare how different feature-engineering steps change
# the decision boundary of a logistic-regression classifier.
import numpy as np
import pandas as pd
from sklearn.datasets import make_moons

feature_names = ["Feature #0", "Feature #1"]
target_name = "class"

X, y = make_moons(n_samples=100, noise=0.13, random_state=42)

# We store both the data and target in a dataframe to ease plotting
moons = pd.DataFrame(
    np.concatenate([X, y[:, np.newaxis]], axis=1),
    columns=feature_names + [target_name],
)
data_moons, target_moons = moons[feature_names], moons[target_name]

from sklearn.datasets import make_gaussian_quantiles

X, y = make_gaussian_quantiles(
    n_samples=100, n_features=2, n_classes=2, random_state=42
)
gauss = pd.DataFrame(
    np.concatenate([X, y[:, np.newaxis]], axis=1),
    columns=feature_names + [target_name],
)
data_gauss, target_gauss = gauss[feature_names], gauss[target_name]

# XOR dataset: the class is 1 exactly when the two features have opposite
# signs, so no linear boundary can separate it.
xor = pd.DataFrame(
    np.random.RandomState(0).uniform(low=-1, high=1, size=(200, 2)),
    columns=feature_names,
)
target_xor = np.logical_xor(xor["Feature #0"] > 0, xor["Feature #1"] > 0)
target_xor = target_xor.astype(np.int32)
# Use target_name (instead of a hard-coded "class") for consistency with the
# moons and gauss dataframes above.
xor[target_name] = target_xor
data_xor = xor[feature_names]

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

_, axs = plt.subplots(ncols=3, figsize=(14, 4), constrained_layout=True)

# Shared scatter-plot styling so all panels look alike.
common_scatter_plot_params = dict(
    cmap=ListedColormap(["tab:red", "tab:blue"]),
    edgecolor="white",
    linewidth=1,
)

axs[0].scatter(
    data_moons[feature_names[0]],
    data_moons[feature_names[1]],
    c=target_moons,
    **common_scatter_plot_params,
)
axs[1].scatter(
    data_gauss[feature_names[0]],
    data_gauss[feature_names[1]],
    c=target_gauss,
    **common_scatter_plot_params,
)
axs[2].scatter(
    data_xor[feature_names[0]],
    data_xor[feature_names[1]],
    c=target_xor,
    **common_scatter_plot_params,
)
axs[0].set(
    title="The moons dataset",
    xlabel=feature_names[0],
    ylabel=feature_names[1],
)
axs[1].set(
    title="The Gaussian quantiles dataset",
    xlabel=feature_names[0],
)
axs[2].set(
    title="The XOR dataset",
    xlabel=feature_names[0],
)

from sklearn.inspection import DecisionBoundaryDisplay


def plot_decision_boundary(model, title=None):
    """Fit ``model`` on each of the three datasets and plot its boundary.

    Parameters
    ----------
    model : estimator
        A scikit-learn classifier exposing ``fit`` and ``predict_proba``.
        It is re-fitted in place on each dataset.
    title : str, optional
        Figure-level title; omitted when ``None``.
    """
    datasets = [
        (data_moons, target_moons),
        (data_gauss, target_gauss),
        (data_xor, target_xor),
    ]
    fig, axs = plt.subplots(
        ncols=3,
        figsize=(14, 4),
        constrained_layout=True,
    )

    # enumerate over the axis/dataset pairs (idiomatic replacement for the
    # original zip(range(len(datasets)), ...) construct).
    for i, (ax, (data, target)) in enumerate(zip(axs, datasets)):
        model.fit(data, target)
        DecisionBoundaryDisplay.from_estimator(
            model,
            data,
            response_method="predict_proba",
            plot_method="pcolormesh",
            cmap="RdBu",
            alpha=0.8,
            # Setting vmin and vmax to the extreme values of the probability to
            # ensure that 0.5 is mapped to white (the middle) of the blue-red
            # colormap.
            vmin=0,
            vmax=1,
            ax=ax,
        )
        DecisionBoundaryDisplay.from_estimator(
            model,
            data,
            response_method="predict_proba",
            plot_method="contour",
            alpha=0.8,
            levels=[0.5],  # 0.5 probability contour line
            linestyles="--",
            linewidths=2,
            ax=ax,
        )
        ax.scatter(
            data[feature_names[0]],
            data[feature_names[1]],
            c=target,
            **common_scatter_plot_params,
        )
        # Only the first panel keeps a y-axis label.
        if i > 0:
            ax.set_ylabel(None)
    if title is not None:
        fig.suptitle(title)


from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Baseline: a plain linear classifier on standardized features.
logistic_regression = make_pipeline(StandardScaler(), LogisticRegression())
logistic_regression  # displays the pipeline diagram in a notebook

plot_decision_boundary(logistic_regression, title="Linear classifier")

from sklearn.preprocessing import KBinsDiscretizer

# Discretize each feature into 5 one-hot-encoded bins before the linear model.
classifier = make_pipeline(
    KBinsDiscretizer(n_bins=5, encode="onehot"),  # already the default params
    LogisticRegression(),
)
classifier

plot_decision_boundary(classifier, title="Binning classifier")

from sklearn.preprocessing import SplineTransformer

# Smooth non-linear expansion of each feature with cubic splines.
classifier = make_pipeline(
    SplineTransformer(degree=3, n_knots=5),
    LogisticRegression(),
)
classifier

plot_decision_boundary(classifier, title="Spline classifier")

from sklearn.preprocessing import PolynomialFeatures

# Degree-3 polynomial expansion introduces feature interactions, which the
# axis-aligned binning and spline transformers above cannot model.
classifier = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=3, include_bias=False),
    LogisticRegression(C=10),
)
classifier
plot_decision_boundary(classifier, title="Polynomial classifier")

# Kernel-approximation experiments: Nystroem maps the data into an explicit
# feature space approximating a kernel, so a linear model can learn
# non-linear boundaries. Imported once here (the original repeated this
# import three times).
from sklearn.kernel_approximation import Nystroem

# Approximate a degree-3 polynomial kernel.
classifier = make_pipeline(
    StandardScaler(),
    Nystroem(kernel="poly", degree=3, coef0=1, n_components=100),
    LogisticRegression(C=10),
)
classifier  # displays the pipeline diagram in a notebook

plot_decision_boundary(classifier, title="Polynomial Nystroem classifier")

# Approximate an RBF kernel on standardized features.
classifier = make_pipeline(
    StandardScaler(),
    Nystroem(kernel="rbf", gamma=1, n_components=100),
    LogisticRegression(C=5),
)
classifier

plot_decision_boundary(classifier, title="RBF Nystroem classifier")

# Combine binning with the RBF approximation.
classifier = make_pipeline(
    KBinsDiscretizer(n_bins=5),
    Nystroem(kernel="rbf", gamma=1.0, n_components=100),
    LogisticRegression(),
)
classifier

plot_decision_boundary(classifier, title="Binning + Nystroem classifier")

# Combine splines with the RBF approximation.
classifier = make_pipeline(
    SplineTransformer(n_knots=5),
    Nystroem(kernel="rbf", gamma=1.0, n_components=100),
    LogisticRegression(),
)
classifier

plot_decision_boundary(classifier, title="Spline + RBF Nystroem classifier")