import numpy as np

# Generate a synthetic dataset: a cubic relationship between the input
# feature and the target, plus Gaussian noise.
rng = np.random.RandomState(0)
n_sample = 100
data_max, data_min = 1.4, -1.4
len_data = data_max - data_min
# sort the data to make plotting easier later
data = np.sort(rng.rand(n_sample) * len_data - len_data / 2)
noise = rng.randn(n_sample) * 0.3
target = data**3 - 0.5 * data**2 + noise

import pandas as pd

# Gather the feature and the target in a dataframe for plotting.
full_data = pd.DataFrame({"input_feature": data, "target": target})

import seaborn as sns

_ = sns.scatterplot(
    data=full_data, x="input_feature", y="target", color="black", alpha=0.5
)

# X should be 2D for sklearn: (n_samples, n_features)
data = data.reshape((-1, 1))
data.shape


def fit_score_plot_regression(model, title=None):
    # Fit the model on the full dataset, report the training MSE and overlay
    # the predictions on the scatter plot. Uses the global `data`, `target`,
    # `full_data` and `mean_squared_error` (imported below, before the first
    # call to this helper).
    model.fit(data, target)
    target_predicted = model.predict(data)
    mse = mean_squared_error(target, target_predicted)
    ax = sns.scatterplot(
        data=full_data, x="input_feature", y="target", color="black", alpha=0.5
    )
    ax.plot(data, target_predicted)
    if title is not None:
        _ = ax.set_title(title + f" (MSE = {mse:.2f})")
    else:
        _ = ax.set_title(f"Mean squared error = {mse:.2f}")


from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Baseline: a plain linear regression on the single raw feature.
linear_regression = LinearRegression()
linear_regression

fit_score_plot_regression(linear_regression, title="Simple linear regression")

print(
    f"weight: {linear_regression.coef_[0]:.2f}, "
    f"intercept: {linear_regression.intercept_:.2f}"
)

from sklearn.tree import DecisionTreeRegressor

# A shallow decision tree can model the non-linearity with piecewise-constant
# predictions.
tree = DecisionTreeRegressor(max_depth=3).fit(data, target)
tree

fit_score_plot_regression(tree, title="Decision tree regression")

data.shape

# Manual feature engineering: augment the single feature with its square and
# cube so that a linear model can fit the cubic relationship.
data_expanded = np.concatenate([data, data**2, data**3], axis=1)
data_expanded.shape

from sklearn.preprocessing import PolynomialFeatures

# The same expansion can be done by a scikit-learn transformer.
polynomial_expansion = PolynomialFeatures(degree=3, include_bias=False)

from sklearn.pipeline import make_pipeline

polynomial_regression = make_pipeline(
    PolynomialFeatures(degree=3, include_bias=False),
    LinearRegression(),
)
polynomial_regression

fit_score_plot_regression(polynomial_regression, title="Polynomial regression")

from sklearn.svm import SVR

# Kernel methods: support vector regression with a linear kernel...
svr = SVR(kernel="linear")
svr

fit_score_plot_regression(svr, title="Linear support vector machine")

# ... and with a polynomial kernel of degree 3.
svr = SVR(kernel="poly", degree=3)
svr

fit_score_plot_regression(svr, title="Polynomial support vector machine")

from sklearn.preprocessing import KBinsDiscretizer

# Discretize the feature into bins, then fit a linear model on the one-hot
# encoded bins: the prediction is piecewise constant.
binned_regression = make_pipeline(
    KBinsDiscretizer(n_bins=8),
    LinearRegression(),
)
binned_regression

fit_score_plot_regression(binned_regression, title="Binned regression")

from sklearn.preprocessing import SplineTransformer

# B-spline expansion of the feature followed by a linear model.
spline_regression = make_pipeline(
    SplineTransformer(degree=3, include_bias=False),
    LinearRegression(),
)
spline_regression

fit_score_plot_regression(spline_regression, title="Spline regression")

from sklearn.kernel_approximation import Nystroem

# Approximate a polynomial kernel with a low-rank Nystroem expansion, then
# fit a linear model on the resulting features.
nystroem_regression = make_pipeline(
    Nystroem(kernel="poly", degree=3, n_components=5, random_state=0),
    LinearRegression(),
)
nystroem_regression

fit_score_plot_regression(
    nystroem_regression, title="Polynomial Nystroem regression"
)
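
# --- Optional follow-up (not part of the original code above) ---------------
# The MSE printed by fit_score_plot_regression is computed on the training
# data, so more flexible models can look better than they generalize. A
# minimal sketch of a fairer comparison with cross-validation, reusing the
# `data`/`target` arrays and the estimators defined above; the choice of
# models, cv=10 and the scoring string are assumptions, not from the original.
from sklearn.model_selection import cross_validate

for name, model in [
    ("linear regression", linear_regression),
    ("decision tree", tree),
    ("polynomial regression", polynomial_regression),
    ("binned regression", binned_regression),
    ("spline regression", spline_regression),
    ("Nystroem regression", nystroem_regression),
]:
    cv_results = cross_validate(
        model, data, target, cv=10, scoring="neg_mean_squared_error"
    )
    # scikit-learn reports negated MSE so that higher is better; flip the sign
    test_mse = -cv_results["test_score"]
    print(f"{name}: MSE = {test_mse.mean():.3f} +/- {test_mse.std():.3f}")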