"""Single-feature linear regression on the scikit-learn diabetes dataset.

The script fits the same simple regression three ways and prints the results:

1. ``sklearn.linear_model.LinearRegression`` (with MSE / R^2 on a hold-out set
   and a scatter/line plot of the predictions),
2. ``statsmodels`` OLS with an explicit intercept column,
3. the closed-form normal equations solved with NumPy.
"""

import datetime as dt

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# The bundled seaborn styles were renamed to "seaborn-v0_8*" in Matplotlib 3.6
# and the old names were removed later; pick whichever this install provides.
SEABORN_STYLE = (
    "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
)
plt.style.use(SEABORN_STYLE)
mpl.rcParams["font.family"] = "DejaVu Sans"
mpl.rcParams["savefig.dpi"] = 500
np.set_printoptions(
    precision=5,
    suppress=True,
    formatter={"float": lambda x: f"{x:6.3f}"},
)

# Equivalent of the notebook magic ``%matplotlib inline``. ``get_ipython`` is
# only defined inside IPython/Jupyter, so this is skipped for plain scripts.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Number of trailing samples held out for testing.
TEST_SIZE = 20

# Load the diabetes dataset
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)

# Use only one feature (column index 2); np.newaxis keeps the array 2-D,
# which is the layout the estimators below are given.
diabetes_X = diabetes_X[:, np.newaxis, 2]

# Split the data into training/testing sets
diabetes_X_train = diabetes_X[:-TEST_SIZE]
diabetes_X_test = diabetes_X[-TEST_SIZE:]

# Split the targets into training/testing sets
diabetes_y_train = diabetes_y[:-TEST_SIZE]
diabetes_y_test = diabetes_y[-TEST_SIZE:]

# Create linear regression object
regr = LinearRegression()

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)

# The coefficients
print("Coefficients: \n", np.round(regr.coef_, 4))
# The mean squared error
mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)
print(f"Mean squared error: {mse:.2f}")
# The coefficient of determination: 1 is perfect prediction
r2 = r2_score(diabetes_y_test, diabetes_y_pred)
print(f"Coefficient of determination: {r2:.2f}")

# Plot outputs
plt.scatter(diabetes_X_test, diabetes_y_test, color="black")
plt.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()

# statsmodels OLS does not add an intercept on its own, so prepend a
# constant column to the training feature.
X_ols = sm.add_constant(diabetes_X_train)
model = sm.OLS(diabetes_y_train, X_ols).fit()
print(model.summary())

# Same model again, keeping the unfitted model (mod) and the fitted
# results (res) as separate objects.
mod = sm.OLS(diabetes_y_train, X_ols)
res = mod.fit()
print(res.summary())

# Closed-form OLS via the normal equations (X'X) beta = X'y. Solving the
# linear system avoids forming an explicit matrix inverse.
beta = np.linalg.solve(X_ols.T @ X_ols, X_ols.T @ diabetes_y_train)
print(pd.Series(beta))