"""Linear-regression walkthrough on the scikit-learn diabetes dataset.

The script
1. loads and shuffles the data, then fits a one-feature linear model on a
   250-row training split and reports/plots train and test behaviour;
2. fits multi-feature models on the full data, once with every column, once
   without column 0 and once without column 4, and plots the resulting
   coefficients against their original column index.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import linear_model
from sklearn.utils import shuffle


def _mean_squared_error(predicted, actual):
    """Return the mean of the squared element-wise differences.

    Both arguments are converted to NumPy arrays first so the result is a
    plain scalar regardless of whether a DataFrame or an ndarray is passed.

    Raises:
        ValueError: if the two arrays differ in shape; subtracting e.g. an
            (n,) array from an (n, 1) array would silently broadcast to
            (n, n) and produce a meaningless error value.
    """
    predicted = np.asarray(predicted)
    actual = np.asarray(actual)
    if predicted.shape != actual.shape:
        raise ValueError(
            "shape mismatch: predicted %s vs actual %s"
            % (predicted.shape, actual.shape)
        )
    return np.mean((predicted - actual) ** 2)


# Load the diabetes dataset
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target
print(X.shape)
print(y.shape)

# Shuffle rows (features and targets together); the fixed seed keeps the
# train/test split below reproducible.
X, y = shuffle(X, y, random_state=1)
print(X.shape)
print(y.shape)

# Use only one column from data; the 2:3 slice keeps X two-dimensional,
# which is what the estimator's fit/predict expect.
print(X.shape)
X = X[:, 2:3]
print(X.shape)

train_set_size = 250
X_train = X[:train_set_size]  # first 250 rows (examples) for the train set
X_test = X[train_set_size:]   # row 250 until the last one for the test set
print(X_train.shape)
print(X_test.shape)

y_train = y[:train_set_size]  # first 250 rows (targets) for the train set
y_test = y[train_set_size:]   # row 250 until the last one for the test set
print(y_train.shape)
print(y_test.shape)

# Each plot gets its own figure so that, when run as a plain script, the
# plots do not pile up on one shared set of axes.
plt.figure()
plt.scatter(X_train, y_train)
plt.scatter(X_test, y_test)
plt.xlabel('Data')
plt.ylabel('Target')

# --- Single-feature linear regression -------------------------------------
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)
print(regr.coef_)
print(regr.intercept_)

# The mean square error
print("Training error: ", _mean_squared_error(regr.predict(X_train), y_train))
print("Test error: ", _mean_squared_error(regr.predict(X_test), y_test))

# Visualises dots, where each dot represents a data example and its
# corresponding target
plt.figure()
plt.scatter(X_train, y_train, color='black')
# Plots the linear model
plt.plot(X_train, regr.predict(X_train), color='blue', linewidth=3)
plt.xlabel('Data')
plt.ylabel('Target')

# Same visualisation for the held-out test rows
plt.figure()
plt.scatter(X_test, y_test, color='black')
# Plots the linear model
plt.plot(X_test, regr.predict(X_test), color='blue', linewidth=3)
plt.xlabel('Data')
plt.ylabel('Target')

# --- Multi-feature regression on the full (unshuffled) data ----------------
Xdf = pd.DataFrame(diabetes.data)
print(Xdf)
ydf = pd.DataFrame(diabetes.target)
print(ydf)

# NOTE: fit() returns the estimator itself, so multi_regression and the
# *_alt names below are all aliases of `regr`; after each refit they all
# describe the most recent model. The coefficient arrays are captured right
# after each fit for that reason.
multi_regression = regr.fit(Xdf, ydf)
print(regr.coef_)
coef = regr.coef_
print(regr.intercept_)
print("error: ", _mean_squared_error(regr.predict(Xdf), ydf))

# Variant 1: drop column 0
Xdf_alt = Xdf.iloc[:, 1:]
multi_regression_alt = regr.fit(Xdf_alt, ydf)
print(regr.coef_)
coef_alt = regr.coef_
print(regr.intercept_)
print("error: ", _mean_squared_error(regr.predict(Xdf_alt), ydf))

# Variant 2: drop column 4. The column list is named once so the iloc
# selection and the plot's x positions cannot drift apart.
alt_2_columns = [0, 1, 2, 3, 5, 6, 7, 8, 9]
Xdf_alt_2 = Xdf.iloc[:, alt_2_columns]
multi_regression_alt_2 = regr.fit(Xdf_alt_2, ydf)
print(regr.coef_)
coef_alt_2 = regr.coef_
print(regr.intercept_)
print("error: ", _mean_squared_error(regr.predict(Xdf_alt_2), ydf))

# Compare coefficients, plotted against their original column index.
# The target was passed as a single-column DataFrame, so each coef_ is
# two-dimensional and row 0 holds the per-feature coefficients.
plt.figure()
plt.plot(range(1, 10), coef_alt[0], label="alt")
plt.plot(alt_2_columns, coef_alt_2[0], label="alt 2")
plt.plot(range(10), coef[0], label="regression")
plt.grid()
plt.legend(loc=3)

print(len(coef_alt[0]))
print(len(coef))

plt.show()