"""Fit a one-feature linear regression on scikit-learn's diabetes dataset.

The script loads the dataset, shuffles it reproducibly, keeps a single
feature column, splits the rows into a training and a test set, fits
``LinearRegression`` on the training rows, prints the learned parameters and
the mean squared errors, and plots the data together with the fitted line.
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn import linear_model
from sklearn.utils import shuffle

# Load the diabetes dataset.
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target
print(X.shape)
print(y.shape)

# Shuffle examples and targets together; the fixed seed keeps the
# train/test split below reproducible between runs.
X, y = shuffle(X, y, random_state=1)
print(X.shape)
print(y.shape)

# Use only one column from the data. Slicing with 2:3 (rather than indexing
# with 2) keeps X two-dimensional, which is what the estimator's fit() and
# predict() expect.
# NOTE(review): column index 2 is listed as "bmi" in the scikit-learn dataset
# description -- confirm against the installed version if that matters.
print(X.shape)
X = X[:, 2:3]
print(X.shape)

# Split into train and test sets by row position.
train_set_size = 250
X_train = X[:train_set_size]  # first 250 rows (examples) for the train set
X_test = X[train_set_size:]  # row 250 until the last one for the test set
print(X_train.shape)
print(X_test.shape)

y_train = y[:train_set_size]  # first 250 targets for the train set
y_test = y[train_set_size:]  # targets from row 250 onwards for the test set
print(y_train.shape)
print(y_test.shape)

# Figure 1: train and test examples on the same axes. Each plot group gets
# its own figure so that, when run as a script, later plots do not draw on
# top of earlier ones.
plt.figure()
plt.scatter(X_train, y_train)
plt.scatter(X_test, y_test)
plt.xlabel('Data')
plt.ylabel('Target')

# Fit an ordinary least-squares linear model on the training set only.
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)
print(regr.coef_)
print(regr.intercept_)

# The mean squared error on the data the model was fitted to and on the
# held-out test data.
print("Training error: ", np.mean((regr.predict(X_train) - y_train) ** 2))
print("Test error: ", np.mean((regr.predict(X_test) - y_test) ** 2))

# Figure 2: training set. Each dot is one data example and its
# corresponding target value.
plt.figure()
plt.scatter(X_train, y_train, color='black')
# Plot the fitted linear model over the training inputs.
plt.plot(X_train, regr.predict(X_train), color='blue', linewidth=3)
plt.xlabel('Data')
plt.ylabel('Target')

# Figure 3: test set. Each dot is one data example and its corresponding
# target value.
plt.figure()
plt.scatter(X_test, y_test, color='black')
# Plot the fitted linear model over the test inputs.
plt.plot(X_test, regr.predict(X_test), color='blue', linewidth=3)
plt.xlabel('Data')
plt.ylabel('Target')

# Display all figures (needed when running as a plain script rather than in
# a notebook with inline plotting).
plt.show()