import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
# Load the diabetes dataset
diabetes = datasets.load_diabetes()

# Feature matrix and the corresponding regression targets
X = diabetes.data
y = diabetes.target
# print() is called as a function: the Python 2 statement form
# (`print X.shape`) is a SyntaxError on Python 3.
print(X.shape)
print(y.shape)

from sklearn.utils import shuffle
# Shuffle examples and targets together so each row of X stays paired with
# its entry in y; the fixed random_state makes the ordering reproducible.
X, y = shuffle(X, y, random_state=1)
# print() as a function -- the Python 2 statement form fails on Python 3.
print(X.shape)
print(y.shape)

# Keep only the feature at column index 2. Slicing with 2:3 (instead of
# indexing with 2) preserves the second axis, so X stays two-dimensional.
print(X.shape)
X = X[:, 2:3]
print(X.shape)

# The first `train_set_size` rows become the training set; every remaining
# row is held out as the test set.
train_set_size = 250
X_train, X_test = X[:train_set_size], X[train_set_size:]
print(X_train.shape)
print(X_test.shape)

# Targets are cut at the same row so they stay aligned with the examples.
y_train, y_test = y[:train_set_size], y[train_set_size:]
print(y_train.shape)
print(y_test.shape)

# Overview of the split: training and test examples on one set of axes.
# Open a new figure for this plot instead of drawing on whatever figure
# happens to be current.
plt.figure()
# Label each series so the legend tells the two splits apart.
plt.scatter(X_train, y_train, label='Train')
plt.scatter(X_test, y_test, label='Test')
plt.xlabel('Data')
plt.ylabel('Target')
plt.legend()

from sklearn import linear_model
# Linear regression model trained on the training split. fit() returns the
# estimator itself, so construction and training are chained.
regr = linear_model.LinearRegression().fit(X_train, y_train)

# Learned coefficient(s) and intercept of the fitted model
print(regr.coef_)
print(regr.intercept_)

# Mean squared error of the model's predictions on each split
print("Training error: ", np.mean(np.square(regr.predict(X_train) - y_train)))
print("Test     error: ", np.mean(np.square(regr.predict(X_test) - y_test)))


# Training set: each dot is one data example plotted against its target.
# Open a new figure instead of drawing over the current one.
plt.figure()
plt.scatter(X_train, y_train, color='black')
# Overlay the fitted linear model's predictions for the same inputs
plt.plot(X_train, regr.predict(X_train), color='blue', linewidth=3)
plt.xlabel('Data')
plt.ylabel('Target')

# Test set: each dot is one held-out data example plotted against its target.
# Open a new figure instead of drawing over the current one.
plt.figure()
plt.scatter(X_test, y_test, color='black')
# Overlay the fitted linear model's predictions for the same inputs
plt.plot(X_test, regr.predict(X_test), color='blue', linewidth=3)
plt.xlabel('Data')
plt.ylabel('Target')

# Display the open figure(s); without this call a plain script run
# finishes without showing any plot.
plt.show()