import numpy as np
from sklearn import datasets
# Load the diabetes dataset
diabetes = datasets.load_diabetes()

X = diabetes.data
y = diabetes.target
# print() calls rather than Python 2 print statements: the statement form is
# a SyntaxError on Python 3, and the rest of the file already uses print(...).
print(X.shape)
print(y.shape)

# Shuffle X and y in a single call with a fixed seed (random_state=1) so the
# resulting order is the same on every run.
from sklearn.utils import shuffle
X, y = shuffle(X, y, random_state=1)
print(X.shape)
print(y.shape)

# Keep a single feature: column index 2, taken as the slice 2:3 so that X
# remains two-dimensional. Shapes are printed before and after.
print(X.shape)
X = X[:, 2:3]
print(X.shape)

# Positional split: the first `train_set_size` rows form the train set and
# every remaining row forms the test set.
train_set_size = 250
X_train, X_test = X[:train_set_size], X[train_set_size:]
for feature_part in (X_train, X_test):
    print(feature_part.shape)

# Targets are split at the same row index as the features.
y_train, y_test = y[:train_set_size], y[train_set_size:]
for target_part in (y_train, y_test):
    print(target_part.shape)

# Scatter the train examples, then the test examples, feature vs. target.
# NOTE(review): `plt` is not imported anywhere in the visible file --
# presumably matplotlib.pyplot supplied by the notebook environment; confirm.
for features, targets in ((X_train, y_train), (X_test, y_test)):
    plt.scatter(features, targets)
plt.xlabel('Data')
plt.ylabel('Target')

from sklearn import linear_model

# Fit a linear regression model on the train split (single feature).
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)

# Learned slope(s) and intercept.
print(regr.coef_)
print(regr.intercept_)

# The mean square error: average of the squared prediction residuals,
# reported separately for the train and the test split.
train_residuals = regr.predict(X_train) - y_train
test_residuals = regr.predict(X_test) - y_test
print("Training error: ", np.mean(train_residuals ** 2))
print("Test     error: ", np.mean(test_residuals ** 2))


# For the train split and then the test split: draw each example as a black
# dot (feature value vs. its target value), overlay the fitted linear model's
# predictions as a blue line, and label both axes.
for features, targets in ((X_train, y_train), (X_test, y_test)):
    plt.scatter(features, targets, color='black')
    plt.plot(features, regr.predict(features), color='blue', linewidth=3)
    plt.xlabel('Data')
    plt.ylabel('Target')

import pandas as pd

# Wrap the full (unshuffled, all-column) feature matrix in a DataFrame.
Xdf = pd.DataFrame(diabetes.data)
# Bare expression: echoed by an interactive session, no effect as a script.
Xdf

# Targets as a single-column DataFrame.
ydf = pd.DataFrame(diabetes.target)
ydf

# Refit the existing estimator `regr` on all features; this replaces the
# single-feature fit it held before. The value returned by fit() is bound to
# multi_regression (per scikit-learn's API that is the estimator itself).
multi_regression = regr.fit(Xdf, ydf)

print(regr.coef_)
# Keep a reference to this fit's coefficients; `regr` is refit again later.
coef = regr.coef_
print(regr.intercept_)

# Mean squared error on the data the model was fitted on. ydf.values keeps
# the residuals a plain ndarray so np.mean yields a single number; subtracting
# the DataFrame itself makes the residuals a DataFrame, whose mean is not a
# plain scalar on every pandas version.
print("error: ", np.mean((regr.predict(Xdf) - ydf.values) ** 2))

# Variant 1: drop the column at position 0 and refit `regr` on the rest.
Xdf_alt = Xdf.iloc[:, 1:]
multi_regression_alt = regr.fit(Xdf_alt, ydf)
print(regr.coef_)
# Keep a reference to this fit's coefficients; `regr` is refit again later.
coef_alt = regr.coef_
print(regr.intercept_)
# Mean squared error on the fitted data. ydf.values keeps the residuals a
# plain ndarray so np.mean yields a single number rather than a pandas
# object whose form depends on the pandas version.
print("error: ", np.mean((regr.predict(Xdf_alt) - ydf.values) ** 2))

# Variant 2: keep every column except the one at position 4. The column
# positions are given as a list: a tuple in this slot is not accepted as a
# list-like indexer by newer pandas ("Too many indexers").
Xdf_alt_2 = Xdf.iloc[:, [0, 1, 2, 3, 5, 6, 7, 8, 9]]
# NOTE(review): this rebinds multi_regression_alt, the same name used for the
# previous variant -- possibly a distinct name was intended; confirm.
multi_regression_alt = regr.fit(Xdf_alt_2, ydf)
print(regr.coef_)
coef_alt_2 = regr.coef_
print(regr.intercept_)
# Mean squared error on the fitted data. ydf.values keeps the residuals a
# plain ndarray so np.mean yields a single number rather than a pandas
# object whose form depends on the pandas version.
print("error: ", np.mean((regr.predict(Xdf_alt_2) - ydf.values) ** 2))

# Compare the coefficients of the three fits. Each curve's x values are the
# original column positions of the features that fit used, so coefficients
# of the same feature line up across curves.
# Calls go through plt.* like the other plots in this file; the bare
# plot/grid/legend names are never imported in the visible file.
plt.plot(range(1, 10), coef_alt[0], label="alt")
plt.plot([0, 1, 2, 3, 5, 6, 7, 8, 9], coef_alt_2[0], label="alt 2")
plt.plot(range(10), coef[0], label="regression")
plt.grid()
plt.legend(loc=3)

# Bare expressions: echoed by an interactive session, no effect as a script.
len(coef_alt[0])

# NOTE(review): coef is indexed as coef[0] in the plot above, so len(coef)
# measures its first axis rather than the coefficient row -- possibly
# len(coef[0]) was intended; confirm.
len(coef)