%run ../../common_functions/import_all.py
from common_functions.setup_notebook import set_css_style, setup_matplotlib, config_ipython
from sklearn.linear_model import LinearRegression
# Notebook setup: call the helpers imported from common_functions.setup_notebook.
# NOTE(review): their effects are defined outside this file; judging by the names
# they configure IPython, matplotlib defaults and the notebook CSS -- confirm.
config_ipython()
setup_matplotlib()
set_css_style()
This notebook uses the dataset on head size and brain weight from a 1905 Biometrika paper (see the references at the bottom). The data is provided in this repo for convenience, under the folder `datasets/`.
# Simple one-feature linear regression: brain weight (g) as a function of
# head size (cm^3). Loads the dataset, fits the model, plots the data with the
# fitted line (also saved to linreg.png), and prints slope and intercept.

# Load the dataset shipped with the repo
df = pd.read_csv('../../datasets/head_size_brain_weight.csv')

# Num samples (count of non-null head size values)
n = df.count()['Head_size(cm^3)']

# Invoking the regressor (fit the intercept as well)
lr = LinearRegression(fit_intercept=True)

# Getting x as head size column and y as brain weight column.
# .values is used instead of .as_matrix(), which is deprecated and has been
# removed from recent pandas releases.
# x is reshaped from (num_rows,) to (num_rows, 1) because the regressor's fit
# method expects a 2D feature matrix even with a single feature; -1 lets NumPy
# infer the number of rows from the data itself rather than from a separate count.
x = df['Head_size(cm^3)'].values.reshape(-1, 1)
y = df['Brain_weight(g)'].values

# Fit the model (fit returns the fitted estimator itself)
fit = lr.fit(x, y)

# Plot the data and the fitting line
plt.scatter(x, y, color='black');
plt.plot(x, fit.predict(x), color='blue')
plt.xlabel('Head size (cm^3)')
plt.ylabel('Brain Weight (g)')
# Save before show(): the figure must be written out while it is still current
plt.savefig('linreg.png', dpi=200)
plt.show();

# Display the fitted slope and intercept of the fitting line
print('Slope of the fit: ', fit.coef_)
print('Intercept of the fit: ', fit.intercept_)
/Users/martina/Desktop/Mallzee/repos/plantation/venv/lib/python3.7/site-packages/ipykernel_launcher.py:10: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead. # Remove the CWD from sys.path while we load stuff. /Users/martina/Desktop/Mallzee/repos/plantation/venv/lib/python3.7/site-packages/ipykernel_launcher.py:11: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead. # This is added back by InteractiveShellApp.init_path()
<matplotlib.collections.PathCollection at 0x11144f910>
[<matplotlib.lines.Line2D at 0x124609d10>]
Text(0.5, 0, 'Head size (cm^3)')
Text(0, 0.5, 'Brain Weight (g)')
Slope of the fit: [0.26342934] Intercept of the fit: 325.5734210494426