import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LinearRegression
import watermark
# Load the watermark IPython extension so that the %watermark magic is available.
%load_ext watermark
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Print an environment report: interpreter, machine, git hash and the versions
# of the imported packages.
%watermark -i -n -v -m -g -iv
Python implementation: CPython Python version : 3.8.5 IPython version : 7.19.0 Compiler : Clang 10.0.0 OS : Darwin Release : 21.4.0 Machine : x86_64 Processor : i386 CPU cores : 16 Architecture: 64bit Git hash: 967e6d5ea0aa8fdb11b3a19bce642a60fcedbb9a numpy : 1.19.2 pandas : 1.1.3 matplotlib: 3.3.2 json : 2.0.9 watermark : 2.1.0 sklearn : 0.0
# Apply the local matplotlib style sheet to the figures created below.
matplotlib.style.use('./d4sci.mplstyle')

# Read the iris measurements from disk; the bare name on the last line makes
# the notebook display the resulting frame.
iris = pd.read_csv(filepath_or_buffer='data/iris.csv')
iris
| | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
Split the dataset by species for convenience.
# One frame per species, keeping only the two width columns. Rows and columns
# are selected in a single .loc call (boolean row mask + column list).
setosa = iris.loc[iris['species'] == 'setosa', ['sepal_width', 'petal_width']]
versicolor = iris.loc[iris['species'] == 'versicolor', ['sepal_width', 'petal_width']]
virginica = iris.loc[iris['species'] == 'virginica', ['sepal_width', 'petal_width']]
def _fit_width_model(frame):
    """Regress petal_width on sepal_width for *frame*.

    Returns a ``(model, fitted)`` pair: the fitted ``LinearRegression`` and
    its predictions for the same rows it was trained on.
    """
    # scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
    features = frame['sepal_width'].values.reshape(-1, 1)
    model = LinearRegression()
    model.fit(features, frame['petal_width'])
    return model, model.predict(features)


# One regression per species, plus one over the whole dataset.
lm_setosa, y_setosa = _fit_width_model(setosa)
lm_versicolor, y_versicolor = _fit_width_model(versicolor)
lm_virginica, y_virginica = _fit_width_model(virginica)
lm_full, y_full = _fit_width_model(iris)
# Two panels sharing the y axis: the left one shows a single fit over all the
# data, the right one shows a separate fit for each species.
fig, axs = plt.subplots(ncols=2, sharey=True)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# Left panel: labelled scatter per species, then the line fitted on all rows.
for species_name, species_df, species_color in zip(
    ('setosa', 'versicolor', 'virginica'),
    (setosa, versicolor, virginica),
    colors,
):
    species_df.plot.scatter(
        x='sepal_width', y='petal_width', label=species_name, ax=axs[0], c=species_color
    )
l4 = axs[0].plot(iris['sepal_width'].values.reshape(-1, 1), y_full, '-', c=colors[3])[0]

# Right panel: the same scatters (unlabelled), then one fitted line per species
# drawn in that species' colour.
for species_df, species_color in zip((setosa, versicolor, virginica), colors):
    species_df.plot.scatter(x='sepal_width', y='petal_width', ax=axs[1], c=species_color)
l1, l2, l3 = (
    axs[1].plot(df['sepal_width'].values.reshape(-1, 1), fitted, '-', c=color)[0]
    for df, fitted, color in zip(
        (setosa, versicolor, virginica),
        (y_setosa, y_versicolor, y_virginica),
        colors,
    )
)

for panel in axs:
    panel.set_xlabel('Sepal Width')
axs[0].set_ylabel('Petal Width')

# Leave room below the panels for the legend, which is anchored under the
# left axes and built from the four fitted-line handles.
fig.subplots_adjust(bottom=0.3, wspace=0.33)
axs[0].legend(
    handles=[l1, l2, l3, l4],
    labels=['Setosa', 'Versicolor', 'Virginica', 'Total'],
    loc='lower left',
    bbox_to_anchor=(0, -0.4),
    ncol=2,
    fancybox=True,
    shadow=False,
)
<matplotlib.legend.Legend at 0x7fa800b226d0>
# Refit on the data with the setosa rows removed (working on a copy of the
# filtered frame).
reduced = iris.loc[iris['species'] != 'setosa'].copy()

# Selecting the column with a list keeps the features 2-D, as fit/predict expect.
lm_reduced = LinearRegression().fit(
    reduced[['sepal_width']].values, reduced['petal_width']
)
y_reduced = lm_reduced.predict(reduced[['sepal_width']].values)
# Single panel: versicolor and virginica points, their individual fits, and
# the fit computed on both species together.
fig, axs = plt.subplots(ncols=1, sharey=True)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

for species_df, species_color in ((versicolor, colors[1]), (virginica, colors[2])):
    species_df.plot.scatter(x='sepal_width', y='petal_width', ax=axs, c=species_color)

# (frame providing x values, fitted y values, colour, legend label)
fitted_lines = (
    (versicolor, y_versicolor, colors[1], 'versicolor'),
    (virginica, y_virginica, colors[2], 'virginica'),
    (reduced, y_reduced, colors[3], 'reduced'),
)
for line_df, line_fit, line_color, line_label in fitted_lines:
    axs.plot(
        line_df['sepal_width'].values.reshape(-1, 1),
        line_fit,
        '-',
        c=line_color,
        label=line_label,
    )

axs.set(xlabel='Sepal Width', ylabel='Petal Width')
plt.legend()
<matplotlib.legend.Legend at 0x7fa7b866aa00>