%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the forest-fires CSV from the user's ~/Documents/ML folder into a DataFrame.
dataset = pd.read_csv('~/Documents/ML/forestfires.csv')
# Lookup tables translating the lower-case abbreviations to ordinal codes.
_MONTH_CODES = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
}
_DAY_CODES = {
    'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, 'fri': 5, 'sat': 6, 'sun': 7,
}


def data_transform(dataset):
    """Encode the ``month`` and ``day`` columns of *dataset* as integers.

    Months map ``'jan'``..``'dec'`` to 1..12 and days map ``'mon'``..``'sun'``
    to 1..7. Any value that is not one of those abbreviations (including a
    value that has already been encoded) is left unchanged, so calling the
    function twice on the same frame is harmless.

    Args:
        dataset: DataFrame containing ``month`` and ``day`` columns. It is
            modified in place; any number of rows and any index is accepted.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        KeyError: If the ``month`` or ``day`` column is missing.
    """
    for column, codes in (('month', _MONTH_CODES), ('day', _DAY_CODES)):
        # Replace the whole column in one assignment. Element-wise chained
        # assignment (dataset[col][i] = ...) writes through a temporary
        # Series, which raises SettingWithCopyWarning and may not update
        # the frame at all.
        dataset[column] = [codes.get(value, value) for value in dataset[column]]
    return dataset
# Apply the month/day encoding to the loaded frame; as the last expression in
# the cell, the returned DataFrame is what the notebook displays.
data_transform(dataset)
/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:22: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:18: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:20: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy # Remove the CWD from sys.path while we load stuff. 
/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:14: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:16: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy app.launch_new_instance() /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy after removing the cwd from sys.path. 
/home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:26: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:12: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy if sys.path[0] == '': /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:24: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:39: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:33: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:41: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:31: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:35: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy /home/lizanoskova/.local/lib/python2.7/site-packages/ipykernel_launcher.py:37: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
 | X | Y | month | day | FFMC | DMC | DC | ISI | temp | RH | wind | rain | area |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7 | 5 | 3 | 5 | 86.2 | 26.2 | 94.3 | 5.1 | 8.2 | 51 | 6.7 | 0.0 | 0.00 |
1 | 7 | 4 | 10 | 2 | 90.6 | 35.4 | 669.1 | 6.7 | 18.0 | 33 | 0.9 | 0.0 | 0.00 |
2 | 7 | 4 | 10 | 6 | 90.6 | 43.7 | 686.9 | 6.7 | 14.6 | 33 | 1.3 | 0.0 | 0.00 |
3 | 8 | 6 | 3 | 5 | 91.7 | 33.3 | 77.5 | 9.0 | 8.3 | 97 | 4.0 | 0.2 | 0.00 |
4 | 8 | 6 | 3 | 7 | 89.3 | 51.3 | 102.2 | 9.6 | 11.4 | 99 | 1.8 | 0.0 | 0.00 |
5 | 8 | 6 | 8 | 7 | 92.3 | 85.3 | 488.0 | 14.7 | 22.2 | 29 | 5.4 | 0.0 | 0.00 |
6 | 8 | 6 | 8 | 1 | 92.3 | 88.9 | 495.6 | 8.5 | 24.1 | 27 | 3.1 | 0.0 | 0.00 |
7 | 8 | 6 | 8 | 1 | 91.5 | 145.4 | 608.2 | 10.7 | 8.0 | 86 | 2.2 | 0.0 | 0.00 |
8 | 8 | 6 | 9 | 2 | 91.0 | 129.5 | 692.6 | 7.0 | 13.1 | 63 | 5.4 | 0.0 | 0.00 |
9 | 7 | 5 | 9 | 6 | 92.5 | 88.0 | 698.6 | 7.1 | 22.8 | 40 | 4.0 | 0.0 | 0.00 |
10 | 7 | 5 | 9 | 6 | 92.5 | 88.0 | 698.6 | 7.1 | 17.8 | 51 | 7.2 | 0.0 | 0.00 |
11 | 7 | 5 | 9 | 6 | 92.8 | 73.2 | 713.0 | 22.6 | 19.3 | 38 | 4.0 | 0.0 | 0.00 |
12 | 6 | 5 | 8 | 5 | 63.5 | 70.8 | 665.3 | 0.8 | 17.0 | 72 | 6.7 | 0.0 | 0.00 |
13 | 6 | 5 | 9 | 1 | 90.9 | 126.5 | 686.5 | 7.0 | 21.3 | 42 | 2.2 | 0.0 | 0.00 |
14 | 6 | 5 | 9 | 3 | 92.9 | 133.3 | 699.6 | 9.2 | 26.4 | 21 | 4.5 | 0.0 | 0.00 |
15 | 6 | 5 | 9 | 5 | 93.3 | 141.2 | 713.9 | 13.9 | 22.9 | 44 | 5.4 | 0.0 | 0.00 |
16 | 5 | 5 | 3 | 6 | 91.7 | 35.8 | 80.8 | 7.8 | 15.1 | 27 | 5.4 | 0.0 | 0.00 |
17 | 8 | 5 | 10 | 1 | 84.9 | 32.8 | 664.2 | 3.0 | 16.7 | 47 | 4.9 | 0.0 | 0.00 |
18 | 6 | 4 | 3 | 3 | 89.2 | 27.9 | 70.8 | 6.3 | 15.9 | 35 | 4.0 | 0.0 | 0.00 |
19 | 6 | 4 | 4 | 6 | 86.3 | 27.4 | 97.1 | 5.1 | 9.3 | 44 | 4.5 | 0.0 | 0.00 |
20 | 6 | 4 | 9 | 2 | 91.0 | 129.5 | 692.6 | 7.0 | 18.3 | 40 | 2.7 | 0.0 | 0.00 |
21 | 5 | 4 | 9 | 1 | 91.8 | 78.5 | 724.3 | 9.2 | 19.1 | 38 | 2.7 | 0.0 | 0.00 |
22 | 7 | 4 | 6 | 7 | 94.3 | 96.3 | 200.0 | 56.1 | 21.0 | 44 | 4.5 | 0.0 | 0.00 |
23 | 7 | 4 | 8 | 6 | 90.2 | 110.9 | 537.4 | 6.2 | 19.5 | 43 | 5.8 | 0.0 | 0.00 |
24 | 7 | 4 | 8 | 6 | 93.5 | 139.4 | 594.2 | 20.3 | 23.7 | 32 | 5.8 | 0.0 | 0.00 |
25 | 7 | 4 | 8 | 7 | 91.4 | 142.4 | 601.4 | 10.6 | 16.3 | 60 | 5.4 | 0.0 | 0.00 |
26 | 7 | 4 | 9 | 5 | 92.4 | 117.9 | 668.0 | 12.2 | 19.0 | 34 | 5.8 | 0.0 | 0.00 |
27 | 7 | 4 | 9 | 1 | 90.9 | 126.5 | 686.5 | 7.0 | 19.4 | 48 | 1.3 | 0.0 | 0.00 |
28 | 6 | 3 | 9 | 6 | 93.4 | 145.4 | 721.4 | 8.1 | 30.2 | 24 | 2.7 | 0.0 | 0.00 |
29 | 6 | 3 | 9 | 7 | 93.5 | 149.3 | 728.6 | 8.1 | 22.8 | 39 | 3.6 | 0.0 | 0.00 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
487 | 5 | 4 | 8 | 2 | 95.1 | 141.3 | 605.8 | 17.7 | 26.4 | 34 | 3.6 | 0.0 | 16.40 |
488 | 4 | 4 | 8 | 2 | 95.1 | 141.3 | 605.8 | 17.7 | 19.4 | 71 | 7.6 | 0.0 | 46.70 |
489 | 4 | 4 | 8 | 3 | 95.1 | 141.3 | 605.8 | 17.7 | 20.6 | 58 | 1.3 | 0.0 | 0.00 |
490 | 4 | 4 | 8 | 3 | 95.1 | 141.3 | 605.8 | 17.7 | 28.7 | 33 | 4.0 | 0.0 | 0.00 |
491 | 4 | 4 | 8 | 4 | 95.8 | 152.0 | 624.1 | 13.8 | 32.4 | 21 | 4.5 | 0.0 | 0.00 |
492 | 1 | 3 | 8 | 5 | 95.9 | 158.0 | 633.6 | 11.3 | 32.4 | 27 | 2.2 | 0.0 | 0.00 |
493 | 1 | 3 | 8 | 5 | 95.9 | 158.0 | 633.6 | 11.3 | 27.5 | 29 | 4.5 | 0.0 | 43.32 |
494 | 6 | 6 | 8 | 6 | 96.0 | 164.0 | 643.0 | 14.0 | 30.8 | 30 | 4.9 | 0.0 | 8.59 |
495 | 6 | 6 | 8 | 1 | 96.2 | 175.5 | 661.8 | 16.8 | 23.9 | 42 | 2.2 | 0.0 | 0.00 |
496 | 4 | 5 | 8 | 1 | 96.2 | 175.5 | 661.8 | 16.8 | 32.6 | 26 | 3.1 | 0.0 | 2.77 |
497 | 3 | 4 | 8 | 2 | 96.1 | 181.1 | 671.2 | 14.3 | 32.3 | 27 | 2.2 | 0.0 | 14.68 |
498 | 6 | 5 | 8 | 2 | 96.1 | 181.1 | 671.2 | 14.3 | 33.3 | 26 | 2.7 | 0.0 | 40.54 |
499 | 7 | 5 | 8 | 2 | 96.1 | 181.1 | 671.2 | 14.3 | 27.3 | 63 | 4.9 | 6.4 | 10.82 |
500 | 8 | 6 | 8 | 2 | 96.1 | 181.1 | 671.2 | 14.3 | 21.6 | 65 | 4.9 | 0.8 | 0.00 |
501 | 7 | 5 | 8 | 2 | 96.1 | 181.1 | 671.2 | 14.3 | 21.6 | 65 | 4.9 | 0.8 | 0.00 |
502 | 4 | 4 | 8 | 2 | 96.1 | 181.1 | 671.2 | 14.3 | 20.7 | 69 | 4.9 | 0.4 | 0.00 |
503 | 2 | 4 | 8 | 3 | 94.5 | 139.4 | 689.1 | 20.0 | 29.2 | 30 | 4.9 | 0.0 | 1.95 |
504 | 4 | 3 | 8 | 3 | 94.5 | 139.4 | 689.1 | 20.0 | 28.9 | 29 | 4.9 | 0.0 | 49.59 |
505 | 1 | 2 | 8 | 4 | 91.0 | 163.2 | 744.4 | 10.1 | 26.7 | 35 | 1.8 | 0.0 | 5.80 |
506 | 1 | 2 | 8 | 5 | 91.0 | 166.9 | 752.6 | 7.1 | 18.5 | 73 | 8.5 | 0.0 | 0.00 |
507 | 2 | 4 | 8 | 5 | 91.0 | 166.9 | 752.6 | 7.1 | 25.9 | 41 | 3.6 | 0.0 | 0.00 |
508 | 1 | 2 | 8 | 5 | 91.0 | 166.9 | 752.6 | 7.1 | 25.9 | 41 | 3.6 | 0.0 | 0.00 |
509 | 5 | 4 | 8 | 5 | 91.0 | 166.9 | 752.6 | 7.1 | 21.1 | 71 | 7.6 | 1.4 | 2.17 |
510 | 6 | 5 | 8 | 5 | 91.0 | 166.9 | 752.6 | 7.1 | 18.2 | 62 | 5.4 | 0.0 | 0.43 |
511 | 8 | 6 | 8 | 7 | 81.6 | 56.7 | 665.6 | 1.9 | 27.8 | 35 | 2.7 | 0.0 | 0.00 |
512 | 4 | 3 | 8 | 7 | 81.6 | 56.7 | 665.6 | 1.9 | 27.8 | 32 | 2.7 | 0.0 | 6.44 |
513 | 2 | 4 | 8 | 7 | 81.6 | 56.7 | 665.6 | 1.9 | 21.9 | 71 | 5.8 | 0.0 | 54.29 |
514 | 7 | 4 | 8 | 7 | 81.6 | 56.7 | 665.6 | 1.9 | 21.2 | 70 | 6.7 | 0.0 | 11.16 |
515 | 1 | 4 | 8 | 6 | 94.4 | 146.0 | 614.7 | 11.3 | 25.6 | 42 | 4.0 | 0.0 | 0.00 |
516 | 6 | 3 | 11 | 2 | 79.5 | 3.0 | 106.7 | 1.1 | 11.8 | 31 | 4.5 | 0.0 | 0.00 |
517 rows × 13 columns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Features are the first 12 columns; the target is the last column.
X = dataset.iloc[:, :12]
y = dataset.iloc[:, -1]

# Hold out 33% of the rows for testing, with a fixed seed so the split is
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

# Fit a linear regression on the training split.
model = LinearRegression()
model.fit(X_train, y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
# Predict on the held-out rows and report the mean absolute error.
y_pred = model.predict(X_test)
# mean_absolute_error is the metric called on the next line, so it must be
# imported; mean_squared_error is kept because later cells rely on it.
from sklearn.metrics import mean_absolute_error, mean_squared_error
mean_absolute_error(y_test, y_pred)
16.441192733314047
# Fractions of the data to hold out as the test set.
percentage = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99])
# One mean-squared-error value per test fraction.
error = np.zeros(len(percentage))
# Store each result at the position of its own test fraction. Writing through
# an inner range(10) loop overwrote every slot on each pass, leaving `error`
# filled with the value for the last fraction only.
for i, percent in enumerate(percentage):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=percent, random_state=0
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    error[i] = mean_squared_error(y_test, y_pred)
plt.plot(percentage, error)
plt.xlabel("Percentage of test data")
plt.ylabel("Error")
Text(0,0.5,'Error')