In [1]:

#importing libraries 
import numpy as np 
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:

import warnings
warnings.filterwarnings("ignore")

In [3]:

#Loading the Dataset
data = pd.read_csv('forecast.csv', sep=',')
data.head() 

Out[3]:

	YEAR	Annual	Jan-Feb	Mar-May	Jun-Sept	Oct-Dec	Annual_mavg	Jan_mavg	Mar_mavg	Jun_mavg	Oct_mavg
0	1878	26.775000	23.90	28.133333	27.475	26.400000	NaN	NaN	NaN	NaN	NaN
1	1879	26.041667	24.05	27.333333	26.825	25.033333	NaN	NaN	NaN	NaN	NaN
2	1880	26.608333	23.45	28.333333	26.875	26.633333	NaN	NaN	NaN	NaN	NaN
3	1881	26.883333	25.15	28.066667	27.300	26.300000	NaN	NaN	NaN	NaN	NaN
4	1882	26.300000	24.45	27.733333	26.650	25.633333	26.521667	24.2	27.92	27.025	26.0

In [4]:

#dropping missing values in mavg columns'
data=data.dropna(how='any')
data.head()

Out[4]:

	YEAR	Annual	Jan-Feb	Mar-May	Jun-Sept	Oct-Dec	Annual_mavg	Jan_mavg	Mar_mavg	Jun_mavg	Oct_mavg
4	1882	26.300000	24.45	27.733333	26.650	25.633333	26.521667	24.20	27.920000	27.025	26.000000
5	1883	26.100000	23.95	27.566667	26.725	25.233333	26.386667	24.21	27.806667	26.875	25.766667
6	1884	26.075000	22.85	27.366667	27.075	25.600000	26.393333	23.97	27.813333	26.925	25.880000
7	1885	26.275000	22.80	26.966667	27.450	26.333333	26.326667	23.84	27.540000	27.040	25.820000
8	1886	26.416667	23.30	27.833333	27.200	26.033333	26.233333	23.47	27.493333	27.020	25.766667

In [5]:

data.shape

Out[5]:

(138, 11)

In [6]:

data.isnull().sum()

Out[6]:

YEAR           0
Annual         0
Jan-Feb        0
Mar-May        0
Jun-Sept       0
Oct-Dec        0
Annual_mavg    0
Jan_mavg       0
Mar_mavg       0
Jun_mavg       0
Oct_mavg       0
dtype: int64

In [7]:

data.columns.values

Out[7]:

array(['YEAR', 'Annual', 'Jan-Feb', 'Mar-May', 'Jun-Sept', 'Oct-Dec',
       'Annual_mavg', 'Jan_mavg', 'Mar_mavg', 'Jun_mavg', 'Oct_mavg'],
      dtype=object)

In [8]:

line1=data['Annual_mavg'].plot()
line2=data['Jan_mavg'].plot()
line3=data['Mar_mavg'].plot()
line4=data['Jun_mavg'].plot()
line5=data['Oct_mavg'].plot()
plt.legend(['Annual','Jan-feb','March-may','June-sept','Oct-dec'])

Out[8]:

<matplotlib.legend.Legend at 0x7f0bd0f3a550>

In [9]:

#summary stats- checking mean and variance by plotting hist
data['Annual_mavg'].hist()

Out[9]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f0bcfc0be50>

In [10]:

sample_check=data['Annual_mavg'].values

In [11]:

split = round(len(sample_check) / 2)
sample_check1, sample_check2 = sample_check[0:split], sample_check[split:]
mean1, mean2 = sample_check1.mean(), sample_check2.mean()
var1, var2 = sample_check1.var(), sample_check2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

mean1=26.860471, mean2=27.608631
variance1=0.081734, variance2=0.060946

In [12]:

#Calculating ADF TEST via p-value interpretation
from pandas import read_csv
from statsmodels.tsa.stattools import adfuller
result = adfuller(sample_check)
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
	print('\t%s: %.3f' % (key, value))

ADF Statistic: -1.078482
p-value: 0.723576
Critical Values:
	1%: -3.483
	5%: -2.885
	10%: -2.579

In [13]:

pip install statsmodels

Requirement already satisfied: statsmodels in /usr/local/lib/python3.7/dist-packages (0.10.2)
Requirement already satisfied: pandas>=0.19 in /usr/local/lib/python3.7/dist-packages (from statsmodels) (1.1.5)
Requirement already satisfied: scipy>=0.18 in /usr/local/lib/python3.7/dist-packages (from statsmodels) (1.4.1)
Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from statsmodels) (1.19.5)
Requirement already satisfied: patsy>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from statsmodels) (0.5.1)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.19->statsmodels) (2018.9)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.19->statsmodels) (2.8.1)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from patsy>=0.4.0->statsmodels) (1.15.0)

In [14]:

import statsmodels.api as sm
from pylab import rcParams
rcParams['figure.figsize'] = 18, 8
sample_check=data['Annual_mavg'].values
decomposition =sm.tsa.seasonal_decompose(sample_check, model='additive',freq=5)
fig = decomposition.plot()
plt.show()

In [15]:

#parameters selection for ARIMA
import itertools 
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2],12) for x in list(itertools.product(p, d, q))]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

Examples of parameter combinations for Seasonal ARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 1, 12)
SARIMAX: (0, 0, 1) x (0, 1, 0, 12)
SARIMAX: (0, 1, 0) x (0, 1, 1, 12)
SARIMAX: (0, 1, 0) x (1, 0, 0, 12)

In [16]:

#fitting arima model on annual mavg column
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(sample_check,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            print('ARIMA{}x{}12- AIC:{}'.format(param, param_seasonal, results.aic))
            
        except:
            continue

ARIMA(0, 0, 0)x(0, 0, 0, 12)12- AIC:1296.309382893793
ARIMA(0, 0, 0)x(0, 0, 1, 12)12- AIC:1052.4235828699072
ARIMA(0, 0, 0)x(0, 1, 0, 12)12- AIC:9.368840650942388
ARIMA(0, 0, 0)x(0, 1, 1, 12)12- AIC:-12.257093433083444
ARIMA(0, 0, 0)x(1, 0, 0, 12)12- AIC:-40.008612917967106
ARIMA(0, 0, 0)x(1, 0, 1, 12)12- AIC:-99.33541008738071
ARIMA(0, 0, 0)x(1, 1, 0, 12)12- AIC:-11.517651073785487
ARIMA(0, 0, 0)x(1, 1, 1, 12)12- AIC:-10.257051619240336
ARIMA(0, 0, 1)x(0, 0, 0, 12)12- AIC:1106.3824417316682
ARIMA(0, 0, 1)x(0, 0, 1, 12)12- AIC:879.033433549483
ARIMA(0, 0, 1)x(0, 1, 0, 12)12- AIC:-103.24372004254619
ARIMA(0, 0, 1)x(0, 1, 1, 12)12- AIC:-110.7921825195466

/usr/local/lib/python3.7/dist-packages/statsmodels/base/model.py:512: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

ARIMA(0, 0, 1)x(1, 0, 0, 12)12- AIC:-35.80532861285145

/usr/local/lib/python3.7/dist-packages/statsmodels/base/model.py:512: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

ARIMA(0, 0, 1)x(1, 0, 1, 12)12- AIC:-21.2030535266186
ARIMA(0, 0, 1)x(1, 1, 0, 12)12- AIC:-111.43052884321197
ARIMA(0, 0, 1)x(1, 1, 1, 12)12- AIC:-110.68764636032498
ARIMA(0, 1, 0)x(0, 0, 0, 12)12- AIC:-282.2725258059022
ARIMA(0, 1, 0)x(0, 0, 1, 12)12- AIC:-255.86785258021348
ARIMA(0, 1, 0)x(0, 1, 0, 12)12- AIC:-170.00809467080254
ARIMA(0, 1, 0)x(0, 1, 1, 12)12- AIC:-206.8741578353027
ARIMA(0, 1, 0)x(1, 0, 0, 12)12- AIC:-258.7793598258834
ARIMA(0, 1, 0)x(1, 0, 1, 12)12- AIC:-257.7693720308996

/usr/local/lib/python3.7/dist-packages/statsmodels/base/model.py:512: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

ARIMA(0, 1, 0)x(1, 1, 0, 12)12- AIC:-184.65042682527144
ARIMA(0, 1, 0)x(1, 1, 1, 12)12- AIC:-198.24810105395107
ARIMA(0, 1, 1)x(0, 0, 0, 12)12- AIC:-281.4213158441471
ARIMA(0, 1, 1)x(0, 0, 1, 12)12- AIC:-257.0945052379026
ARIMA(0, 1, 1)x(0, 1, 0, 12)12- AIC:-175.3817966533949
ARIMA(0, 1, 1)x(0, 1, 1, 12)12- AIC:-209.33185333244845
ARIMA(0, 1, 1)x(1, 0, 0, 12)12- AIC:-260.85426401304363
ARIMA(0, 1, 1)x(1, 0, 1, 12)12- AIC:-264.0396185069117
ARIMA(0, 1, 1)x(1, 1, 0, 12)12- AIC:-194.36568737445552
ARIMA(0, 1, 1)x(1, 1, 1, 12)12- AIC:-198.77172729770186
ARIMA(1, 0, 0)x(0, 0, 0, 12)12- AIC:-282.9032369335381
ARIMA(1, 0, 0)x(0, 0, 1, 12)12- AIC:-259.5891777018501

/usr/local/lib/python3.7/dist-packages/statsmodels/base/model.py:512: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

ARIMA(1, 0, 0)x(0, 1, 0, 12)12- AIC:-176.24224846613126
ARIMA(1, 0, 0)x(0, 1, 1, 12)12- AIC:-208.38276600143993
ARIMA(1, 0, 0)x(1, 0, 0, 12)12- AIC:-259.3568779591814
ARIMA(1, 0, 0)x(1, 0, 1, 12)12- AIC:-260.03253108661886
ARIMA(1, 0, 0)x(1, 1, 0, 12)12- AIC:-187.92722407353688
ARIMA(1, 0, 0)x(1, 1, 1, 12)12- AIC:-200.02940047867904
ARIMA(1, 0, 1)x(0, 0, 0, 12)12- AIC:-284.3559086676155
ARIMA(1, 0, 1)x(0, 0, 1, 12)12- AIC:-257.69792602224214
ARIMA(1, 0, 1)x(0, 1, 0, 12)12- AIC:-187.43241942616322
ARIMA(1, 0, 1)x(0, 1, 1, 12)12- AIC:-211.9559526095738
ARIMA(1, 0, 1)x(1, 0, 0, 12)12- AIC:-260.65857495106366
ARIMA(1, 0, 1)x(1, 0, 1, 12)12- AIC:-265.98182151995366
ARIMA(1, 0, 1)x(1, 1, 0, 12)12- AIC:-200.7796548478161
ARIMA(1, 0, 1)x(1, 1, 1, 12)12- AIC:-201.97341085181284
ARIMA(1, 1, 0)x(0, 0, 0, 12)12- AIC:-284.29442943999175
ARIMA(1, 1, 0)x(0, 0, 1, 12)12- AIC:-257.8928320532511
ARIMA(1, 1, 0)x(0, 1, 0, 12)12- AIC:-174.80072105804192
ARIMA(1, 1, 0)x(0, 1, 1, 12)12- AIC:-211.0466048314583
ARIMA(1, 1, 0)x(1, 0, 0, 12)12- AIC:-257.79603560985964
ARIMA(1, 1, 0)x(1, 0, 1, 12)12- AIC:-264.44801883626036
ARIMA(1, 1, 0)x(1, 1, 0, 12)12- AIC:-188.107642881015
ARIMA(1, 1, 0)x(1, 1, 1, 12)12- AIC:-200.62223913763313
ARIMA(1, 1, 1)x(0, 0, 0, 12)12- AIC:-279.4275203191638
ARIMA(1, 1, 1)x(0, 0, 1, 12)12- AIC:-255.1275582819045
ARIMA(1, 1, 1)x(0, 1, 0, 12)12- AIC:-174.35577500705958
ARIMA(1, 1, 1)x(0, 1, 1, 12)12- AIC:-207.38521120328798
ARIMA(1, 1, 1)x(1, 0, 0, 12)12- AIC:-255.8508205684696
ARIMA(1, 1, 1)x(1, 0, 1, 12)12- AIC:-262.13260154597884
ARIMA(1, 1, 1)x(1, 1, 0, 12)12- AIC:-190.2470731089748
ARIMA(1, 1, 1)x(1, 1, 1, 12)12- AIC:-196.79879619340096

In [18]:

#SARIMA-KD
mod = sm.tsa.statespace.SARIMAX(sample_check,
                                order=(0, 1, 1),
                                seasonal_order=(0, 1, 0, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)

results = mod.fit()

print(results.summary().tables[1])

==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ma.L1          0.3073      0.086      3.553      0.000       0.138       0.477
sigma2         0.0136      0.002      6.490      0.000       0.010       0.018
==============================================================================

In [17]:

from statsmodels.tsa.arima_model import ARIMA  #NO
model = ARIMA(sample_check, order=(0,1,1))
model_fit = model.fit(disp=0)
print(model_fit.summary())

                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                    D.y   No. Observations:                  137
Model:                 ARIMA(0, 1, 1)   Log Likelihood                 145.230
Method:                       css-mle   S.D. of innovations              0.084
Date:                Sat, 27 Feb 2021   AIC                           -284.459
Time:                        06:02:18   BIC                           -275.699
Sample:                             1   HQIC                          -280.899
                                                                              
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0103      0.008      1.246      0.215      -0.006       0.027
ma.L1.D.y      0.1612      0.083      1.943      0.054      -0.001       0.324
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
MA.1           -6.2022           +0.0000j            6.2022            0.5000
-----------------------------------------------------------------------------

In [30]:

#getting forecast
# Actual vs Fitted

model_fit.plot_predict(dynamic=False)
plt.show()

In [24]:

#out of time cross validation
from statsmodels.tsa.stattools import acf

# Create Training and Test
train = data['Annual_mavg'].iloc[:103]
test = data['Annual_mavg'].iloc[103:]


#now build the ARIMA model on training dataset, forecast and plot it.
model = ARIMA(train, order=(7, 2, 1))  
fitted = model.fit(disp=-1)  
# Forecast
fc, se, conf = fitted.forecast(35, alpha=0.05)  # 95% conf

# Make as pandas series
fc_series = pd.Series(fc, index=test.index)
lower_series = pd.Series(conf[:, 0], index=test.index)
upper_series = pd.Series(conf[:, 1], index=test.index)

# Plot
plt.figure(figsize=(12,5), dpi=100)
plt.plot(train, label='training')
plt.plot(test, label='actual')
plt.plot(fc_series, label='forecast')
plt.fill_between(lower_series.index, lower_series, upper_series, 
                 color='k', alpha=.15)
plt.title('Forecast vs Actuals')
plt.legend(loc='upper left', fontsize=8)
plt.show()

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

In [25]:

fc_series   #721

Out[25]:

107    27.417890
108    27.402556
109    27.464561
110    27.563547
111    27.567797
112    27.568134
113    27.554812
114    27.515729
115    27.439658
116    27.427558
117    27.439256
118    27.459968
119    27.474911
120    27.514961
121    27.517909
122    27.497286
123    27.465843
124    27.452741
125    27.428501
126    27.424209
127    27.434979
128    27.457485
129    27.460856
130    27.465645
131    27.459398
132    27.447267
133    27.425267
134    27.418449
135    27.414824
136    27.418462
137    27.421785
138    27.431780
139    27.431370
140    27.427116
141    27.417377
dtype: float64

In [27]:

#plotting using plotly  #721
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
y=fc_series
x=[2020,2021,2022,2023,2024,2025,2026,2027,2028,2029,2030,2031,2032,2033,2034,
   2035,2035,2036,2037,2038,2039,2040,2041,2042,2043,2044,2045,2046]

#Plotting annual temp readings- Line plot

fig_1 = px.scatter(x=x, y=y[:28],trendline='lowess')
# Edit the layout
fig_1.update_layout(title='forecasted values for Annual Temperature()',
                   xaxis_title='Years',
                   yaxis_title='Temperature (degree celsius)')
fig_1.show()

In [28]:

# Accuracy metrics
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    maxs = np.amax(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    acf1 = acf(fc-test)[1]                      # ACF1
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse, 'acf1':acf1, 
            'corr':corr, 'minmax':minmax})

forecast_accuracy(fc, test.values)

Out[28]:

{'acf1': 0.8746432945469665,
 'corr': -0.2415009041317794,
 'mae': 0.32863466252394846,
 'mape': 0.01178035990110283,
 'me': -0.32739935864515124,
 'minmax': 0.011780343643582492,
 'mpe': -0.011735425298794015,
 'rmse': 0.3842845711132103}

In [41]:

#JAN-FEB
#importing for jan
jan_series=data['Jan_mavg'].values
data['Jan_mavg'].plot()

Out[41]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f0bb3f860d0>

In [42]:

# ETS Decomposition 

import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from pylab import rcParams
rcParams['figure.figsize'] = 18, 8

decomposition=sm.tsa.seasonal_decompose(jan_series,model='additive',freq=5)
fig =decomposition.plot()
plt.show()

In [43]:

#parameters selection for ARIMA
import itertools 
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2],12) for x in list(itertools.product(p, d, q))]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

Examples of parameter combinations for Seasonal ARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 1, 12)
SARIMAX: (0, 0, 1) x (0, 1, 0, 12)
SARIMAX: (0, 1, 0) x (0, 1, 1, 12)
SARIMAX: (0, 1, 0) x (1, 0, 0, 12)

In [44]:

#fitting arima model on annual mavg column
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(jan_series,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            print('ARIMA{}x{}12- AIC:{}'.format(param, param_seasonal, results.aic))
            
        except:
            continue

ARIMA(0, 0, 0)x(0, 0, 0, 12)12- AIC:1267.0561904121537
ARIMA(0, 0, 0)x(0, 0, 1, 12)12- AIC:1025.5346765973159
ARIMA(0, 0, 0)x(0, 1, 0, 12)12- AIC:171.54779283162563
ARIMA(0, 0, 0)x(0, 1, 1, 12)12- AIC:149.35084174133158

/usr/local/lib/python3.7/dist-packages/statsmodels/base/model.py:512: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

ARIMA(0, 0, 0)x(1, 0, 0, 12)12- AIC:164.32810642003096
ARIMA(0, 0, 0)x(1, 0, 1, 12)12- AIC:97.20807375155154
ARIMA(0, 0, 0)x(1, 1, 0, 12)12- AIC:151.50740116847587
ARIMA(0, 0, 0)x(1, 1, 1, 12)12- AIC:150.39268393937095
ARIMA(0, 0, 1)x(0, 0, 0, 12)12- AIC:1078.4524640583074
ARIMA(0, 0, 1)x(0, 0, 1, 12)12- AIC:853.26252029278
ARIMA(0, 0, 1)x(0, 1, 0, 12)12- AIC:84.65942475064821
ARIMA(0, 0, 1)x(0, 1, 1, 12)12- AIC:62.438617320037416
ARIMA(0, 0, 1)x(1, 0, 0, 12)12- AIC:81.25755918080817
ARIMA(0, 0, 1)x(1, 0, 1, 12)12- AIC:24.112003442395984
ARIMA(0, 0, 1)x(1, 1, 0, 12)12- AIC:63.54210462243084
ARIMA(0, 0, 1)x(1, 1, 1, 12)12- AIC:63.4119346630293
ARIMA(0, 1, 0)x(0, 0, 0, 12)12- AIC:-35.632361279497005
ARIMA(0, 1, 0)x(0, 0, 1, 12)12- AIC:-38.3724079327895
ARIMA(0, 1, 0)x(0, 1, 0, 12)12- AIC:42.54838738142879
ARIMA(0, 1, 0)x(0, 1, 1, 12)12- AIC:-15.024753918793799
ARIMA(0, 1, 0)x(1, 0, 0, 12)12- AIC:-37.80867466148668
ARIMA(0, 1, 0)x(1, 0, 1, 12)12- AIC:-37.02258961012134
ARIMA(0, 1, 0)x(1, 1, 0, 12)12- AIC:6.910269538476256
ARIMA(0, 1, 0)x(1, 1, 1, 12)12- AIC:-12.172744987620927
ARIMA(0, 1, 1)x(0, 0, 0, 12)12- AIC:-33.728334453806575
ARIMA(0, 1, 1)x(0, 0, 1, 12)12- AIC:-36.2577077229085
ARIMA(0, 1, 1)x(0, 1, 0, 12)12- AIC:42.71573696245512
ARIMA(0, 1, 1)x(0, 1, 1, 12)12- AIC:-12.065486851678052
ARIMA(0, 1, 1)x(1, 0, 0, 12)12- AIC:-36.34946572867911
ARIMA(0, 1, 1)x(1, 0, 1, 12)12- AIC:-35.183257077167895
ARIMA(0, 1, 1)x(1, 1, 0, 12)12- AIC:7.591166944465927
ARIMA(0, 1, 1)x(1, 1, 1, 12)12- AIC:-9.420144733799415
ARIMA(1, 0, 0)x(0, 0, 0, 12)12- AIC:-35.062004633180315
ARIMA(1, 0, 0)x(0, 0, 1, 12)12- AIC:-32.353604466747875
ARIMA(1, 0, 0)x(0, 1, 0, 12)12- AIC:33.28056996845523
ARIMA(1, 0, 0)x(0, 1, 1, 12)12- AIC:-16.436973663360718
ARIMA(1, 0, 0)x(1, 0, 0, 12)12- AIC:-36.042803881935654
ARIMA(1, 0, 0)x(1, 0, 1, 12)12- AIC:-34.640158203387024
ARIMA(1, 0, 0)x(1, 1, 0, 12)12- AIC:0.7711173967054483
ARIMA(1, 0, 0)x(1, 1, 1, 12)12- AIC:-14.210923955953291
ARIMA(1, 0, 1)x(0, 0, 0, 12)12- AIC:-31.867846717326188

/usr/local/lib/python3.7/dist-packages/statsmodels/base/model.py:512: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

ARIMA(1, 0, 1)x(0, 0, 1, 12)12- AIC:-34.86506519033499
ARIMA(1, 0, 1)x(0, 1, 0, 12)12- AIC:32.107330693934216
ARIMA(1, 0, 1)x(0, 1, 1, 12)12- AIC:-13.860721261235827
ARIMA(1, 0, 1)x(1, 0, 0, 12)12- AIC:-34.64996523097726

/usr/local/lib/python3.7/dist-packages/statsmodels/base/model.py:512: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

ARIMA(1, 0, 1)x(1, 0, 1, 12)12- AIC:-33.786834162163785
ARIMA(1, 0, 1)x(1, 1, 0, 12)12- AIC:-1.221060764136391
ARIMA(1, 0, 1)x(1, 1, 1, 12)12- AIC:-12.384714617560114
ARIMA(1, 1, 0)x(0, 0, 0, 12)12- AIC:-33.71088449249947
ARIMA(1, 1, 0)x(0, 0, 1, 12)12- AIC:-36.90719880075477
ARIMA(1, 1, 0)x(0, 1, 0, 12)12- AIC:44.42378300043548
ARIMA(1, 1, 0)x(0, 1, 1, 12)12- AIC:-12.644792849364244
ARIMA(1, 1, 0)x(1, 0, 0, 12)12- AIC:-36.96750040874178
ARIMA(1, 1, 0)x(1, 0, 1, 12)12- AIC:-35.463611901141675
ARIMA(1, 1, 0)x(1, 1, 0, 12)12- AIC:8.832328580941478
ARIMA(1, 1, 0)x(1, 1, 1, 12)12- AIC:-10.194968650816122
ARIMA(1, 1, 1)x(0, 0, 0, 12)12- AIC:-43.046913108942405
ARIMA(1, 1, 1)x(0, 0, 1, 12)12- AIC:-52.297283152924884
ARIMA(1, 1, 1)x(0, 1, 0, 12)12- AIC:40.573874158847865
ARIMA(1, 1, 1)x(0, 1, 1, 12)12- AIC:-18.150375851099742
ARIMA(1, 1, 1)x(1, 0, 0, 12)12- AIC:-53.036977205732164
ARIMA(1, 1, 1)x(1, 0, 1, 12)12- AIC:-50.448663357616354
ARIMA(1, 1, 1)x(1, 1, 0, 12)12- AIC:9.122711957638113
ARIMA(1, 1, 1)x(1, 1, 1, 12)12- AIC:-18.214746551963877

In [33]:

pip install statsmodels

Requirement already satisfied: statsmodels in /usr/local/lib/python3.7/dist-packages (0.10.2)
Requirement already satisfied: patsy>=0.4.0 in /usr/local/lib/python3.7/dist-packages (from statsmodels) (0.5.1)
Requirement already satisfied: pandas>=0.19 in /usr/local/lib/python3.7/dist-packages (from statsmodels) (1.1.5)
Requirement already satisfied: scipy>=0.18 in /usr/local/lib/python3.7/dist-packages (from statsmodels) (1.4.1)
Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.7/dist-packages (from statsmodels) (1.19.5)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from patsy>=0.4.0->statsmodels) (1.15.0)
Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.19->statsmodels) (2.8.1)
Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.19->statsmodels) (2018.9)

In [55]:

#model for JAN 
# fit an ARIMA model and plot residual errors
import statsmodels
from pandas import read_csv
from patsy import dmatrices
from statsmodels.tsa.arima_model import ARIMA
from matplotlib import pyplot

model = ARIMA(jan_series, order=(5,2,1))
result = model.fit()
# summary of fit model
print(result.summary())

                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                   D2.y   No. Observations:                  136
Model:                 ARIMA(5, 2, 1)   Log Likelihood                  45.203
Method:                       css-mle   S.D. of innovations              0.168
Date:                Sat, 27 Feb 2021   AIC                            -74.407
Time:                        07:08:51   BIC                            -51.105
Sample:                             2   HQIC                           -64.938
                                                                              
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       4.842e-05      0.000      0.216      0.829      -0.000       0.000
ar.L1.D2.y     0.0067      0.068      0.099      0.922      -0.127       0.140
ar.L2.D2.y    -0.0438      0.068     -0.640      0.524      -0.178       0.090
ar.L3.D2.y    -0.0779      0.068     -1.140      0.256      -0.212       0.056
ar.L4.D2.y     0.0149      0.069      0.216      0.830      -0.121       0.151
ar.L5.D2.y    -0.5962      0.070     -8.578      0.000      -0.732      -0.460
ma.L1.D2.y    -1.0000      0.021    -47.836      0.000      -1.041      -0.959
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            0.8785           -0.6752j            1.1080           -0.1043
AR.2            0.8785           +0.6752j            1.1080            0.1043
AR.3           -1.0945           -0.0000j            1.0945           -0.5000
AR.4           -0.3187           -1.0708j            1.1173           -0.2960
AR.5           -0.3187           +1.0708j            1.1173            0.2960
MA.1            1.0000           +0.0000j            1.0000            0.0000
-----------------------------------------------------------------------------

In [58]:

#out of time cross validation
from statsmodels.tsa.stattools import acf

# Create Training and Test
train = data['Jan_mavg'].iloc[:103]
test = data['Jan_mavg'].iloc[103:]


#now build the ARIMA model on training dataset, forecast and plot it.
model = ARIMA(train, order=(5, 2, 1))  
fitted = model.fit(disp=-1)  
# Forecast
fc, se, conf = fitted.forecast(35, alpha=0.05)  # 95% conf

# Make as pandas series
fc_series = pd.Series(fc, index=test.index)
lower_series = pd.Series(conf[:, 0], index=test.index)
upper_series = pd.Series(conf[:, 1], index=test.index)

# Plot
plt.figure(figsize=(12,5), dpi=100)
plt.plot(train, label='training')
plt.plot(test, label='actual')
plt.plot(fc_series, label='forecast')
plt.fill_between(lower_series.index, lower_series, upper_series, 
                 color='k', alpha=.15)
plt.title('Forecast vs Actuals')
plt.legend(loc='upper left', fontsize=8)
plt.show()

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

In [59]:

#plotting using plotly  #521
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
y=fc_series
x=[2020,2021,2022,2023,2024,2025,2026,2027,2028,2029,2030,2031,2032,2033,2034,
   2035,2035,2036,2037,2038,2039,2040,2041,2042,2043,2044,2045,2046]

#Plotting annual temp readings- Line plot

fig_2 = px.scatter(x=x, y=y[:28],trendline='lowess')
# Edit the layout
fig_1.update_layout(title='forecasted values for JAN-FEB Temperature()',
                   xaxis_title='Years',
                   yaxis_title='Temperature (degree celsius)')
fig_2.show()

In [57]:

# evaluate forecasts
# Accuracy metrics
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    maxs = np.amax(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    acf1 = acf(fc-test)[1]                      # ACF1
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse, 'acf1':acf1, 
            'corr':corr, 'minmax':minmax})

forecast_accuracy(fc, test.values)

Out[57]:

{'acf1': 0.8350917909665718,
 'corr': 0.6821888128386941,
 'mae': 0.23762389330170128,
 'mape': 0.00852287190921454,
 'me': -0.22286853216098998,
 'minmax': 0.008521829765743005,
 'mpe': -0.007985773584319952,
 'rmse': 0.26928798390924985}

In [122]:

#MARCH-MAY
march_series=data['Mar_mavg'].values
data['Mar_mavg'].plot()

Out[122]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f0bb3005c10>

In [123]:

import statsmodels.api as sm
from pylab import rcParams
rcParams['figure.figsize'] = 18, 8
march_series=data['Mar_mavg'].values
decomposition =sm.tsa.seasonal_decompose(march_series, model='additive',freq=5)
fig = decomposition.plot()
plt.show()

In [124]:

#model for march-may
# fit an ARIMA model and plot residual errors
import statsmodels
from pandas import read_csv
from patsy import dmatrices
from statsmodels.tsa.arima_model import ARIMA
from matplotlib import pyplot

# fit model
model = ARIMA(march_series, order=(5,2,2))
result = model.fit()
# summary of fit model
print(result.summary())

                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                   D2.y   No. Observations:                  136
Model:                 ARIMA(5, 2, 2)   Log Likelihood                  95.768
Method:                       css-mle   S.D. of innovations              0.116
Date:                Sat, 27 Feb 2021   AIC                           -173.536
Time:                        08:05:59   BIC                           -147.322
Sample:                             2   HQIC                          -162.883
                                                                              
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const       2.465e-05      0.000      0.144      0.885      -0.000       0.000
ar.L1.D2.y    -0.0397      0.118     -0.337      0.737      -0.271       0.191
ar.L2.D2.y    -0.0767      0.069     -1.106      0.271      -0.213       0.059
ar.L3.D2.y    -0.0450      0.071     -0.631      0.529      -0.185       0.095
ar.L4.D2.y     0.0663      0.070      0.942      0.348      -0.072       0.204
ar.L5.D2.y    -0.5734      0.071     -8.051      0.000      -0.713      -0.434
ma.L1.D2.y    -0.9102      0.145     -6.276      0.000      -1.194      -0.626
ma.L2.D2.y    -0.0898      0.143     -0.627      0.532      -0.371       0.191
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1           -1.0931           -0.0000j            1.0931           -0.5000
AR.2           -0.3090           -1.0559j            1.1002           -0.2953
AR.3           -0.3090           +1.0559j            1.1002            0.2953
AR.4            0.9133           -0.6957j            1.1481           -0.1036
AR.5            0.9133           +0.6957j            1.1481            0.1036
MA.1            1.0000           +0.0000j            1.0000            0.0000
MA.2          -11.1389           +0.0000j           11.1389            0.5000
-----------------------------------------------------------------------------

In [125]:

#out of time cross validation
from statsmodels.tsa.stattools import acf

# Create Training and Test
train = data['Mar_mavg'].iloc[:103]
test = data['Mar_mavg'].iloc[103:]


#now build the ARIMA model on training dataset, forecast and plot it.
model = ARIMA(train, order=(5, 2, 1))  
fitted = model.fit(disp=-1)  
# Forecast
fc, se, conf = fitted.forecast(35, alpha=0.05)  # 95% conf

# Make as pandas series
fc_series = pd.Series(fc, index=test.index)
lower_series = pd.Series(conf[:, 0], index=test.index)
upper_series = pd.Series(conf[:, 1], index=test.index)

# Plot
plt.figure(figsize=(12,5), dpi=100)
plt.plot(train, label='training')
plt.plot(test, label='actual')
plt.plot(fc_series, label='forecast')
plt.fill_between(lower_series.index, lower_series, upper_series, 
                 color='k', alpha=.15)
plt.title('Forecast vs Actuals')
plt.legend(loc='upper left', fontsize=8)
plt.show()

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

In [127]:

#plotting using plotly  #521
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
y=fc_series
x=[2020,2021,2022,2023,2024,2025,2026,2027,2028,2029,2030,2031,2032,2033,2034,
   2035,2035,2036,2037,2038,2039,2040,2041,2042,2043,2044,2045,2046]

#Plotting annual temp readings- Line plot

fig_3 = px.scatter(x=x, y=y[:28],trendline='lowess')
# Edit the layout
fig_3.update_layout(title='forecasted values for March-May Temperature',
                   xaxis_title='Years',
                   yaxis_title='Temperature (degree celsius)')
fig_3.show()

In [85]:

# evaluate forecasts
# Accuracy metrics
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    maxs = np.amax(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    acf1 = acf(fc-test)[1]                      # ACF1
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse, 'acf1':acf1, 
            'corr':corr, 'minmax':minmax})

forecast_accuracy(fc, test.values)

Out[85]:

{'acf1': 0.8745373022674245,
 'corr': -0.5140640741171221,
 'mae': 0.5257687856318348,
 'mape': 0.018073340163452675,
 'me': -0.49967811163888,
 'minmax': 0.01807096951413678,
 'mpe': -0.01715225378176968,
 'rmse': 0.6128899457971622}

In [86]:

#JUNE-SEPT PREDICTIONS
june_series=data['Jun_mavg'].values
data['Jun_mavg'].plot()

Out[86]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f0baf011590>

In [87]:

import statsmodels.api as sm
from pylab import rcParams
rcParams['figure.figsize'] = 18, 8
june_series=data['Jun_mavg'].values
decomposition =sm.tsa.seasonal_decompose(june_series, model='additive',freq=5)
fig = decomposition.plot()
plt.show()

In [98]:

#model for march-may
# fit an ARIMA model and plot residual errors
import statsmodels
from pandas import read_csv
from patsy import dmatrices
from statsmodels.tsa.arima_model import ARIMA
from matplotlib import pyplot

# fit model
model = ARIMA(june_series, order=(5,2,1))
result = model.fit()
# summary of fit model
print(result.summary())

                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                   D2.y   No. Observations:                  136
Model:                 ARIMA(5, 2, 1)   Log Likelihood                 154.892
Method:                       css-mle   S.D. of innovations              0.076
Date:                Sat, 27 Feb 2021   AIC                           -293.785
Time:                        07:31:15   BIC                           -270.484
Sample:                             2   HQIC                          -284.316
                                                                              
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0001      0.000     -0.941      0.348      -0.000       0.000
ar.L1.D2.y    -0.0120      0.078     -0.155      0.877      -0.164       0.140
ar.L2.D2.y    -0.1357      0.077     -1.760      0.081      -0.287       0.015
ar.L3.D2.y     0.0349      0.078      0.449      0.655      -0.118       0.188
ar.L4.D2.y    -0.0454      0.077     -0.590      0.556      -0.196       0.105
ar.L5.D2.y    -0.4307      0.077     -5.605      0.000      -0.581      -0.280
ma.L1.D2.y    -1.0000      0.021    -47.793      0.000      -1.041      -0.959
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            0.9350           -0.7324j            1.1877           -0.1058
AR.2            0.9350           +0.7324j            1.1877            0.1058
AR.3           -0.3558           -1.0845j            1.1413           -0.3005
AR.4           -0.3558           +1.0845j            1.1413            0.3005
AR.5           -1.2637           -0.0000j            1.2637           -0.5000
MA.1            1.0000           +0.0000j            1.0000            0.0000
-----------------------------------------------------------------------------

In [128]:

#out of time cross validation
from statsmodels.tsa.stattools import acf

# Create Training and Test
train = data['Jun_mavg'].iloc[:103]
test = data['Jun_mavg'].iloc[103:]


#now build the ARIMA model on training dataset, forecast and plot it.
model = ARIMA(train, order=(5, 2, 1))  
fitted = model.fit(disp=-1)  
# Forecast
fc, se, conf = fitted.forecast(35, alpha=0.05)  # 95% conf

# Make as pandas series
fc_series = pd.Series(fc, index=test.index)
lower_series = pd.Series(conf[:, 0], index=test.index)
upper_series = pd.Series(conf[:, 1], index=test.index)

# Plot
plt.figure(figsize=(12,5), dpi=100)
plt.plot(train, label='training')
plt.plot(test, label='actual')
plt.plot(fc_series, label='forecast')
plt.fill_between(lower_series.index, lower_series, upper_series, 
                 color='k', alpha=.15)
plt.title('Forecast vs Actuals')
plt.legend(loc='upper left', fontsize=8)
plt.show()

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

In [129]:

#plotting using plotly  #521
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
y=fc_series
x=[2020,2021,2022,2023,2024,2025,2026,2027,2028,2029,2030,2031,2032,2033,2034,
   2035,2035,2036,2037,2038,2039,2040,2041,2042,2043,2044,2045,2046]

#Plotting annual temp readings- Line plot

fig_4 = px.scatter(x=x, y=y[:28],trendline='lowess')
# Edit the layout
fig_4.update_layout(title='forecasted values for JUNE-SEPT Temperature()',
                   xaxis_title='Years',
                   yaxis_title='Temperature (degree celsius)')
fig_4.show()

In [101]:

# evaluate forecasts
# Accuracy metrics
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    maxs = np.amax(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    acf1 = acf(fc-test)[1]                      # ACF1
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse, 'acf1':acf1, 
            'corr':corr, 'minmax':minmax})

forecast_accuracy(fc, test.values)

Out[101]:

{'acf1': 0.6625679290870549,
 'corr': 0.4312525498730749,
 'mae': 0.09948347912387874,
 'mape': 0.0035367450777698344,
 'me': -0.06131839584231601,
 'minmax': 0.0035334250675903567,
 'mpe': -0.0021693103444550854,
 'rmse': 0.11375760815277244}

In [104]:

#OCT-DEC PREDICTIONS
oct_series=data['Oct_mavg'].values
data['Oct_mavg'].plot()

Out[104]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f0bae7e7a50>

In [105]:

import statsmodels.api as sm
from pylab import rcParams
rcParams['figure.figsize'] = 18, 8
oct_series=data['Oct_mavg'].values
decomposition =sm.tsa.seasonal_decompose(oct_series, model='additive',freq=5)
fig = decomposition.plot()
plt.show()

In [118]:

#model for oct-dec
# fit an ARIMA model and plot residual errors
import statsmodels
from pandas import read_csv
from patsy import dmatrices
from statsmodels.tsa.arima_model import ARIMA
from matplotlib import pyplot

# fit model
model = ARIMA(oct_series, order=(5,2,1))
result = model.fit()
# summary of fit model
print(result.summary())

                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                   D2.y   No. Observations:                  136
Model:                 ARIMA(5, 2, 1)   Log Likelihood                  86.835
Method:                       css-mle   S.D. of innovations              0.124
Date:                Sat, 27 Feb 2021   AIC                           -157.669
Time:                        08:01:24   BIC                           -134.368
Sample:                             2   HQIC                          -148.200
                                                                              
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const      -6.932e-05      0.000     -0.348      0.728      -0.000       0.000
ar.L1.D2.y     0.1256      0.074      1.696      0.092      -0.020       0.271
ar.L2.D2.y    -0.0911      0.075     -1.220      0.225      -0.237       0.055
ar.L3.D2.y     0.0923      0.075      1.235      0.219      -0.054       0.239
ar.L4.D2.y    -0.0222      0.076     -0.294      0.769      -0.170       0.126
ar.L5.D2.y    -0.5075      0.074     -6.838      0.000      -0.653      -0.362
ma.L1.D2.y    -0.9998      0.020    -49.020      0.000      -1.040      -0.960
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            0.9248           -0.6487j            1.1297           -0.0974
AR.2            0.9248           +0.6487j            1.1297            0.0974
AR.3           -0.3231           -1.0648j            1.1127           -0.2969
AR.4           -0.3231           +1.0648j            1.1127            0.2969
AR.5           -1.2472           -0.0000j            1.2472           -0.5000
MA.1            1.0002           +0.0000j            1.0002            0.0000
-----------------------------------------------------------------------------

In [130]:

#out of time cross validation
from statsmodels.tsa.stattools import acf

# Create Training and Test
train = data['Oct_mavg'].iloc[:103]
test = data['Oct_mavg'].iloc[103:]


#now build the ARIMA model on training dataset, forecast and plot it.
model = ARIMA(train, order=(5, 2, 1))  
fitted = model.fit(disp=-1)  
# Forecast
fc, se, conf = fitted.forecast(35, alpha=0.05)  # 95% conf

# Make as pandas series
fc_series = pd.Series(fc, index=test.index)
lower_series = pd.Series(conf[:, 0], index=test.index)
upper_series = pd.Series(conf[:, 1], index=test.index)

# Plot
plt.figure(figsize=(12,5), dpi=100)
plt.plot(train, label='training')
plt.plot(test, label='actual')
plt.plot(fc_series, label='forecast')
plt.fill_between(lower_series.index, lower_series, upper_series, 
                 color='k', alpha=.15)
plt.title('Forecast vs Actuals')
plt.legend(loc='upper left', fontsize=8)
plt.show()

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

/usr/local/lib/python3.7/dist-packages/statsmodels/tsa/base/tsa_model.py:215: ValueWarning:

An unsupported index was provided and will be ignored when e.g. forecasting.

/usr/local/lib/python3.7/dist-packages/statsmodels/base/model.py:492: HessianInversionWarning:

Inverting hessian failed, no bse or cov_params available

In [131]:

#plotting using plotly  #521
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
y=fc_series
x=[2020,2021,2022,2023,2024,2025,2026,2027,2028,2029,2030,2031,2032,2033,2034,
   2035,2035,2036,2037,2038,2039,2040,2041,2042,2043,2044,2045,2046]

#Plotting annual temp readings- Line plot

fig_5 = px.scatter(x=x, y=y[:28],trendline='lowess')
# Edit the layout
fig_5.update_layout(title='forecasted values for OCT-DEC Temperature()',
                   xaxis_title='Years',
                   yaxis_title='Temperature (degree celsius)')
fig_5.show()

In [121]:

# evaluate forecasts
# Accuracy metrics
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    maxs = np.amax(np.hstack([forecast[:,None], 
                              actual[:,None]]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    acf1 = acf(fc-test)[1]                      # ACF1
    return({'mape':mape, 'me':me, 'mae': mae, 
            'mpe': mpe, 'rmse':rmse, 'acf1':acf1, 
            'corr':corr, 'minmax':minmax})

forecast_accuracy(fc, test.values)

Out[121]:

{'acf1': 0.6855144110996123,
 'corr': 0.8268709339091787,
 'mae': 0.10379754862316933,
 'mape': 0.0036989179724732503,
 'me': 0.0036583115354427978,
 'minmax': 0.003689921325245038,
 'mpe': 0.00015390904015119515,
 'rmse': 0.13009749921160044}