# Imports
import warnings
for _cat in (FutureWarning, DeprecationWarning):
    warnings.simplefilter(action="ignore", category=_cat)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import yfinance as yf
np.random.seed(0) # For reproducible results
A time series can be decomposed into three components:
- Trends
- Seasonal effects
- Random fluctuations

These components can be combined in two ways:
- Additive: Data = Trend + Seasonal + Random
- Multiplicative: Data = Trend × Seasonal × Random

Taking the logarithm of a multiplicative model turns it into an additive one:

$$\log(\text{Data}) = \log(\text{Trend}) + \log(\text{Seasonal}) + \log(\text{Random})$$
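To see the decomposition in action, here is a minimal sketch using statsmodels' seasonal_decompose on a synthetic series; the trend, seasonality, and period=12 are illustrative assumptions, not data from this lecture.

from statsmodels.tsa.seasonal import seasonal_decompose

# Synthetic series with a known linear trend, period-12 seasonality, and noise
t_idx = np.arange(120)
synthetic = pd.Series(
    0.5 * t_idx                              # Trend
    + 10 * np.sin(2 * np.pi * t_idx / 12)    # Seasonal effect
    + np.random.normal(0, 2, 120)            # Random fluctuations
)

# Decompose assuming an additive model; use model='multiplicative'
# when the seasonal amplitude scales with the trend
decomposition = seasonal_decompose(synthetic, model='additive', period=12)
decomposition.plot();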
A time series is (weakly) stationary if:
- its mean is constant over time,
- its variance is constant over time,
- the autocovariance between two observations depends only on the lag between them, not on time itself.
Where does the term "unit root" come from?
Consider an AR(1) process $Y_t = \phi Y_{t-1} + \epsilon_t$. Its characteristic equation $1 - \phi z = 0$ has root $z = 1/\phi$, which lies on the unit circle exactly when $\phi = 1$; in that case shocks never decay and the process behaves like a random walk.
In conclusion, the term "unit root" describes a specific condition in the autoregressive representation of a time series, indicating that the series is non-stationary.
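As a quick illustration (anticipating the ADF test formulated below), we can simulate an AR(1) process with $\phi = 0.5$ and one with $\phi = 1$ (a random walk) and compare the test results; the parameters here are illustrative assumptions.

# Simulate AR(1): y_t = phi * y_{t-1} + eps_t
def simulate_ar1(phi, n=500):
    y = np.zeros(n)
    for t in range(1, n):
        y[t] = phi * y[t - 1] + np.random.normal()
    return y

for phi in (0.5, 1.0):
    # The second element of the adfuller output is the p-value
    p_value = adfuller(simulate_ar1(phi))[1]
    print(f'phi = {phi}: ADF p-value = {p_value:.3f}')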
Formulation of the test
ADF test equation
The ADF test models the time series with the following equation:
$$\Delta Y_t = \alpha + \beta t + \gamma Y_{t-1} + \sum_{i=1}^{p} \delta_i \Delta Y_{t-i} + \epsilon_t$$

ADF test for AR(1)
For an AR(1) process $Y_t = \phi Y_{t-1} + \epsilon_t$, subtracting $Y_{t-1}$ from both sides gives $\Delta Y_t = (\phi - 1) Y_{t-1} + \epsilon_t$.
This is exactly the ADF model above when $\alpha = 0$, $\beta = 0$, $p = 0$, and $\gamma = \phi - 1$.
Connection to unit root
Since $\gamma = \phi - 1$, testing $\gamma = 0$ is equivalent to testing $\phi = 1$, i.e. the presence of a unit root.
Null and alternative hypotheses (revised)
- $H_0$: $\gamma = 0$ (a unit root is present; the series is non-stationary)
- $H_1$: $\gamma < 0$ (no unit root; the series is stationary)
Test statistic: the ADF statistic is the $t$-ratio $\hat{\gamma}/\mathrm{SE}(\hat{\gamma})$; the more negative it is, the stronger the evidence against $H_0$. It is compared against Dickey-Fuller critical values rather than standard $t$ critical values.
Choosing lag length: the number of lagged difference terms $p$ can be fixed manually or selected automatically with an information criterion such as AIC or BIC.
# Generating a random walk time series
data = pd.Series(100 + np.random.normal(0, 1, 100).cumsum())
# Perform Augmented Dickey-Fuller test. The lag can be set manually
# with 'maxlag' or inferred automatically with autolag
result = adfuller(data, autolag='AIC') # You can change to 'BIC'
adf_statistic, p_value, usedlag, nobs, critical_values, icbest = result
print(f'ADF Statistic: {adf_statistic:.2f}')
print(f'p-value: {p_value:.2f}')
print(f'Used Lag: {usedlag}')
print(f'Number of Observations: {nobs}')
print(f"Critical Values: {[f'{k}: {v:.2f}' for k, v in critical_values.items()]}\n")
ADF Statistic: -1.13
p-value: 0.70
Used Lag: 0
Number of Observations: 99
Critical Values: ['1%: -3.50', '5%: -2.89', '10%: -2.58']
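Since the random walk has a unit root, its first difference should be stationary; a quick sanity check (sketch):

# First-differencing the random walk should remove the unit root
diff_result = adfuller(data.diff().dropna(), autolag='AIC')
print(f'ADF Statistic (differenced): {diff_result[0]:.2f}')
print(f'p-value (differenced): {diff_result[1]:.3f}')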
Types of ADF tests
The regression argument of adfuller selects which deterministic terms are included in the test regression:
- 'n': no constant, no trend.
- 'c': constant only (the default in statsmodels).
- 'ct': constant and trend.

# Function to perform ADF test
def perform_adf_test(series, title, regression_type):
    out = adfuller(series, regression=regression_type)
    print(f"Results for {title}:")
    print(f'ADF Statistic: {out[0]:.2f}')
    print(f'p-value: {out[1]:.3f}')
    print(f"Critical Values: {[f'{k}: {v:.2f}' for k, v in out[4].items()]}\n")
# 1. No Constant, no Trend
series_no_const_no_trend = pd.Series(np.random.normal(0, 1, 200))
# 2. Constant, but no Trend
series_const_no_trend = pd.Series(50 + np.random.normal(0, 1, 200))
# 3. Both Constant and Trend
series_const_trend = pd.Series(50 + np.linspace(0, 20, 200) + np.random.normal(0, 1, 200))
plt.figure(figsize=(12, 4))
plt.subplot(1, 3, 1)
series_no_const_no_trend.plot(title='No Constant, No Trend')
plt.subplot(1, 3, 2)
series_const_no_trend.plot(title='Constant, but No Trend')
plt.subplot(1, 3, 3)
series_const_trend.plot(title='Both Constant and Trend')
plt.tight_layout();
# 1. No Constant or Trend
perform_adf_test(series_no_const_no_trend, "No Constant, No Trend", 'n')
# 2. Constant, but No Trend
perform_adf_test(series_const_no_trend, "Constant, No Trend", 'c')
# 3. Both Constant and Trend
perform_adf_test(series_const_trend, "Constant and Trend", 'ct')
Results for No Constant, No Trend:
ADF Statistic: -15.47
p-value: 0.000
Critical Values: ['1%: -2.58', '5%: -1.94', '10%: -1.62']

Results for Constant, No Trend:
ADF Statistic: -13.95
p-value: 0.000
Critical Values: ['1%: -3.46', '5%: -2.88', '10%: -2.57']

Results for Constant and Trend:
ADF Statistic: -14.68
p-value: 0.000
Critical Values: ['1%: -4.00', '5%: -3.43', '10%: -3.14']
Try using 'n' in the last two examples and see for yourself.

Example: Google stocks
We will download the Google Open-High-Low-Close-Volume (GOOG OHLCV) data from 1st September 2004 to 31st August 2020 from Yahoo! Finance using the yfinance Python package.
def get_data(tickerSymbol, period, start, end):
    # Get data on the ticker
    tickerData = yf.Ticker(tickerSymbol)
    # Get the historical prices for this ticker
    tickerDf = tickerData.history(period=period, start=start, end=end)
    return tickerDf
data = get_data('GOOG', period='1d', start='2004-09-01', end='2020-08-31')
# Plotting the Closing Prices
plt.figure(figsize=(14, 5))
plt.plot(data['Close'], label='GOOG Closing Price')
plt.title('Google Stock Closing Prices (2004-2020)')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend();
# Perform the ADF test
perform_adf_test(data['Close'], "Google Stock Closing Prices", 'ct')
Results for Google Stock Closing Prices:
ADF Statistic: -0.78
p-value: 0.968
Critical Values: ['1%: -3.96', '5%: -3.41', '10%: -3.13']
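Prices are clearly non-stationary, but returns typically are stationary. As a quick check (a sketch; 'c' is used since returns fluctuate around a constant level with no trend), we can run the same test on the log returns:

# Log returns of the closing prices
log_returns = np.log(data['Close']).diff().dropna()
perform_adf_test(log_returns, "Google Stock Log Returns", 'c')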
Testing for mean reversion and testing for stationarity are related but distinct concepts in time series analysis.
Key differences: stationarity requires the statistical properties of the series (mean, variance, autocovariance) to be constant over time, whereas mean reversion only requires a tendency to move back towards a long-run mean. A stationary series is typically mean-reverting, but a mean-reverting series need not satisfy all the conditions of stationarity.
A time series can then be characterised by its Hurst exponent $H$, estimated from the scaling of the variance of lagged differences, $\mathrm{Var}(Y_{t+\tau} - Y_t) \propto \tau^{2H}$, in the following manner:
def hurst(ts):
    # Create the range of lag values
    lags = range(2, 100)
    # Calculate the variances of the lagged differences
    tau = [np.var(np.subtract(ts[lag:], ts[:-lag])) for lag in lags]
    # Fit a line to the log-log plot: Var scales as tau^(2H), so the slope is 2H
    poly = np.polyfit(np.log(lags), np.log(tau), 1)
    # The Hurst exponent is half the slope
    return poly[0] / 2.0
What do persistent and anti-persistent time series look like?
Let's define

def random_walk_memory(length, proba, min_lookback, max_lookback)

where:
- proba is the probability that the next increment will follow the trend;
- min_lookback and max_lookback are the minimum and maximum window sizes used to calculate the trend direction.
def random_walk_memory(length, proba=0.5, min_lookback=1, max_lookback=100):
    series = [0.] * length
    for i in range(1, length):
        # Until the series reaches the min_lookback threshold,
        # the direction of the step is random (-1 or 1)
        if i < min_lookback + 1:
            direction = np.sign(np.random.randn())
        # Otherwise, consider the historical values to determine the direction
        else:
            # Randomly choose a lookback window between min_lookback and
            # the minimum of i-1 (to ensure not exceeding the current
            # length) and max_lookback
            lookback = np.random.randint(min_lookback, min(i - 1, max_lookback) + 1)
            # Decide whether to follow the recent trend or move against it,
            # by comparing proba with a uniform random number in [0, 1)
            recent_trend = np.sign(series[i - 1] - series[i - 1 - lookback])
            change = np.sign(proba - np.random.uniform())
            direction = recent_trend * change
        series[i] = series[i - 1] + np.fabs(np.random.randn()) * direction
    return series
bm = random_walk_memory(2000, proba=0.5)
persistent = random_walk_memory(2000, proba=0.7)
antipersistent = random_walk_memory(2000, proba=0.3)
_, axes = plt.subplots(1,3, figsize=(24, 4))
axes[0].plot(bm)
axes[0].set_title(f"Random Walk, H: {hurst(bm):.2f}")
axes[1].plot(persistent)
axes[1].set_title(f"Persistent, H: {hurst(persistent):.2f}")
axes[2].plot(antipersistent)
axes[2].set_title(f"Anti-Persistent, H: {hurst(antipersistent):.2f}");
print(f"GOOG closing price, H: {hurst(data['Close'].values):.2f}")
GOOG closing price, H: 0.41
Case 1: $H = 0.5$. The series is a random walk (Brownian motion); increments are uncorrelated.
Case 2: $H < 0.5$. The series is anti-persistent (mean-reverting); an increase is more likely to be followed by a decrease, and vice versa (see the sketch below).
Case 3: $H > 0.5$. The series is persistent (trending); an increase is more likely to be followed by another increase.
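For a concrete example of Case 2, here is a minimal sketch (with assumed, illustrative parameters) of an Ornstein-Uhlenbeck process, a strongly mean-reverting process whose Hurst exponent should come out below 0.5:

def simulate_ou(n=2000, theta=5.0, mu=0.0, sigma=0.5, dt=0.01):
    x = np.zeros(n)
    for i in range(1, n):
        # The drift term pulls the process back towards its long-run mean mu
        x[i] = x[i-1] + theta * (mu - x[i-1]) * dt + sigma * np.sqrt(dt) * np.random.normal()
    return x

print(f"Ornstein-Uhlenbeck, H: {hurst(simulate_ou()):.2f}")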
Before applying these interpretations, investors should also take the following considerations into account:
Variance of Brownian Motion at time lag $\tau$
For standard Brownian motion the variance of increments grows linearly with the lag, $\mathrm{Var}\big(W(t+\tau) - W(t)\big) = \tau$, which corresponds to $H = 0.5$.
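A quick numerical check of this scaling (sketch): simulate a long random walk and compare the variance of its increments at a few lags; the variance should grow roughly linearly with the lag.

walk = np.cumsum(np.random.normal(0, 1, 100_000))
for tau in (1, 10, 100):
    # Variance of increments at lag tau; expected to be close to tau itself
    inc_var = np.var(walk[tau:] - walk[:-tau])
    print(f'tau = {tau}: variance of increments = {inc_var:.1f}')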
The GBM is defined as:
$$S(t) = S_0 \exp\left(\left(\mu - \frac{1}{2} \sigma^2\right)t + \sigma W(t)\right)$$

S0 = 100 # Initial stock price
mu = 0.09 # Expected annual return (9%)
sigma = 0.25 # Annual volatility (25%)
T = 2 # Time horizon in years
dt = 1/252 # Time step in years, assuming 252 trading days per year
N = int(T/dt) # Number of time steps
t = np.linspace(0, T, N) # Time vector
# Brownian Motion
dW = np.random.normal(0, np.sqrt(dt), N)
W = np.cumsum(dW)
# Geometric Brownian Motion
S = S0 * np.exp((mu - 0.5 * sigma**2) * t + sigma * W)
plt.figure(figsize=(12, 4))
plt.plot(S)
plt.title('Geometric Brownian Motion');
# Step 1: Get the "training" data (e.g., 2020-2022)
data2 = get_data('GOOG', period='1d', start='2019-12-31', end='2022-12-31')
# Get "test" data, for comparison (e.g., 2023)
data3 = get_data('GOOG', period='1d', start='2022-12-31', end='2023-12-31')
test_days = len(data3)
# Step 2: Calculate Daily Returns
returns = data2['Close'].pct_change() # We are interested in returns, i.e. the fractional day-over-day changes
# Step 3: Estimate Parameters for GBM
mu = returns.mean() * 252 # Annualize the mean
sigma = returns.std() * np.sqrt(252) # Annualize the std deviation
# Step 4: Set GBM parameters
T = 1 # Time horizon in years
dt = 1/test_days # Time step in years: one year divided into as many steps as there are test days
N = int(T/dt) # Number of time steps
time_step = np.linspace(0, T, N)
S0 = data2['Close'].iloc[-1] # Starting stock price (latest close price)
# Step 5: Compute Simulation
W = np.random.standard_normal(size=N)
W = np.cumsum(W)*np.sqrt(dt) # Cumulative sum for the Wiener process
X = (mu - 0.5 * sigma**2) * time_step + sigma * W
S = S0 * np.exp(X) # GBM formula
# Plot the results
plt.figure(figsize=(12, 5))
plt.plot(data2['Close'], label='GOOG Historical Closing Prices')
plt.plot(data3.index, S, label='Simulated GBM Prices')
plt.plot(data3['Close'], label='Real Prices')
plt.legend()
plt.title('Google Stock Prices and Simulated GBM')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45);
# Simulate multiple paths
n_paths = 10
paths = []
for _ in range(n_paths):
    W = np.cumsum(np.random.standard_normal(size=N)) * np.sqrt(dt)
    X = (mu - 0.5 * sigma**2) * time_step + sigma * W
    paths.append(S0 * np.exp(X))
path_mean = np.array(paths).mean(axis=0)
path_std = np.array(paths).std(axis=0)
plt.figure(figsize=(12, 5))
plt.plot(data2['Close'], label='GOOG Historical Closing Prices')
plt.plot(data3.index, path_mean, label='Simulated GBM Prices')
plt.fill_between(data3.index, path_mean-1.96*path_std, path_mean+1.96*path_std, color='tab:orange', alpha=0.2, label='95% CI')
plt.plot(data3['Close'], label='Real Prices')
plt.legend(loc='upper left')
plt.title('Google Stock Prices and Simulated GBM')
plt.xlabel('Date')
plt.ylabel('Price')
plt.xticks(rotation=45);
In this lecture we covered:
- decomposing a time series into trend, seasonal, and random components;
- testing for stationarity and unit roots with the Augmented Dickey-Fuller test;
- measuring mean reversion and persistence with the Hurst exponent;
- modelling and simulating prices with Geometric Brownian Motion.

Exercise: repeat the analysis for Tesla (TSLA) and Equinor (EQNR) for the years 2019-12-31 - 2022-12-31.
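A possible starting point for the exercise (a sketch reusing the helpers defined above):

# Download the data and test the closing prices for stationarity
tsla = get_data('TSLA', period='1d', start='2019-12-31', end='2022-12-31')
eqnr = get_data('EQNR', period='1d', start='2019-12-31', end='2022-12-31')
perform_adf_test(tsla['Close'], "Tesla Closing Prices", 'ct')
perform_adf_test(eqnr['Close'], "Equinor Closing Prices", 'ct')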