%pylab inline
%load_ext rmagic
Populating the interactive namespace from numpy and matplotlib
recorded data plotted over time. This scrutiny often suggests the method of analysis (e.g. time domain vs. frequency domain) as well as statistics that will be of use in summarizing the information in the data.
%%R
## Load "Applied Statistical Time Series Analysis" companion package;
## it supplies every dataset used below (jj, gtemp, speech, nyse, soi, rec, fmri1, EQ5, EXP6).
library("astsa")
%%R
## Johnson & Johnson quarterly earnings per share — trend plus growing seasonal variation.
data(jj)
plot(jj, type="o", ylab="quarterly earnings per share")
%%R
## Yearly global temperature deviations — a smooth upward-trending series.
data(gtemp)
plot.ts(gtemp, type="o", ylab="Global Temperature Deviations")
aaa ... hhh
aaa ... hhh
%%R
## Speech recording ("aaa...hhh") — short, rapidly repeating signal bursts.
data(speech)
plot(speech, main = "speech")
%%R
## Daily NYSE returns — mean near zero with volatility clustering.
data(nyse)
plot(nyse, ylab = "NYSE Returns")
%%R
## Two related series stacked vertically for visual comparison:
## Southern Oscillation Index and fish Recruitment.
data(soi)
data(rec)
par(mfrow = c(2, 1))  # 2 rows x 1 column layout
plot(soi, main="Southern Oscillation Index")
plot(rec, main = "Recruitment")
%%R
## fMRI BOLD signal: columns 2-5 are cortex series, 6-9 are thalamus/cerebellum
## (column 1 is time — see the printed colnames below).
data(fmri1)
par(mfrow=c(2, 1), mar=c(3, 2, 1, 0)+.5, mgp = c(1.5, .6, 0))  # tighten margins for the stacked panels
ts.plot(fmri1[,2:5], lty=1:4, ylab="BOLD", main="Cortex")
ts.plot(fmri1[,6:9], lty=1:4, ylab="BOLD", main="Thalamus & Cerebellum")
mtext("time (1pt = 2sec)", side = 1, line = 2)
print(colnames(fmri1))
[1] "time" "cort1" "cort2" "cort3" "cort4" "thal1" "thal2" "cere1" "cere2"
%%R
## Seismic traces: an earthquake vs. an explosion — visually distinguished
## by the relative smoothness/roughness of the two phases.
data(EQ5)
data(EXP6)
par(mfrow = c(2, 1))
plot(EQ5, main = "Earthquake")
plot(EXP6, main = "Explosion")
The fundamental visual characteristic distinguishing the different series above is their differing degrees of smoothness.
A time series can be defined as a collection of random variables indexed according to the order in which they are obtained.
Its smoothness is induced by the supposition that adjacent points in time are correlated.
some fundamental models (templates) for time series include
If all time series could be modelled as white noise, classical statistical methods would suffice. Two ways of introducing serial correlation and more smoothness into a series are the moving average and autoregression.
$$v_t = \frac{1}{3} (w_{t-1}+w_t+w_{t+1})$$
You might start to notice the similarity between moving averages of white noise and the SOI or fMRI series.
$$x_t = x_{t-1} - .9x_{t-2} + w_t$$ The equation above represents a regression or prediction of the current value $x_t$ of a time series as a function of the past two values of the series, and, hence, the term autoregression is suggested.
And in the plot, we notice that it starts to display periodic behavior, which is similar to that displayed by the speech data.
$$x_t = \delta + x_{t-1} + w_{t}$$ where $w_t$ is the white noise and $\delta$ is a constant called drift. and when $\delta=0$, it is simply called a random walk. Note that we may rewrite the recursion function for random walk as the cumulative sum of white noise variable, e.g. $$x_t = {\delta}t+\sum_{j=1}^{t}{w_j}$$
$$x_t = 2\cos(2{\pi}t/50+.6{\pi})+w_t$$ where the signal part is a sinusoidal waveform that can be generally written as $$A\cos(2{\pi}{\omega}t+\phi)$$ where $A$ is the amplitude, $\omega$ is the frequency of oscillation and $\phi$ is a phase shift. e.g. $\omega=1/50$ above means one cycle per 50 time units (points)
%%R
## Simulate the fundamental model templates: white noise, moving average,
## autoregression, random walk with drift, and signal-plus-noise.
## WHITE NOISE
wn <- rnorm(500, 0, 1) # 500 N(0, 1) variates
## MOVING AVERAGE by filtering
## sides = 2 gives a centered filter, producing NA at both ends
mv <- filter(wn, sides = 2, rep(1/3, 3)) # 1/3[w(t-1)+w(t)+w(t+1)]
## - a linear combination of time series values is referred to as a filtered series
## AUTOREGRESSION
## recursive filter with coefficients c(1, -.9) realizes x_t = x_{t-1} - .9 x_{t-2} + w_t;
## the first 50 values are discarded as burn-in
w <- rnorm(550, 0, 1) # 50 extra to avoid startup problems
ar <- filter(w, filter = c(1, -.9), method ="recursive")[-(1:50)]
## NOTE(review): no par(mfrow=...) is set before these plot.ts calls, so on a
## standard device each plot replaces the previous one — confirm this is the
## intended notebook behavior (each figure rendered separately).
plot.ts(wn, main="white noise")
plot.ts(mv, main="moving average")
## Random Walk with Drift
set.seed(1)  # fix the RNG so the two walks are reproducible
w <- rnorm(200, 0, 1); x <- cumsum(w) # drift = 0
wd <- w + .2; xd <- cumsum(wd) # drift = .2
plot.ts(ar, main="autoregression")
plot.ts(xd, ylim = c(-5, 55), main="Random Walk with Drift", col=1)
lines(x, col=2)  # overlay the zero-drift walk in red
legend("topleft", c("drift=0.2", "drift=0"), col=c(1, 2), lty=1)
## Signal in Noise
## sinusoid with amplitude 2, frequency 1/50 (one cycle per 50 points), phase .6*pi
cs <- 2*cos(2*pi*(1:500)/50 + .6*pi)
w <- rnorm(500, 0, 1)
par(mfrow=c(3, 1), mar=c(3, 2, 2, 1), cex.main=1.5)
plot.ts(cs, main = expression(2*cos(2*pi*t/50+.6*pi)))
plot.ts(cs+w, main = expression(2*cos(2*pi*t/50+.6*pi) + N(0, 1)))
plot.ts(cs+5*w, main = expression(2*cos(2*pi*t/50+.6*pi) + N(0, 25)))  # stronger noise buries the signal
## the method parameter to filter - "recursive" for autoregression
## - "convolution" for moving average
the series $x_t$ is said to lead $y_t$ for $l>0$ and is said to lag $y_t$ for $l<0$
The concept of weak stationarity forms the basis for much of the analysis performed with time series
## ACF of typical time series (outline of the series examined in the next cell)
## white noise - wn
#Acf(wn)
## autoregression - ar
## random walk with drift - xd
## signal in noise - cs + 5*w (the sinusoid plus scaled white noise)
%%R
## Sample ACFs of the simulated series from the earlier cell.
library(forecast)  # Acf() omits the trivial lag-0 spike that base acf() draws
par(mfrow=c(3, 2))
Acf(wn, main = "white noise")                          # no serial structure
Acf(na.omit(mv), main = "three-point moving average")  # drop the NAs produced by the centered filter
Acf(ar, main = "autoregression")                       # damped oscillating decay
Acf(xd, main = "random walk with drift = 0.2")         # slow near-linear decay (nonstationary)
## fix: the title promises "signal with random noise", but cs+5 only shifts the
## sinusoid by a constant — which leaves its ACF identical to that of cs alone.
## The noisy signal (as plotted earlier) is cs + 5*w.
Acf(cs + 5*w, main = "signal with random noise", lag.max = 50)
This is forecast 4.8
%%R
## As a comparison, the ACF of some real data
par(mfrow=c(2, 2))
## periodic spikes in the ACF reveal the repetition period of each series
Acf(speech, main = "speech aaa...hhh", lag.max=150)
mtext(side=1, line = 3, "repeating peaks spaced at about 106-109\n it contains a series of repeating short signals")
Acf(soi, main = "SOI Temp", lag.max=50)
mtext(side = 1, line = 3, "repeating peaks located at around 12")
Acf(rec, main = "Rec Fish Amount", lag.max=50)
## cross-correlation: a peak at negative lag means soi leads rec
ccf(soi, rec, lag.max=50, main = "ccf between SOI and Rec")
mtext(side = 1, line = 3, "repeating peak located at around -6 \n so soi is -6 leading of rec")
%%R
par(mfrow=c(2, 1))
## Cross-Correlation Function between pairs of the simulated series
## white noise vs. its own three-point moving average (drop the filter() NAs)
ccf(wn, na.omit(mv), main = "white noise vs 3point moving average")
## moving average vs. autoregression
## fix: corrected the misspelled plot title ("autoregresion" -> "autoregression")
ccf(na.omit(mv), ar, main = "3point moving average vs autoregression")
The ACF conveys information about the signatures of a time series, but it does not necessarily tell whether a time series is stationary or not, as the correlation plot is not against time.