Time Series¶

In [ ]:

import numpy as np
import pandas as pd
# Seed NumPy's global random generator so the random data below is repeatable.
np.random.seed(12345)
import matplotlib.pyplot as plt
# Default size for every matplotlib figure created in this notebook.
plt.rc('figure', figsize=(10, 6))
# Remember the current row-display limit (it is restored in the final cell),
# then cap displayed rows at 20.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
# Print NumPy arrays with 4 decimal places and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

Date and Time Data Types and Tools¶

In [ ]:

from datetime import datetime
now = datetime.now()
now
now.year, now.month, now.day

In [ ]:

# Subtracting one datetime from another yields a timedelta.
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
delta
# The difference is exposed as whole days plus the leftover seconds.
delta.days
delta.seconds

In [ ]:

from datetime import timedelta
start = datetime(2011, 1, 7)
start + timedelta(12)
start - 2 * timedelta(12)

Converting Between String and Datetime¶

In [ ]:

stamp = datetime(2011, 1, 3)
str(stamp)
stamp.strftime('%Y-%m-%d')

In [ ]:

value = '2011-01-03'
datetime.strptime(value, '%Y-%m-%d')
datestrs = ['7/6/2011', '8/6/2011']
[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]

In [ ]:

from dateutil.parser import parse
parse('2011-01-03')

In [ ]:

parse('Jan 31, 1997 10:45 PM')

In [ ]:

parse('6/12/2011', dayfirst=True)

In [ ]:

datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']
pd.to_datetime(datestrs)

In [ ]:

# Append None to see how a missing entry is handled during conversion.
idx = pd.to_datetime(datestrs + [None])
idx
# Position 2 is the entry produced from None.
idx[2]
# Boolean mask flagging the missing entries.
pd.isnull(idx)

Time Series Basics¶

In [ ]:

from datetime import datetime
# Six irregularly spaced days in January 2011 form the index of a small
# series filled with standard-normal random values.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
ts = pd.Series(np.random.randn(len(dates)), index=dates)
ts

In [ ]:

ts.index

In [ ]:

# ts[::2] keeps every second element; the addition aligns the two operands
# on their datetime index rather than by position.
ts + ts[::2]

In [ ]:

ts.index.dtype

In [ ]:

stamp = ts.index[0]
stamp

Indexing, Selection, Subsetting¶

In [ ]:

stamp = ts.index[2]
ts[stamp]

In [ ]:

ts['1/10/2011']
ts['20110110']

In [ ]:

# 1000 random observations on consecutive dates starting 2000-01-01.
longer_ts = pd.Series(np.random.randn(1000),
                      index=pd.date_range('1/1/2000', periods=1000))
longer_ts
# A bare year string selects every observation from that year.
longer_ts['2001']

In [ ]:

longer_ts['2001-05']

In [ ]:

ts[datetime(2011, 1, 7):]

In [ ]:

ts
ts['1/6/2011':'1/11/2011']

In [ ]:

ts.truncate(after='1/9/2011')

In [ ]:

# 100 weekly timestamps anchored on Wednesdays ('W-WED').
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
# Four columns of random values indexed by those dates.
long_df = pd.DataFrame(np.random.randn(100, 4),
                       index=dates,
                       columns=['Colorado', 'Texas',
                                'New York', 'Ohio'])
# Partial-string selection: the rows dated May 2001.
long_df.loc['5-2001']

Time Series with Duplicate Indices¶

In [ ]:

# The date 1/2/2000 appears three times, so the index has duplicate labels.
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000',
                          '1/2/2000', '1/3/2000'])
dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts

In [ ]:

dup_ts.index.is_unique

In [ ]:

dup_ts['1/3/2000']  # not duplicated
dup_ts['1/2/2000']  # duplicated

In [ ]:

# level=0 groups by the (only) index level, so rows sharing a timestamp
# fall into the same group.
grouped = dup_ts.groupby(level=0)
grouped.mean()
grouped.count()

Date Ranges, Frequencies, and Shifting¶

In [ ]:

ts
resampler = ts.resample('D')

Generating Date Ranges¶

In [ ]:

index = pd.date_range('2012-04-01', '2012-06-01')
index

In [ ]:

pd.date_range(start='2012-04-01', periods=20)
pd.date_range(end='2012-06-01', periods=20)

In [ ]:

pd.date_range('2000-01-01', '2000-12-01', freq='BM')

In [ ]:

pd.date_range('2012-05-02 12:56:31', periods=5)

In [ ]:

pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)

Frequencies and Date Offsets¶

In [ ]:

from pandas.tseries.offsets import Hour, Minute
hour = Hour()
hour

In [ ]:

four_hours = Hour(4)
four_hours

In [ ]:

pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')

In [ ]:

Hour(2) + Minute(30)

In [ ]:

pd.date_range('2000-01-01', periods=10, freq='1h30min')

Week of month dates¶

In [ ]:

rng = pd.date_range('2012-01-01', '2012-09-01', freq='WOM-3FRI')
list(rng)

Shifting (Leading and Lagging) Data¶

In [ ]:

# Four random values on a monthly ('M') date index.
# NOTE(review): recent pandas releases reportedly deprecate the 'M' alias in
# favour of 'ME' -- confirm against the pandas version this notebook targets.
ts = pd.Series(np.random.randn(4),
               index=pd.date_range('1/1/2000', periods=4, freq='M'))
ts
# Without `freq`, shift moves the values and leaves the index unchanged.
ts.shift(2)
ts.shift(-2)

# Change relative to the previous observation.
ts / ts.shift(1) - 1

In [ ]:

ts.shift(2, freq='M')

In [ ]:

# With `freq`, shift moves the timestamps instead of the values.
ts.shift(3, freq='D')
# '90min' replaces '90T': the single-letter 'T' minute alias is deprecated in
# recent pandas, while 'min' is accepted by old and new versions alike.
ts.shift(1, freq='90min')

Shifting dates with offsets¶

In [ ]:

from pandas.tseries.offsets import Day, MonthEnd
now = datetime(2011, 11, 17)
now + 3 * Day()

In [ ]:

now + MonthEnd()
now + MonthEnd(2)

In [ ]:

offset = MonthEnd()
offset.rollforward(now)
offset.rollback(now)

In [ ]:

# 20 random values spaced four days apart, starting 2000-01-15.
ts = pd.Series(np.random.randn(20),
               index=pd.date_range('1/15/2000', periods=20, freq='4d'))
ts
# Use the offset's rollforward method as the grouping function, so each
# timestamp is grouped under the date it rolls forward to, then average.
ts.groupby(offset.rollforward).mean()

In [ ]:

ts.resample('M').mean()

Time Zone Handling¶

In [ ]:

import pytz
pytz.common_timezones[-5:]

In [ ]:

tz = pytz.timezone('America/New_York')
tz

Time Zone Localization and Conversion¶

In [ ]:

rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

In [ ]:

print(ts.index.tz)

In [ ]:

pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')

In [ ]:

ts
ts_utc = ts.tz_localize('UTC')
ts_utc
ts_utc.index

In [ ]:

ts_utc.tz_convert('America/New_York')

In [ ]:

ts_eastern = ts.tz_localize('America/New_York')
ts_eastern.tz_convert('UTC')
ts_eastern.tz_convert('Europe/Berlin')

In [ ]:

ts.index.tz_localize('Asia/Shanghai')

Operations with Time Zone-Aware Timestamp Objects¶

In [ ]:

stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('America/New_York')

In [ ]:

stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow

In [ ]:

stamp_utc.value
stamp_utc.tz_convert('America/New_York').value

In [ ]:

from pandas.tseries.offsets import Hour
# US Eastern clocks sprang forward on 2012-03-11 (02:00 -> 03:00), so 01:30
# on that date is 30 minutes before the DST transition. The previous value,
# 2012-03-12, was the day after the transition and never crossed it.
stamp = pd.Timestamp('2012-03-11 01:30', tz='US/Eastern')
stamp
# Adding one hour crosses the transition: the wall clock reads 03:30 and the
# UTC offset changes.
stamp + Hour()

In [ ]:

# NOTE(review): 2012-11-04 is presumably chosen because US Eastern time left
# DST that morning, making 00:30 shortly before the transition -- confirm.
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
stamp
# Add two hours to a zone-aware timestamp near the transition.
stamp + 2 * Hour()

Operations Between Different Time Zones¶

In [ ]:

# Ten business-day ('B') timestamps at 09:30, starting 2012-03-07.
rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
# Localize the first seven observations to London time ...
ts1 = ts[:7].tz_localize('Europe/London')
# ... and convert all but the first two of those to Moscow time.
ts2 = ts1[2:].tz_convert('Europe/Moscow')
# Add two series whose indexes carry different time zones, then inspect the
# index of the result to see which zone it ends up in.
result = ts1 + ts2
result.index

Periods and Period Arithmetic¶

In [ ]:

p = pd.Period(2007, freq='A-DEC')
p

In [ ]:

p + 5
p - 2

In [ ]:

pd.Period('2014', freq='A-DEC') - p

In [ ]:

rng = pd.period_range('2000-01-01', '2000-06-30', freq='M')
rng

In [ ]:

pd.Series(np.random.randn(6), index=rng)

In [ ]:

values = ['2001Q3', '2002Q2', '2003Q1']
index = pd.PeriodIndex(values, freq='Q-DEC')
index

Period Frequency Conversion¶

In [ ]:

p = pd.Period('2007', freq='A-DEC')
p
p.asfreq('M', how='start')
p.asfreq('M', how='end')

In [ ]:

p = pd.Period('2007', freq='A-JUN')
p
p.asfreq('M', 'start')
p.asfreq('M', 'end')

In [ ]:

p = pd.Period('Aug-2007', 'M')
p.asfreq('A-JUN')

In [ ]:

rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
ts.asfreq('M', how='start')

In [ ]:

ts.asfreq('B', how='end')

Quarterly Period Frequencies¶

In [ ]:

p = pd.Period('2012Q4', freq='Q-JAN')
p

In [ ]:

p.asfreq('D', 'start')
p.asfreq('D', 'end')

In [ ]:

# Step back one business day from the quarter's last business day, switch to
# minute resolution at the start of that day, then add 16 * 60 minutes to
# land on 16:00 (4 PM).
# 'min' replaces the deprecated single-letter 'T' minute alias; both spell the
# same frequency, but only 'min' is accepted across pandas versions.
p4pm = (p.asfreq('B', 'e') - 1).asfreq('min', 's') + 16 * 60
p4pm
p4pm.to_timestamp()

In [ ]:

# Quarterly periods for a fiscal year ending in January.
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = pd.Series(np.arange(len(rng)), index=rng)
ts
# For every quarter: 4 PM on the second-to-last business day.
# 'min' replaces the deprecated single-letter 'T' minute alias.
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('min', 's') + 16 * 60
# Replace the period index with the equivalent timestamps.
ts.index = new_rng.to_timestamp()
ts

Converting Timestamps to Periods (and Back)¶

In [ ]:

rng = pd.date_range('2000-01-01', periods=3, freq='M')
ts = pd.Series(np.random.randn(3), index=rng)
ts
pts = ts.to_period()
pts

In [ ]:

rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = pd.Series(np.random.randn(6), index=rng)
ts2
ts2.to_period('M')

In [ ]:

pts = ts2.to_period()
pts
pts.to_timestamp(how='end')

Creating a PeriodIndex from Arrays¶

In [ ]:

data = pd.read_csv('examples/macrodata.csv')
data.head(5)
data.year
data.quarter

In [ ]:

# Combine the separate year and quarter columns into one quarterly index.
# NOTE(review): recent pandas releases reportedly deprecate passing
# year=/quarter= to the PeriodIndex constructor in favour of
# PeriodIndex.from_fields -- confirm against the targeted pandas version.
index = pd.PeriodIndex(year=data.year, quarter=data.quarter,
                       freq='Q-DEC')
index
# Use the periods as the frame's index, then look at the `infl` column.
data.index = index
data.infl

Resampling and Frequency Conversion¶

In [ ]:

# 100 consecutive days of random data.
rng = pd.date_range('2000-01-01', periods=100, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
# Monthly means, labelled with month-end timestamps.
ts.resample('M').mean()
# Same means labelled with monthly periods. The `kind='period'` argument of
# resample is deprecated, so the index is converted explicitly instead.
ts.resample('M').mean().to_period('M')

Downsampling¶

In [ ]:

# Twelve one-minute observations holding the values 0..11.
# 'min' replaces the deprecated single-letter 'T' minute alias.
rng = pd.date_range('2000-01-01', periods=12, freq='min')
ts = pd.Series(np.arange(12), index=rng)
ts

In [ ]:

# Default binning (no `closed` argument), shown first so that the
# closed='right' variant in the following cell has something to contrast
# with; this cell was previously an exact duplicate of that one.
ts.resample('5min').sum()

In [ ]:

ts.resample('5min', closed='right').sum()

In [ ]:

ts.resample('5min', closed='right', label='right').sum()

In [ ]:

# The `loffset` argument of resample was deprecated in pandas 1.1 and removed
# in 2.0. The documented replacement is to adjust the result's index after
# resampling; subtracting one second is equivalent to loffset='-1s'.
resampled = ts.resample('5min', closed='right', label='right').sum()
resampled.index = resampled.index - pd.Timedelta(seconds=1)
resampled

Open-High-Low-Close (OHLC) resampling¶

In [ ]:

ts.resample('5min').ohlc()

Upsampling and Interpolation¶

In [ ]:

frame = pd.DataFrame(np.random.randn(2, 4),
                     index=pd.date_range('1/1/2000', periods=2,
                                         freq='W-WED'),
                     columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame

In [ ]:

df_daily = frame.resample('D').asfreq()
df_daily

In [ ]:

frame.resample('D').ffill()

In [ ]:

frame.resample('D').ffill(limit=2)

In [ ]:

frame.resample('W-THU').ffill()

Resampling with Periods¶

In [ ]:

frame = pd.DataFrame(np.random.randn(24, 4),
                     index=pd.period_range('1-2000', '12-2001',
                                           freq='M'),
                     columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame[:5]
annual_frame = frame.resample('A-DEC').mean()
annual_frame

In [ ]:

# Q-DEC: Quarterly, year ending in December
annual_frame.resample('Q-DEC').ffill()
annual_frame.resample('Q-DEC', convention='end').ffill()

In [ ]:

annual_frame.resample('Q-MAR').ffill()

Moving Window Functions¶

In [ ]:

# Load the price table; the first column is parsed as dates and used as the index.
close_px_all = pd.read_csv('examples/stock_px_2.csv',
                           parse_dates=True, index_col=0)
# Keep three tickers only.
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
# Conform to business-day frequency, forward-filling any introduced gaps.
close_px = close_px.resample('B').ffill()

In [ ]:

close_px.AAPL.plot()
close_px.AAPL.rolling(250).mean().plot()

In [ ]:

plt.figure()

In [ ]:

# 250-observation rolling standard deviation of AAPL; min_periods=10 lets a
# value be produced once 10 observations are available in the window.
# NOTE(review): `appl_` looks like a typo for `aapl_`; the name is kept
# because a later cell (the expanding mean) refers to it.
appl_std250 = close_px.AAPL.rolling(250, min_periods=10).std()
appl_std250[5:12]
appl_std250.plot()

In [ ]:

expanding_mean = appl_std250.expanding().mean()

In [ ]:

plt.figure()

In [ ]:

close_px.rolling(60).mean().plot(logy=True)

In [ ]:

close_px.rolling('20D').mean()

Exponentially Weighted Functions¶

In [ ]:

plt.figure()

In [ ]:

# AAPL prices for 2006 through 2007.
aapl_px = close_px.AAPL['2006':'2007']
# Both averages use a 30-period window/span, so the variables are named
# ma30 / ewma30 (they were previously called ma60 / ewma60, which did not
# match the window actually used).
ma30 = aapl_px.rolling(30, min_periods=20).mean()
ewma30 = aapl_px.ewm(span=30).mean()
# Dashed line: simple moving average; solid line: exponentially weighted.
ma30.plot(style='k--', label='Simple MA')
ewma30.plot(style='k-', label='EW MA')
plt.legend()

Binary Moving Window Functions¶

In [ ]:

plt.figure()

In [ ]:

# SPX column of the full price table.
spx_px = close_px_all['SPX']
# Period-over-period percent changes for SPX and for the three-ticker frame.
spx_rets = spx_px.pct_change()
returns = close_px.pct_change()

In [ ]:

corr = returns.AAPL.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

In [ ]:

plt.figure()

In [ ]:

corr = returns.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

User-Defined Moving Window Functions¶

In [ ]:

plt.figure()

In [ ]:

from scipy.stats import percentileofscore


def score_at_2percent(x):
    """Return the percentile rank of the value 0.02 within the window `x`."""
    return percentileofscore(x, 0.02)


# Apply the custom function over a 250-observation rolling window of AAPL
# returns and plot the outcome.
result = returns.AAPL.rolling(250).apply(score_at_2percent)
result.plot()

In [ ]:

# Restore the row-display limit that was saved in the first cell.
pd.options.display.max_rows = PREVIOUS_MAX_ROWS

Time Series¶

Date and Time Data Types and Tools¶

Converting Between String and Datetime¶

Time Series Basics¶

Indexing, Selection, Subsetting¶

Time Series with Duplicate Indices¶

Date Ranges, Frequencies, and Shifting¶

Generating Date Ranges¶

Frequencies and Date Offsets¶

Week of month dates¶

Shifting (Leading and Lagging) Data¶

Shifting dates with offsets¶

Time Zone Handling¶

Time Zone Localization and Conversion¶

Operations with Time Zone-Aware Timestamp Objects¶

Operations Between Different Time Zones¶

Periods and Period Arithmetic¶

Period Frequency Conversion¶

Quarterly Period Frequencies¶

Converting Timestamps to Periods (and Back)¶

Creating a PeriodIndex from Arrays¶

Resampling and Frequency Conversion¶

Downsampling¶

Open-High-Low-Close (OHLC) resampling¶

Upsampling and Interpolation¶

Resampling with Periods¶

Moving Window Functions¶

Exponentially Weighted Functions¶

Binary Moving Window Functions¶

User-Defined Moving Window Functions¶

Conclusion¶