#!/usr/bin/env python # coding: utf-8 # # Representing Time Periods # # A notebook that demonstrates how to cast various time periods (weekly, monthly, quarterly) using *pandas*. # ## Quarterly Time Series # # Time series such as the NHS Digital Accident and Emergency figures are published as quarterly and monthly time series. # # The following example shows how to cast the quarterly reports to a quarterly time period in *pandas*, with quarters aligned by financial year. # In[5]: #Download example data file url='https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/2016/06/Quarterly-timeseries-May-2017.xls' get_ipython().system('wget -P data/ {url}') # In[6]: import pandas as pd #Preview data dfw=pd.read_excel('data/Quarterly-timeseries-May-2017.xls', skiprows=15,header=None,na_values='-').dropna(how='all').dropna(axis=1,how='all') dfw.iloc[0,0]='Reporting' dfw.iloc[1,0]='Code' dfw= dfw.fillna(axis=1,method='ffill').T.set_index([0,1]).T.dropna(how='all').dropna(axis=1,how='all') dfw=dfw.reset_index(drop=True) dfw.head() # Note how the time series is described in the data file across two columns: the financial year and the quarter: # In[8]: dfw[[('Reporting','Year'),('Reporting','Quarter')]].head() # Fill down on the financial year, and create a new *period* column joining those two elements. # In[9]: dfw['Reporting','Year'].fillna(method='ffill',inplace=True) dfw['Reporting','period']=dfw['Reporting','Year']+' '+dfw['Reporting','Quarter'] dfw[[('Reporting','Year'),('Reporting','Quarter'),('Reporting','period')]].head() # Use the *pandas* ["anchored offsets"](http://pandas.pydata.org/pandas-docs/version/0.12.0/timeseries.html#anchored-offsets) to specifiy the quarterly period relative to the financial year. # In[16]: #Return a date that we can use to anchor a period on def getMonthYear(row): if not row['Reporting','Quarter'].startswith('Q'): return '' month=row['Reporting','Quarter'].split(':')[1].split('-')[0].strip() year=int(row['Reporting','Year'].split('-')[0]) if month in ['Jan']: year= year+1 #Following the conversion, the _quarter year specifies the calendar year in which the financial year ends #Return the date corresponding to the first day of the month in the calendar year the financial quarter applies to return pd.to_datetime("01-{}-{}".format(month[:3],year),format='%d-%b-%Y') #Get a date corrsponding to the first day in the month of the first month in the quarter, by calendar year #Use this date to align a particular quarterly period #Q-MAR specifies a quarterly frequency, with year ending in March dfw['Reporting','_quarter']=pd.PeriodIndex(dfw.apply(getMonthYear,axis=1), freq='Q-MAR') #Sort the columns dfw=dfw.sort_index(axis=1) dfw=dfw.reindex(columns=['Reporting']+[c for c in dfw.columns.levels[0] if c!='Reporting'],level=0) #Preview the data with the quarter time period column defined dfw[[('Reporting','Year'),('Reporting','Quarter'),('Reporting','period'),('Reporting','_quarter')]].head() # ## Monthly Time Series # # NHS Digital Accident and Emergency figures are also published as a monthly time series. # In[18]: url='https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/2017/06/May-17-Monthly-AE-Timeseries-with-growth-charts.xlsx' get_ipython().system('wget -P data/ {url}') # In[20]: dfw=pd.read_excel('data/May-17-Monthly-AE-Timeseries-with-growth-charts.xlsx',skiprows=16,header=None,na_values='-').dropna(how='all').dropna(axis=1,how='all') dfw.iloc[0,0]='Reporting' dfw.iloc[1,0]='Code' dfw= dfw.fillna(axis=1,method='ffill').T.set_index([0,1]).T.dropna(how='all').dropna(axis=1,how='all') dfw.head() # The *pandas* `dt.to_period('M')` function returns a monthly period corresponding to the month within which a specified date falls. # In[27]: #Cast the date corresponding to the month start as a month period starting on that date dfw['Reporting','_period']=pd.to_datetime(dfw['Reporting','Period'], format='%d/%m/%Y').dt.to_period('M') dfw=dfw.sort_index(axis=1) dfw=dfw.reindex(columns=['Reporting']+[c for c in dfw.columns.levels[0] if c!='Reporting'],level=0) dfw[[('Reporting','Period'),('Reporting','_period')]].head() # ## Weekly Time Series # # The NHS used to publish weekly time series for A & E figures. Archival data is [still available](https://www.england.nhs.uk/statistics/statistical-work-areas/ae-waiting-times-and-activity/statistical-work-areasae-waiting-times-and-activityweekly-ae-sitreps-2015-16/). # In[28]: url='https://www.england.nhs.uk/statistics/wp-content/uploads/sites/2/2015/04/2015.06.28-AE-TimeseriesBaG87.xls' get_ipython().system('wget -P data/ {url}') # In[33]: dfw=pd.read_excel('data/2015.06.28-AE-TimeseriesBaG87.xls', skiprows=14,header=None,na_values='-').dropna(how='all').dropna(axis=1,how='all') dfw.iloc[0,0]='Reporting' dfw.iloc[1,0]='Code' dfw= dfw.fillna(axis=1,method='ffill').T.set_index([0,1]).T.dropna(how='all').dropna(axis=1,how='all') dfw.head() # The *pandas* `.to_period('W')` function returns a week period, starting on a Monday, that a given date falls within. # In[34]: #Filter to retain rows that have a W/E period specified dfw=dfw[dfw[('Reporting','Period')].str.startswith('W/E')] #Remove the W/E prefix and use the date to define dfw['Reporting','_period']=pd.to_datetime(dfw['Reporting','Period'].str.replace('W/E ',''), format='%d/%m/%Y').dt.to_period('W') dfw[[('Reporting','Period'),('Reporting','_period')]].head() # In[ ]: