import os, datetime
import pandas as pd
import pickle
import folium
from pandas import Series, DataFrame, Panel
from datetime import datetime, date, time
#cuts gp prescribing data for rows containing patterns of interest as dataframes
def pattern_timeseries(pattern):
pathtodata = '/media/mydisk/prescribing_data/bnf/'#set to the directory with bnf CSVs in
pathtopatterns = '/media/mydisk/prescribing_data/patterns/'
pathtopickles = '/media/mydisk/prescribing_data/pickles/'
os.chdir(pathtodata)
files = !ls T2*
global pattern_df
clean_filenames = []
for f in files.l:
clean_filenames.append(f[:7]) #clean up filenames so that grep will work
for i, item in enumerate(clean_filenames):
date = clean_filenames[i].replace('T', '')
date = pd.to_datetime(date, format="%Y%m") #make pandas know the date is a date
name = "bnf_%s_%.10s.csv" % (pattern, date) #create a name for new csv of grep for pattern using date from file name
print "writing %s" % name
!fgrep $pattern {clean_filenames[i]}* > $pathtopatterns/$name #grep for pattern in csv files and write to file
pattern_files = !ls $pathtopatterns*$pattern*
cols = ['SHA', 'PCT', 'PRACTICE', 'BNF_CODE', 'BNF_NAME', 'ITEMS', 'NIC', 'ACT_COST', 'QUANTITY', 'DateTime', 'Index']
practices_est = 10000 #estimated number of practices
df_list = [pd.read_csv(file, names=cols) for file in pattern_files]
pattern_df = pd.concat(df_list)
pattern_df['DateTime'] = pattern_df['DateTime'].astype('|S6')
pattern_df['DateTime'] = pd.to_datetime(pattern_df['DateTime'], format="%Y%m")
os.chdir(pathtopickles)
pattern_df.to_pickle('%s.pkl' % pattern)
return(pattern_df)
#define our opiates
Opiates = ['Codeine', 'Dihydrocodeine', 'Tramadol', 'Tapentadol', 'Buprenorphine', 'Fentanyl', 'Methadone', 'Morph', 'Oxycodone']
#commented because have run to Feb 2014
#for opiate in Opiates:
# pattern_timeseries(opiate)
#load prev outputs from above
pathtopickles = '/media/mydisk/prescribing_data/pickles/'
os.chdir(pathtopickles)
#load dataframes
Codeine = pd.read_pickle('Codeine.pkl')
Dihydrocodeine = pd.read_pickle('Dihydrocodeine.pkl')
Tramadol = pd.read_pickle('Tramadol.pkl')
Tapentadol = pd.read_pickle('Tapentadol.pkl')
Buprenorphine = pd.read_pickle('Buprenorphine.pkl')
Fentanyl = pd.read_pickle('Fentanyl.pkl')
Methadone = pd.read_pickle('Methadone.pkl')
Morph = pd.read_pickle('Morph.pkl')
Oxycodone = pd.read_pickle('Oxycodone.pkl')
#make a list of dataframes
df_list = [Codeine, Dihydrocodeine, Tramadol, Tapentadol, Buprenorphine, Fentanyl, Methadone, Morph, Oxycodone]
opiate_lookup = {0:'Codeine', 1:'Dihydrocodeine', 2:'Tramadol', 3:'Tapentadol', 4:'Buprenorphine', 5:'Fentanyl', 6:'Methadone', 7:'Morphine', 8:'Oxycodone'}
#prescribing data don't contain a quantity that's a quantity
#work out quantity in mg from prep information and quantity..
pathtocsv = '/media/mydisk/prescribing_data/xls/csv'
os.chdir(pathtocsv)
Codeine['BNF_NAME'] = Codeine['BNF_NAME'].map(str.strip) #get rid of white space
codeine_preps = Codeine['BNF_NAME'].unique().tolist() #get a list of preparation names
codeine_doses = [3, 3, 0.6, 60, 5, 15, 30, 60, 3.2, 12.8, 8, 1.35, 12.8, 2, 8, 6, 1, 0.6, 10, 5, 30, 15, 30, 12] #codeine in mg for each prep
codeine_dose_lookup = dict(zip(codeine_preps, codeine_doses)) #make dict of prep names and codeine doses
Codeine['QUANTITY_IN_MG'] = Codeine['BNF_NAME'].map(lambda x: codeine_dose_lookup[x]) * Codeine['QUANTITY']
#Dihydrocodeine
#work out quantity in mg from prep information and quantity
df = pd.read_csv('Dihydrocodeine.csv') #lookup prepared by colleague
Dihydrocodeine['BNF_NAME'] = Dihydrocodeine['BNF_NAME'].map(str.strip) #get rid of white space
dihydrocodeine_preps = Dihydrocodeine['BNF_NAME'].unique().tolist() #get a list of preparation names
dihydrocodeine_doses = df['mg of drug per unit of preparation'].str.replace('mg', '').astype(float).tolist() #remove the mg my collaborator added
dihydrocodeine_dose_lookup = dict(zip(dihydrocodeine_preps, dihydrocodeine_doses)) #make dict of prep names and codeine doses
Dihydrocodeine['QUANTITY_IN_MG'] = Dihydrocodeine['BNF_NAME'].map(lambda x: dihydrocodeine_dose_lookup[x]) * Dihydrocodeine['QUANTITY']
#Tramadol
#work out quantity in mg from prep information and quantity
df = pd.read_csv('Tramadol.csv') #lookup prepared by colleague
Tramadol['BNF_NAME'] = Tramadol['BNF_NAME'].map(str.strip) #get rid of white space
tramadol_preps = Tramadol['BNF_NAME'].unique().tolist() #get a list of preparation names
tramadol_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added
tramadol_dose_lookup = dict(zip(tramadol_preps, tramadol_doses)) #make dict of prep names and codeine doses
Tramadol['QUANTITY_IN_MG'] = Tramadol['BNF_NAME'].map(lambda x: tramadol_dose_lookup[x]) * Tramadol['QUANTITY']
#Tapentadol
#work out quantity in mg from prep information and quantity
df = pd.read_csv('Tapentadol.csv') #lookup prepared by colleague
Tapentadol['BNF_NAME'] = Tapentadol['BNF_NAME'].map(str.strip) #get rid of white space
tapentadol_preps = Tapentadol['BNF_NAME'].unique().tolist() #get a list of preparation names
tapentadol_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added
tapentadol_dose_lookup = dict(zip(tapentadol_preps, tapentadol_doses)) #make dict of prep names and codeine doses
Tapentadol['QUANTITY_IN_MG'] = Tapentadol['BNF_NAME'].map(lambda x: tapentadol_dose_lookup[x]) * Tapentadol['QUANTITY']
#Buprenorphine
#work out quantity in mg from prep information and quantity
df = pd.read_csv('Buprenorphine.csv') #lookup prepared by colleague
Buprenorphine['BNF_NAME'] = Buprenorphine['BNF_NAME'].map(str.strip) #get rid of white space
buprenorphine_preps = Buprenorphine['BNF_NAME'].unique().tolist() #get a list of preparation names
buprenorphine_doses = df['mg of drug per unit of preparation'].str.replace('mg', '').astype(float).tolist() #remove the mg my collaborator added
buprenorphine_dose_lookup = dict(zip(buprenorphine_preps, buprenorphine_doses)) #make dict of prep names and codeine doses
Buprenorphine['QUANTITY_IN_MG'] = Buprenorphine['BNF_NAME'].map(lambda x: buprenorphine_dose_lookup[x]) * Buprenorphine['QUANTITY']
#Fentanyl
#work out quantity in mg from prep information and quantity
df = pd.read_csv('Fentanyl.csv') #lookup prepared by colleague
Fentanyl['BNF_NAME'] = Fentanyl['BNF_NAME'].map(str.strip) #get rid of white space
fentanyl_preps = Fentanyl['BNF_NAME'].unique().tolist() #get a list of preparation names
fentanyl_doses = df['mg of drug per unit of preparation'].tolist() #remove the mg my collaborator added
fentanyl_dose_lookup = dict(zip(fentanyl_preps, fentanyl_doses)) #make dict of prep names and codeine doses
Fentanyl['QUANTITY_IN_MG'] = Fentanyl['BNF_NAME'].map(lambda x: fentanyl_dose_lookup[x]) * Fentanyl['QUANTITY']
#Methadone
#work out quantity in mg from prep information and quantity
df = pd.read_csv('Methadone.csv') #lookup prepared by colleague
Methadone['BNF_NAME'] = Methadone['BNF_NAME'].map(str.strip) #get rid of white space
methadone_preps = Methadone['BNF_NAME'].unique().tolist() #get a list of preparation names
methadone_doses = df['mg of drug per unit of preparation'].str.replace('tbc', '0')#remove the mg my collaborator added
methadone_doses = methadone_doses.astype(float).tolist()
methadone_dose_lookup = dict(zip(methadone_preps, methadone_doses)) #make dict of prep names and codeine doses
Methadone['QUANTITY_IN_MG'] = Methadone['BNF_NAME'].map(lambda x: methadone_dose_lookup[x]) * Methadone['QUANTITY']
#Morphine
#work out quantity in mg from prep information and quantity
df = pd.read_csv('Morphine.csv') #lookup prepared by colleague
Morph['BNF_NAME'] = Morph['BNF_NAME'].map(str.strip) #get rid of white space
morphine_preps = Morph['BNF_NAME'].unique().tolist() #get a list of preparation names
morphine_doses = df['mg of drug per unit of preparation'].str.replace('tbc', '0')
morphine_doses = morphine_doses.astype(float).tolist() #remove the mg my collaborator added
morphine_dose_lookup = dict(zip(morphine_preps, morphine_doses)) #make dict of prep names and codeine doses
Morph['QUANTITY_IN_MG'] = Morph['BNF_NAME'].map(lambda x: morphine_dose_lookup[x]) * Morph['QUANTITY']
#Oxycodone
#work out quantity in mg from prep information and quantity
df = pd.read_csv('Oxycodone.csv') #lookup prepared by colleague
Oxycodone['BNF_NAME'] = Oxycodone['BNF_NAME'].map(str.strip) #get rid of white space
oxycodone_preps = Oxycodone['BNF_NAME'].unique().tolist() #get a list of preparation names
oxycodone_doses = df['mg of drug per unit of preparation'].tolist()
oxycodone_dose_lookup = dict(zip(oxycodone_preps, oxycodone_doses)) #make dict of prep names and codeine doses
Oxycodone['QUANTITY_IN_MG'] = Oxycodone['BNF_NAME'].map(lambda x: oxycodone_dose_lookup[x]) * Oxycodone['QUANTITY']
#ccg mid-year population estimates from ONS for 2012
#from http://www.ons.gov.uk/ons/publications/re-reference-tables.html?edition=tcm%3A77-325526
#has CCG code in new format
pathtogpdata = '/home/sam/Documents/OpenDataAbstract/'
os.chdir(pathtogpdata)
df = pd.read_csv('SAPE7DT1-Mid-2012-ccg-syoa-file.csv')
df.icol(3).dropna().nunique() #column 3 has our 211 ccgs
df1 = df[~df.icol(3).isnull()] #throw away rows that don't relate to our ccgs
df2 = df1.icol([0,3,4]) #just use all ages and throw away the rest
df2.columns = ['CCG13CD', 'CCG_Name', 'Population'] #name the columns sensibly
#csv of old CCG codes matched onto new ones
#from http://www.erpho.org.uk/viewResource.aspx?id=22125
df3 = pd.read_csv('ccgcodemap.csv')
#merge our dataframes to make a frame with CCG names and old and new codes and population sizes
df4 = pd.merge(df2, df3, on='CCG13CD')
#gp to ccg mapping 1
#wget http://www.connectingforhealth.nhs.uk/systemsandservices/data/ods/ccginterim/interimpcmem_v5.zip
#has CCG code in old format and contains too many CCGs (>211)
df5 = pd.read_csv('interimpcmem_v5.csv')
df5.rename(columns = {'PRACTICECODE': 'PRACTICE'}, inplace=True)
df5 = df5[['PRACTICE', 'CCGCODE']] #throw away columns we don't care about
pathtopickles = '/media/mydisk/prescribing_data/pickles/'
os.chdir(pathtopickles)
df4.to_pickle('CCGPopData.pkl')
df5.to_pickle('GPCCGMap.pkl')
CCGPopData = pd.read_pickle('CCGPopData.pkl')
GPCCGMap = pd.read_pickle('GPCCGMap.pkl')
CCGPopData['Population'] = CCGPopData['Population'].str.replace(',','')
CCGPopData['Population'] = CCGPopData['Population'].astype(float)
for i, item in enumerate(df_list):
df_list[i] = df_list[i][['DateTime', 'PRACTICE', 'QUANTITY_IN_MG']]
df_list[i] = pd.merge(df_list[i], GPCCGMap, on='PRACTICE')
df_list[i] = df_list[i].groupby(['CCGCODE', 'DateTime']).QUANTITY_IN_MG.sum() #group by ccg and date time, sum the mg of codeine
df_list[i] = DataFrame(df_list[i]).reset_index()
df_list[i] = pd.merge(df_list[i], CCGPopData, on='CCGCODE')
df_list[i]['QUANTITY_IN_MG_PER_PERSON'] = df_list[i]['QUANTITY_IN_MG'] / df_list[i]['Population']
df_list[i]['Drug'] = opiate_lookup[i]
df_list[i] = df_list[i][['DateTime', 'CCG_Name', 'CCG13CD', 'Drug', 'QUANTITY_IN_MG', 'Population', 'QUANTITY_IN_MG_PER_PERSON']]
df = pd.concat(df_list)
df.index = df.DateTime
df['CCG_Name'] = df['CCG_Name'].str.strip()
print df.hist()
print df.describe()
[[Axes(0.125,0.563043;0.336957x0.336957) Axes(0.563043,0.563043;0.336957x0.336957)] [Axes(0.125,0.125;0.336957x0.336957) Axes(0.563043,0.125;0.336957x0.336957)]] QUANTITY_IN_MG Population QUANTITY_IN_MG_PER_PERSON count 77816.000000 77816.000000 77816.000000 mean 2558732.521669 254540.299424 10.445597 std 5847471.794484 135385.424098 21.758098 min 1.740000 63073.000000 0.000015 25% 13431.910000 167782.000000 0.057381 50% 290580.000000 222811.000000 1.253180 75% 1686307.500000 302739.000000 7.870421 max 63228150.000000 869408.000000 178.623148
df.groupby('DateTime')['QUANTITY_IN_MG_PER_PERSON'].sum().plot(ylim=(0,22000))
<matplotlib.axes.AxesSubplot at 0x17b989d0>
for drug in df['2013'].Drug.unique():
print drug
df['2013'][df['2013']['Drug'] == drug]['QUANTITY_IN_MG_PER_PERSON'].hist(bins=100)
plt.show()
Codeine
Dihydrocodeine
Tramadol
Tapentadol
Buprenorphine
Fentanyl
Methadone
Morphine
Oxycodone