#!/usr/bin/env python # coding: utf-8 # #NHS Complaints # # Via a tweet from @HSCICOpenData today - *Data on Written Complaints in the NHS - CSV datasets on http://data.gov.uk at http://bit.ly/1IfyLE2* I grabbed a spreadsheet from http://data.gov.uk/dataset/data_on_written_complaints_in_the_nhs and had a quick play... # ## Some set up stuff... # # Get some metadata into play about GP and dental practices so we can make sense of the practice codes that appear in the complaints dataset... # ### GP Practices Administrative Data # In[21]: #Downloads: http://systems.hscic.gov.uk/data/ods/datadownloads/index # In[2]: import pandas as pd # In[3]: #epracurr is administrative info about GP practices - practice codes, address, etc etc #http://systems.hscic.gov.uk/data/ods/datadownloads/gppractice/index_html #http://systems.hscic.gov.uk/data/ods/datadownloads/data-files/xls/epraccur.zip xl=pd.ExcelFile('epraccur/epraccur.xls') xl.sheet_names # In[4]: import requests, zipfile, StringIO def zipgrabber(url, f): r = requests.get(url) z = zipfile.ZipFile(StringIO.StringIO(r.content)) return z.open(f) # In[5]: url='http://systems.hscic.gov.uk/data/ods/datadownloads/data-files/xls/epraccur.zip' xl=pd.ExcelFile(zipgrabber(url,'epraccur.xls')) xl.sheet_names # In[6]: cols=['Organisation Code','Name','National Grouping','High Level Health Geography', 'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode', 'Open Date','Close Date','Status Code','Organisation Sub-Type code', 'Parent Organisation Code','Join Parent Date','Left Parent Date','Contact Telephone Number', 'Available for future use','Available for future use','Available for future use', 'Amended Record Indicator','Available for future use', 'Provider/Purchaser','Available for future use','Prescribing Setting'] # In[7]: gp=xl.parse('epraccur',header=None) gp.columns=cols gp.head() # ###Dental Practices Administrative Data # In[9]: #egdpprac is administrative info about dental practices - practice codes, address, etc etc ##http://systems.hscic.gov.uk/data/ods/datadownloads/misc/index_html #http://systems.hscic.gov.uk/data/ods/datadownloads/data-files/egdpprac.zip #dentists=pd.read_csv('egdpprac/egdpprac.csv',header=None) url='http://systems.hscic.gov.uk/data/ods/datadownloads/data-files/egdpprac.zip' dentists=pd.read_csv(zipgrabber(url,'egdpprac.csv')) cols2=['Organisation Code','Name','National Grouping','High Level Health Geography', 'Address Line 1','Address Line 2','Address Line 3','Address Line 4','Address Line 5','Postcode', 'Open Date','Close Date','Status Code','Organisation Sub-Type code', 'Parent Organisation Code','Join Parent Date','Left Parent Date','Contact Telephone Number', 'Available for future use','Available for future use','Available for future use', 'Amended Record Indicator','Available for future use','Available for future use', 'Available for future use','Available for future use','Available for future use'] dentists.columns=cols2 dentists.head() # ## Complaints Data # # via: http://data.gov.uk/dataset/data_on_written_complaints_in_the_nhs # In[10]: url='http://www.hscic.gov.uk/catalogue/PUB18021/data-writ-comp-nhs-2014-2015-csv.zip' #complaints_a=pd.read_csv('data-writ-comp-nhs-2014-2015-csv/Data on Written Complaints in the NHS 2014-15 KO41a csv.csv') complaints_a=pd.read_csv(zipgrabber(url,'Data on Written Complaints in the NHS 2014-15 KO41a csv.csv')) complaints_a.dropna(how='all',axis=1,inplace=True) complaints_a.head(3) # In[11]: #http://data.gov.uk/dataset/data_on_written_complaints_in_the_nhs url='http://www.hscic.gov.uk/catalogue/PUB18021/data-writ-comp-nhs-2014-2015-csv.zip' #xl=pd.ExcelFile('data-writ-comp-nhs-2014-2015-csv/NHS Written Complaints 2014_15 KO41b meta data.xls') xl=pd.ExcelFile(zipgrabber(url,'NHS Written Complaints 2014_15 KO41b meta data.xls')) xl.sheet_names complaints_meta=xl.parse('KO41b Metadata',header=None) complaints_meta.columns=['Name','typ','meta'] complaints_meta # In[12]: cols=['::'.join(x) for x in zip(complaints_meta['Name'],complaints_meta['meta'])] # In[23]: url='http://www.hscic.gov.uk/catalogue/PUB18021/data-writ-comp-nhs-2014-2015-csv.zip' complaints_b=pd.read_csv(zipgrabber(url,'Data on Written Complaints in the NHS 2014-15 KO41b csv.csv')) realcols = [c for c in complaints_b.columns if not c.startswith('Unnamed')] complaints_b=complaints_b[realcols] #complaints_b.dropna(how='all',axis=1,inplace=True) complaints_b.columns=cols complaints_b.head(3) # In[14]: #wight: 10L (and R1F?) # In[15]: #join complaints parent codes parentGP=gp[['Organisation Code','Parent Organisation Code']] parentDentist=dentists[['Organisation Code','Parent Organisation Code']] codeparent=pd.concat([parentGP,parentDentist]) codeparent.head(3) # In[16]: iwcodes=codeparent[(codeparent['Parent Organisation Code']=='10L')] iwcodes # In[17]: complaints_iw=pd.merge(complaints_b,iwcodes, left_on='Practice_Code::Dental/GP Practice Code', right_on='Organisation Code') complaints_iw # In[18]: gp[gp['Organisation Code'].isin(complaints_iw['Practice_Code::Dental/GP Practice Code'].tolist())] # In[26]: def epracurrDetails(code): return gp[gp['Organisation Code']==code].to_dict(orient='records')[0] def getAddress(d): c=[x for x in d.keys() if 'address line' in x.lower()] return [d[x] for x in c if ((d[x] is not None) and not pd.isnull(d[x]))] def receivedUpheld(d,k): rxd=d[k]['received'] if 'received' in d[k] else 0 upheld=d[k]['upheld'] if 'upheld' in d[k] else 0 return rxd,upheld def complaintReport(item): details=epracurrDetails(item['Practice_Code::Dental/GP Practice Code']) txt='\n---------------\n' txt=txt+'Complaint report for: {name} ({code})\n'.format(name=details['Name'], code=item['Practice_Code::Dental/GP Practice Code']) txt=txt+'\nAddress: {addr}.'.format(addr=', '.join(getAddress(details))) txt=txt+'\n' complaints_received={'Complaint by subject':{},'Complaint by area':{}} complaints_upheld={'Complaint by subject':{},'Complaint by area':{}} complaints_by_area={} complaints_by_subject={} for col in item.index.values: if 'complaint' in col.lower() and item[col]>0: k=col.replace('\n','').split('Total number of written complaints ') if k[0]=='': k[0]='Total' k[0]=k[0].strip() k[1]=k[1] if 'received' in k[1]: complaints_received[k[1].split('::')[1]][k[0]]=int(item[col]) elif 'upheld' in k[1]: complaints_upheld[k[1].split('::')[1]][k[0]]=int(item[col]) if 'area' in k[1]: if k[0] not in complaints_by_area: complaints_by_area[k[0]]={} complaints_by_area[k[0]][k[1].split('::')[0]]=int(item[col]) elif 'subject' in k[1]: if k[0] not in complaints_by_subject: complaints_by_subject[k[0]]={} complaints_by_subject[k[0]][k[1].split('::')[0]]=int(item[col]) txt=txt+'\n' if complaints_by_area!={}: rxd,upheld=receivedUpheld(complaints_by_area,"Total") txt=txt+'Complaints by area ({} received, of which {} upheld):'.format(rxd,upheld) for complaint in complaints_by_area: if complaint=='Total': continue rxd,upheld=receivedUpheld(complaints_by_area,complaint) txt=txt+'\n - {}: {} received, of which {} upheld.'.format(complaint,rxd,upheld) txt=txt+'\n\n' if complaints_by_subject!={}: rxd,upheld=receivedUpheld(complaints_by_subject,"Total") txt=txt+'Complaints by subject ({} received, of which {} upheld):'.format(rxd,upheld) for complaint in complaints_by_subject: if complaint=='Total': continue rxd,upheld=receivedUpheld(complaints_by_subject,complaint) txt=txt+'\n - {}: {} received, of which {} upheld.'.format(complaint,rxd,upheld) #print(txt) #print('\n---------------\n') return txt reports=complaints_iw.apply(lambda x: complaintReport(x), axis=1 ) for report in reports: print(report)