#!/usr/bin/env python
# coding: utf-8

# # UK Parliament Members Data Platform OData Feed
#
# As well as offering an [XML/JSON Search API](http://data.parliament.uk/membersdataplatform/memberquery.aspx), the *UK Parliament Members' Names Data Platform* also provides an [OData service](http://data.parliament.uk/membersdataplatform/open.aspx) from the OData endpoint http://data.parliament.uk/membersdataplatform/open/OData.svc.
#
# Python support for OData seems limited, although a wrapper is provided as part of the *Python package for Standards in Learning Education and Training*, [pyslet](http://pyslet.readthedocs.org/en/latest/odatav2_consumer.html).
#
# So here's a quick exploration of the package and the UK Parliament OData service.
#
# Note that I'm completely new to OData, so there may be both better ways of using it, and more that can be done!

# In[182]:


#First, we need to install the pyslet package if it isn't already installed
#!pip install pyslet


# ## Getting Started
#
# The first thing we need to do is generate a client for the OData service, which is rooted on the endpoint http://data.parliament.uk/membersdataplatform/open/OData.svc.

# In[183]:


from pyslet.odata2.client import Client
c = Client("http://data.parliament.uk/membersdataplatform/open/OData.svc")


# The most useful way of probing the endpoint seems to be to request the available feeds. All the feeds seem to correspond to resources of type `EntitySet`, which I assume to be akin to tables...

# In[184]:


# Bare expression: a notebook displays its value; run as a script it prints nothing.
c.feeds


# ## Looking Inside an Entity Set
#
# An *entity set* resembles a relational database table and can be accessed using the `.OpenCollection()` method. The `.OpenCollection()` method brokers a connection to the OData database and as such should be closed using the `.close()` method when finished with.
#
# Let's start by exploring the `Committees` feed.
# In[185]:


cc=c.feeds['Committees'].OpenCollection()
#Close the connection with: cc.close()
#If used within a with block, the connection will be closed automatically on leaving the block


# The collection returns a dictionary of entities. In the case of the Parliament API, these are numerically keyed. Each entity also takes the form of dictionary; so one way of exploring the data model is to look at the keys of one of the `Committee` entities...

# In[186]:


cc[1].keys()


# In[187]:


# Look the entity up once rather than on every loop iteration.
committee = cc[1]
for k in committee.keys():
    print(k, committee[k])


# We can inspect the keys and values of a particular entity record:

# In[188]:


for k, v in cc[1].data_items():
    print(k, v, v.value)


# OData uses HTTP URLs to make data requests. We can view the URL for a requested resource using the `get_location()` method.

# In[189]:


print(cc[1].get_location())


# We can also preview various components of the URL, such as the `host` or `abs_path`:

# In[190]:


cc[1].get_location().abs_path


# We can retrieve the records in turn and display the values of the various record elements.

# In[191]:


# Show the names of (at most) the first ten committees.
for count, key in enumerate(cc, start=1):
    print(cc[key]['Name'].value)
    if count == 10:
        break


# Records are returned as paged results. We can limit the records returned using the `set_page()` method:

# In[192]:


NUM_ITEMS=4
START_INDEX=2
cc.set_page(NUM_ITEMS,START_INDEX)
for p in cc.iterpage(True):
    # print() call (not the Python 2 print statement) to match the rest of the file
    print(p.key(), p.keys(), p['Name'].value)


# ## Filtering Results
#
# When making a query, we often want to run a search over the data. The `set_filter()` method can be used to define search queries.
#
# *See __Section 4.5: Filter System Query Options__ of the [OData URI Conventions (OData Version 2.0) specification](http://www.odata.org/documentation/odata-version-2-0/uri-conventions/) for additional search constructs.*

# In[193]:


import pyslet.odata2.core as core

#Construct the search string as a filter
#Note that we can search over several fields using Boolean operators.
#For example, search for "Science" as part of the committee name and further limit to Lords' committees
odfilter = core.CommonExpression.from_str("substringof('Science',Name) and IsLords eq true")
cc.set_filter(odfilter)

for p in cc.itervalues():
    print(p.key(), p.keys(),p['Name'].value,p['IsLords'].value)

# Clear the filter so it does not leak into later queries on this collection
cc.set_filter(None)


# ### Ordering Search Results
#
# The *order* of the results can be controlled using the `set_orderby()` method.

# In[194]:


#Set the ordering: asc or desc within a particular field (?or one or more comma separated fields?)
# NOTE(review): OrderByFromString is the older camelCase spelling; newer pyslet
# releases appear to offer orderby_from_str - confirm before switching.
ordering=core.CommonExpression.OrderByFromString("Name desc")
cc.set_orderby(ordering)

#Add in a filter
odfilter = core.CommonExpression.from_str("substringof('Westminster',Name)")
cc.set_filter(odfilter)

#View the response
for p in cc.itervalues():
    print(p.key(), p['Name'].value)


# In[195]:


#Reset the filter and order limits
cc.set_filter(None)
cc.set_orderby(None)
# The Committees collection is not used again, so release its connection
cc.close()


# ### "Projecting" Search Results - Select
#
# In relational databases, a *projection* limits the columns that are returned from a query. Use the `select` field of the `set_expand()` method to limit the attribute data that is retrieved. The formulation is similar to the approach used in MongoDB, albeit the logic seems warped! To retrieve all data elements, set the `select` to `None`; to retrieve particular elements, set the `select` to a dictionary whose keys identify the fields you want to retrieve data for. Note that to maintain the integrity of the response dict, all keys are present, but values for the unselected elements are set to `None`. (If a selected element returns `None`, you presumably know its value is `null` in the database?)
# In[196]:


#Open a new collection - Members
m=c.feeds['Members'].OpenCollection()


# Let's start by seeing the values for all value bearing fields:

# In[197]:


m.set_expand(None, None)
for k, v in m[172].data_items():
    print(k,v.value)


# Limit the data retrieval to just the specified selection of fields:

# In[198]:


#Get the Forename and Surname data
m.set_expand(None, {'Forename':None,'Surname':None})
for k, v in m[172].data_items():
    if v.value is not None:
        print(k,v.value)


# Note that there are also elements that are navigation objects into other collections, as well as atomic value elements.

# In[199]:


# Look the member up once rather than on every loop iteration.
member = m[172]
for k in member.keys():
    print(k, member[k])


# For example, we can get a link into a collection of committees that a member is associated with.

# In[200]:


#"Navigation properties are represented as DeferredValue instances."
#"All deferred values can be treated as an entity collection"
mcm=m[172]['MemberCommittees'].OpenCollection()
mcm.keys()


# The `MemberCommittees` records don't actually contain the name of the specified committee. Instead, we get another navigation object that points to the committee.

# In[201]:


membership = mcm[660]
for k in membership.keys():
    # NOTE(review): the same value is printed twice - possibly unintended; output kept as-is.
    print(k, membership[k], membership[k])


# If we open *that* collection, we can look up details of the committee.

# In[202]:


# The with block closes the collection on exit; the name is re-bound in the next cell.
with mcm[660]['Committee'].OpenCollection() as mcmc:
    #Display the values for the committee
    # list() keeps the [0] lookup valid whether keys() returns a list or a view
    for k, v in mcmc[list(mcmc.keys())[0]].data_items():
        print(k,v.value)


# So if we step back, we can find all the committees a member has been associated with

# In[203]:


#Get the MembersCommittees keys for a member
for k in mcm.keys():
    #Get the Committee details
    with mcm[k]['Committee'].OpenCollection() as mcmc:
        for k2,v2 in mcmc[list(mcmc.keys())[0]].data_items():
            print(k2,v2.value)
    print('----------')

# This MemberCommittees collection is finished with here, so release its connection
mcm.close()


# There must be an easier way to do this, and the `expansion` part of the `set_expand()` method seems like it should be the way to do this.
# For example, the following construction appears to be legal and would seem to suggest that we can "pull through", or at least access, name information from the actual committees? But it doesn't seem to form part of the available data values?

# In[204]:


# First argument: navigation properties to expand; second: properties to select.
m.set_expand(
    {'MemberCommittees':{'Committee':None}},
    {'Forename':None,
     'Surname':None,
     'MemberCommittees':{'Committee':{'Name':None,'EndDate':None}}},
)

#Show the data retrieved for the selected member
for k, v in m[172].data_items():
    if v.value is not None:
        print(k,v.value)


# Alternatively, just request the value of a known attribute directly:

# In[208]:


m[172]['Forename'].value


# Using the expansion also seems to have limited the amount of data that is requested from the `Committee` entities:

# In[206]:


# with blocks close each collection automatically on leaving the block
with m[172]['MemberCommittees'].OpenCollection() as mcm:
    for k in mcm.keys():
        #Get the Committee details
        with mcm[k]['Committee'].OpenCollection() as mcmc:
            # list() keeps the [0] lookup valid whether keys() returns a list or a view
            for k2,v2 in mcmc[list(mcmc.keys())[0]].data_items():
                if k2=='EndDate' or v2.value is not None:
                    print(k2,v2.value)
        print('------')

# The Members collection is not used again, so release its connection
m.close()


# I'm not sure if there's a way of filtering based on this expansion? e.g. to limit results to committees where the `EndDate` really is `None` or `null` (i.e. limit to committees that the member is still currently a member of?)

# In[ ]: