#(Re)install a library to help us run some SPARQL queries - any copy that is already installed is removed first
#http://rdflib.github.io/sparqlwrapper/
!pip3 uninstall -y sparqlwrapper
!pip3 install sparqlwrapper

#NOTE: if you find the SPARQL queries slowing down, or throwing an error message, try the following:
## 1) Save your notebook.
## 2) Close it.
## 3) Shut it down.
#This should reset sparqlwrapper
## 4) Restart the notebook.
# You will need to run the cells again to load packages, reset state etc, because you will have started a new IPython process.

#Import the necessary packages
from SPARQLWrapper import SPARQLWrapper, JSON

#Declare the BNB endpoint and create a SPARQLWrapper client bound to it
endpoint="http://bnb.data.bl.uk/sparql"
sparql = SPARQLWrapper(endpoint)

#My experience of SPARQL is that things work then they don't and you have no idea which bit is broken
#This test should work. It really should. It has before. And it shouldn't take too long.
#It comes from http://bnb.data.bl.uk/getting-started
#The query carries its own PREFIX declarations and looks up one book by its ISBN-13
q='''PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX bio: <http://purl.org/vocab/bio/0.1/>
PREFIX blt: <http://www.bl.uk/schemas/bibliographic/blterms#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX event: <http://purl.org/NET/c4dm/event.owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX isbd: <http://iflastandards.info/ns/isbd/elements/>
PREFIX org: <http://www.w3.org/ns/org#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rda: <http://rdvocab.info/ElementsGr2/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?book ?bnb ?title WHERE {
    #Match the book by ISBN
    ?book bibo:isbn13 "9780729408745";
    #bind some variables to its other attributes
    blt:bnb ?bnb;
    dct:title ?title. }'''
#Hand the query to the client, ask for a JSON response, then run it and convert the response
sparql.setQuery(q)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
#Leave the converted response as the last expression so the notebook displays it
results

#Declare a standard, if exhaustive, list of prefixes we can apply to each query
#Don't leave white space on the left hand side...
#From here on, queries are written without PREFIX lines and this string is prepended to them
prefix='''
PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX bio: <http://purl.org/vocab/bio/0.1/>
PREFIX blt: <http://www.bl.uk/schemas/bibliographic/blterms#>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX event: <http://purl.org/NET/c4dm/event.owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX isbd: <http://iflastandards.info/ns/isbd/elements/>
PREFIX org: <http://www.w3.org/ns/org#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rda: <http://rdvocab.info/ElementsGr2/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    
'''

#Let's just test a simple query
#Search for books by author name
q='''
SELECT DISTINCT ?book ?title WHERE {
    ?book dct:creator ?author ;
        dct:title ?title.
    ?author foaf:name "Iain Banks".
} LIMIT 5
'''

#Run the query, parse the response as JSON, and get them into a variable
sparql.setQuery(prefix+q)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

#Here's what the response looks like
results

#Let's specify the response columns we want to display
answerCols=['book','title']

#We can then iterate through these
#Each binding is one result row; print the chosen columns' values on a single line
for result in results["results"]["bindings"]:
    for ans in answerCols:
        print(result[ans]['value'], end=" ")
    print()

#Let's make a function to handle that a little more tidily
def printResults(results,ansCols):
    ''' Print the required results column values from the SPARQL query.

        results: parsed SPARQL JSON response, read as results["results"]["bindings"]
        ansCols: names of the result variables to print, in display order

        Each binding is printed on its own line, with a space after every value.
    '''
    for result in results["results"]["bindings"]:
        #Iterate the ansCols argument rather than the notebook-level answerCols variable,
        #so the caller's choice of columns is actually honoured
        for ans in ansCols:
            print(result[ans]['value'], end=" ")
        print()

#Try the function out on the response we already have
printResults(results,answerCols)

#Let's do a little more wrapping
def runQuery(endpoint,prefix,q):
    ''' Send prefix+q to the given SPARQL endpoint, requesting JSON, and
        return the converted response '''
    client = SPARQLWrapper(endpoint)
    fullQuery = prefix+q
    client.setQuery(fullQuery)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()

def queryResults(endpoint,prefix,q,ansCols):
    ''' Run a prefixed SPARQL query against an endpoint, then print the
        requested result columns, one row per line '''
    printResults(runQuery(endpoint,prefix,q),ansCols)

#Run the current query through the wrapper, printing the selected columns
queryResults(endpoint,prefix,q,answerCols)

#Let's see what the results look like
results

#Some endpoints will return data in other formats, for example flattened as a CSV data table
#We can flatten the data ourselves in an ad hoc way and get it into a pandas datatable

import pandas as pd

#pandas may have a better way of doing this?!
#Build one flat dict per result row, mapping each variable name to its 'value' entry
data=[]
for result in results["results"]["bindings"]:
    tmp={}
    for el in result:
        tmp[el]=result[el]['value']
    data.append(tmp)
    #Note that we lose the type information which we could have used to type the columns in the final dataframe

#One row per binding, one column per variable
df = pd.DataFrame(data)
df

#Let's wrap everything up
def dict2df(results):
    ''' Flatten SPARQL JSON query results into a pandas data frame: one row per
        binding, one column per variable, each cell holding that variable's 'value' '''
    rows=[
        {var: cell['value'] for var, cell in binding.items()}
        for binding in results["results"]["bindings"]
    ]
    return pd.DataFrame(rows)

def dfResults(endpoint,prefix,q):
    ''' Run a prefixed SPARQL query against an endpoint and hand back the
        flattened results as a data frame '''
    response=runQuery(endpoint,prefix,q)
    return dict2df(response)

#Try the data frame wrapper out on the current query
dfResults(endpoint,prefix,q)

#Ask the endpoint to DESCRIBE the book with a given ISBN-10, reusing the same wrapper
q='DESCRIBE ?book WHERE { ?book bibo:isbn10 "1857232356" }'
ans=runQuery(endpoint,prefix,q)
ans
#Warning reported when the cell above was run: Format requested was JSON, but RDF/XML (application/rdf+xml;charset=UTF-8) has been returned by the endpoint
#Serialise the DESCRIBE response in the "nt" format
ans.serialize(format="nt")

#Decode the serialised bytes so the output prints as readable text
#NOTE(review): newer rdflib releases appear to return str from serialize(), in which case
#.decode() would fail - confirm against the installed version
print(ans.serialize(format="nt").decode("utf-8"))

#For convenience, let's just bundle that up in case we need to call it again
def printDesc(endpoint,prefix,q):
    ''' Run a SPARQL query (typically a DESCRIBE) with a declared prefix over a
        specified endpoint and print the response serialised in the "nt" format.

        endpoint: URL of the SPARQL endpoint
        prefix: PREFIX declarations prepended to the query
        q: the query text
    '''
    ans=runQuery(endpoint,prefix,q)
    nt=ans.serialize(format="nt")
    #serialize() returns bytes on older rdflib releases and str on newer ones;
    #only decode when we were actually given bytes
    if isinstance(nt, bytes):
        nt=nt.decode("utf-8")
    print(nt)

#Describe the same book again, this time via printDesc
q='DESCRIBE ?book WHERE { ?book bibo:isbn10 "1857232356" }'
printDesc(endpoint,prefix,q)

#Select the BNB number, publication event, title and creator of the book with this ISBN-10
q='''
SELECT ?book ?bnb ?publicationEvent ?title ?creator WHERE {
    #Match the book by ISBN
    ?book bibo:isbn10 "1857232356";
    
        #bind some variables to other attributes of the work
        
        #Get the British National Bibliography number
        blt:bnb ?bnb;
        
        #Identify the publication event associated with this work
        blt:publication ?publicationEvent;
        
        #Identify the title of the work
        dct:title ?title;
        
        #Identify the creator of the work
        dct:creator ?creator.
    }
'''
#Run it and look at the raw converted response
runQuery(endpoint,prefix,q)

#List the distinct properties recorded against the creator of the book
q='''
SELECT DISTINCT ?property 
where {
    ?book bibo:isbn10 "1857232356";
        dct:creator ?creator.
    ?creator ?property ?x
}
'''
runQuery(endpoint,prefix,q)

#Follow the creator link so the author's name comes back alongside the book's details
q='''
SELECT ?book ?isbn10 ?bnb ?title ?author WHERE {
    #Match the book by ISBN
    ?book bibo:isbn10 "1857232356";
    
        #bind some variables to its other attributes
        blt:bnb ?bnb;
        dct:title ?title;
        bibo:isbn10 ?isbn10;
    
        dct:creator ?creator.
        
    ?creator foaf:name ?author.
    }
'''
#This time get the results back as a data frame
dfResults(endpoint,prefix,q)

#YOUR INVESTIGATION HERE

#List every property/value pair held for one particular publication event resource
q='''
SELECT DISTINCT ?a ?b WHERE {
    <http://bnb.data.bl.uk/id/resource/012701972/publicationevent/LondonOrbit1994> ?a ?b.
}
'''
dfResults(endpoint,prefix,q)

#List every property/value pair held for the year 1994 resource
q='''
SELECT DISTINCT ?a ?b WHERE {
    <http://reference.data.gov.uk/id/year/1994> ?a ?b.
}
'''
dfResults(endpoint,prefix,q)

#Find books by a named author and keep those whose publication date label falls in a range
#The bounds in the FILTER are quoted, so the comparison is made on strings
q='''
SELECT DISTINCT ?book ?title ?date WHERE {
    #Find books by 'Iain Banks':
    ?book dct:creator ?author ;
        dct:title ?title.
    ?author foaf:name "Iain Banks".
    
    #Find when they were published:
    ?book blt:publication ?publicationEvent.
    ?publicationEvent event:time ?eventTime.
    ?eventTime rdfs:label ?date.
    
    #Look for books published between 1985 and 1990
    FILTER (?date>="1985" && ?date<"1990")
} ORDER BY ?date 
'''
dfResults(endpoint,prefix,q)

def getBooksByAuthorBetweenDates(author,fromDate,toDate):
    ''' Return a data frame (book, title, date) of the books by the named author whose
        publication date label lies between fromDate and toDate, both bounds included.

        author: the foaf:name of the author, matched exactly
        fromDate, toDate: bounds compared with the date label as quoted strings, e.g. "1985"

        Uses the notebook-level endpoint and prefix variables.
    '''
    def literal(value):
        #Escape the characters that would otherwise end or corrupt a double-quoted
        #SPARQL string (e.g. an author name containing a quote mark)
        escaped=str(value).replace('\\','\\\\').replace('"','\\"')
        return escaped.replace('\n','\\n').replace('\r','\\r')

    q='''
            SELECT DISTINCT ?book ?title ?date WHERE {{
                #Find books by name:
                ?book dct:creator ?author ;
                    dct:title ?title.
                ?author foaf:name "{0}".

                #Find when they were published:
                ?book blt:publication ?publicationEvent.
                ?publicationEvent event:time ?eventTime.
                ?eventTime rdfs:label ?date.

                #Look for books published between dates
                FILTER (?date>="{1}" && ?date<="{2}")
            }} ORDER BY ?date
        '''.format(literal(author),literal(fromDate),literal(toDate))
    return dfResults(endpoint,prefix,q)

#Example: books by Terry Pratchett with a publication date label from "1985" to "1987"
getBooksByAuthorBetweenDates("Terry Pratchett","1985","1987")