# Creating a knowledge graph browser for the Met from Wikidata:
# By artist, department, type of object, image
# Complex: depicted content (Met tags, and non Met tags), by number of Wikimedia links/articles
# Specific: highlight, TOAH, all
# Near future: by era (start, stop time), made from material (P186), fabrication method (P2079)
# For the future - genre, movement, copyright status
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import HBox, VBox, Label
import requests
import numpy as np
import json
import pandas as pd
import urllib.parse
# SPARQL endpoint of the Wikidata Query Service (this is the query service,
# not the MediaWiki action API). The artist query below is POSTed to it.
wikidata_api_url = u'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
## Grab all items in Wikidata with Met Object ID (P3634)
# Rewrite URL if needed: BIND ((CONCAT ("[http://www.metmuseum.org/art/collection/search/", ?id, " ", ?inv, "]")) as ?meturl) .
# Lists each creator (P170) of an item carrying a Met Object ID, with the
# creator's sitelink count, ordered from most to fewest sitelinks.
pop_met_artists_query = u'''
# Most popular artists in Wikidata that have Met works
SELECT DISTINCT ?creator ?creatorLabel ?linkcount WHERE {
?item wdt:P3634 ?metid .
?item wdt:P170 ?creator .
?creator wikibase:sitelinks ?linkcount .
FILTER (?linkcount > 0) .
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} GROUP BY ?creator ?creatorLabel ?linkcount ORDER BY DESC(?linkcount)
'''
# Graph query template with four %s placeholders, filled in this order by draw_kg():
#   1. creator VALUES clause, e.g.  VALUES ?creator { wd:Q41264 }   (or empty)
#   2. institution VALUES clause, e.g.  VALUES ?institution {wd:Q160236}   (or empty)
#   3. institution triple pattern for the "works and instances" branch (or empty)
#   4. the same institution triple pattern for the "depictions" branch (or empty)
# The template is formatted with the % operator, so it must not contain any
# other literal percent sign.
creator_depictions_query_template = u'''
# Artists and their works depictions
#defaultView:Graph
SELECT ?item1 ?image1 ?item1Label ?item2 ?image2 ?item2Label ?size ?rgb
WHERE
{
%s
%s
{ # Get works and instances
VALUES ?rgb { "FFBD33" }
VALUES ?size { 2 }
?item1 wdt:P170 ?creator .
%s
?item1 wdt:P31 ?item2 .
OPTIONAL { ?item1 wdt:P18 ?image1. }
}
UNION
{ # Depictions
VALUES ?rgb { "fff033" }
VALUES ?size { 1 }
?item1 wdt:P170 ?creator .
%s
?item1 wdt:P180 ?item2 .
OPTIONAL { ?item1 wdt:P18 ?image1. }
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
'''
def sparql_qidlist_to_values_string(inlist: list) -> str:
    '''
    Converts a list of Q numbers to a VALUES string for SPARQL

    Input: ['wd:Q123', 'wd:Q456', 'wd:Q789']
    Output: '{wd:Q123 wd:Q456 wd:Q789}'

    An empty list gives '{}'. If inlist is not an iterable of strings, an
    error message is printed and None is returned instead of a string, so
    callers that concatenate the result must pass strings only.
    '''
    try:
        return '{' + ' '.join(inlist) + '}'
    except TypeError:
        # TypeError is what str.join raises for a non-iterable argument or a
        # non-string element. Anything else (KeyboardInterrupt included) is
        # deliberately left to propagate rather than being swallowed.
        print ('Error: making values string')
        return None
def create_widget_tuple_list (labels: list, values: list) -> list:
    '''
    Converts two lists into the list of tuples needed for Jupyter Widgets

    Input:
    labels: ['Label1', 'Label2'...]
    values: ['Value1', 'Value2'...]
    Output:
    [('Label1', 'Value1'),...]

    Pairs are formed positionally. If the inputs differ in length, the
    extra entries of the longer one are dropped (zip semantics).
    '''
    # Return a real list, as the annotation and docstring promise
    return list(zip(labels, values))
## START EXECUTION
# Module-level SPARQL fragments, all initially empty
# (draw_kg works on its own local variables of the same names).
creator_values = institution_values = institution_statement = ''
# Current selections, written by the widget handlers and read by draw_kg
query_dict = dict(creators=[], institutions=[])
# Wikidata SPARQL query to get list of artists with some type of work at The Met
# If you want caching of requests, uncomment next two lines
# import requests_cache
# requests_cache.install_cache('wikidata_sparql_cache', allowable_methods=('GET', 'POST'))
response = requests.post(wikidata_api_url, data={'query': pop_met_artists_query, 'format': 'json'})
try:
    data = response.json()
except ValueError:
    # A body that is not valid JSON (e.g. an HTML error page) makes .json()
    # raise a ValueError subclass. Show the raw body to aid debugging, then
    # re-raise because nothing below can run without the parsed result.
    print ('jsondecode problem.')
    print (response.content)
    raise
# Interpret result from SPARQL query: one record per binding, with the entity
# URI reduced to its bare Q number and the sitelink count converted to int
resultarray = [
    {
        'qid': str(item['creator']['value'].replace('http://www.wikidata.org/entity/','')),
        'label': str(item['creatorLabel']['value']),
        'linkcount': int(item['linkcount']['value']),
    }
    for item in data['results']['bindings']
]
# Naming the columns keeps them present even if the query returned no rows,
# so the column arithmetic below also works on an empty result
df = pd.DataFrame(resultarray, columns=['qid', 'label', 'linkcount'])
# Make new dataframe to prep the selection box text
new_df = pd.DataFrame()
# Add new column turning Q123 to wd:Q123
new_df['wdqid'] = 'wd:' + df['qid']
# Add linkcount to label so it displays like "Henri Matisse (42)"
new_df['label'] = df['label'] + ' (' + df['linkcount'].astype(str) + ')'
# Create a list of (label, value) tuples to be used by SelectMultiple list
# [("Henri Matisse (42)", "wd:Q5589"), ...]
creator_options = []
tuple_list = create_widget_tuple_list(new_df['label'].to_list(), new_df['wdqid'].to_list())
creator_options.extend(tuple_list)
# iframecode = '<iframe src=' + graphurl + ' width=900 height=600></iframe>'
# Empty iframe used as the initial content of the graph output widget
iframecode_blank = '<iframe src=' + 'about:blank' + ' width=900 height=600></iframe>'
# (display label, value) pairs for the institution list. The value is a
# prefixed Wikidata QID, except 'ALL', which handle_institutions treats as
# "no institution filter".
institution_options = [
    ('All institutions', 'ALL'),
    ('Met Museum', 'wd:Q160236'),
    ('National Gallery of Art (US)', 'wd:Q214867'),
    ('Smithsonian American Art Museum', 'wd:Q1192305'),
    ('National Portrait Gallery (US)','wd:Q1967614'),
    ('Cooper Hewitt','wd:Q1129820'),
    ('Hirshhorn Museum and Sculpture Garden','wd:Q1620553'),
    ('Freer Sackler Gallery','wd:Q105749808'),
    ('National Museum of African Art','wd:Q46812'),
    ('Cleveland Museum of Art','wd:Q657415'),
    ('Yale University Gallery of Art', 'wd:Q1568434'),
    ('Rijksmuseum', 'wd:Q190804'),
]
# Page title, rendered as an HTML heading
textheader = widgets.HTML(
value="<H1>Wikidata Graph Browser</H1>",
placeholder='',
description='',
)
# Multi-select list of artists; options are the (label, wd:QID) tuples built
# above, and nothing is selected initially
selector = widgets.SelectMultiple(
options=creator_options,
value=[],
rows=10,
description='',
disabled=False
)
# Multi-select list of institutions, also starting with an empty selection
institution_selector = widgets.SelectMultiple(
options=institution_options,
value=[],
rows=10,
description='',
disabled=False
)
# HTML widget holding the graph iframe; it starts with the blank iframe and
# draw_kg() replaces its value on every selection change
graphoutput = widgets.HTML(
value=iframecode_blank,
placeholder='<p>Waiting for input</p>',
description=''
)
def handle_creators(incoming):
    '''
    Observer callback for the artist list.

    Stores the new selection (incoming['new']) under query_dict['creators']
    and redraws the graph.
    '''
    query_dict.update(creators=incoming['new'])
    draw_kg()
def handle_institutions(incoming):
    '''
    Observer callback for the institution list.

    Stores the new selection (incoming['new']) under
    query_dict['institutions'], or an empty list when the 'ALL' entry is
    part of the selection, then redraws the graph.
    '''
    chosen = incoming['new']
    # 'ALL' anywhere in the selection means "do not filter by institution"
    query_dict['institutions'] = [] if 'ALL' in chosen else chosen
    draw_kg()
def draw_kg():
    '''
    Rebuilds the graph query from the selections stored in query_dict and
    loads it into the output iframe via the Wikidata embed page.
    '''
    picked_creators = query_dict['creators']
    picked_institutions = query_dict['institutions']

    # A fragment stays empty when nothing is selected for it
    creator_values = (
        'VALUES ?creator ' + sparql_qidlist_to_values_string(list(picked_creators))
        if picked_creators
        else ''
    )
    if picked_institutions:
        institution_values = (
            'VALUES ?institution '
            + sparql_qidlist_to_values_string(list(picked_institutions))
        )
        institution_statement = '?item1 wdt:P195 ?institution .'
    else:
        institution_values = institution_statement = ''

    # Fill the four template slots; the institution pattern goes into both
    # UNION branches.
    # TODO - eventually want to support as many UNION subqueries as needed
    fragments = (creator_values, institution_values, institution_statement, institution_statement)
    query = creator_depictions_query_template % fragments

    # The percent-encoded query is carried in the URL fragment of the embed page
    graphurl = 'https://query.wikidata.org/embed.html#' + urllib.parse.quote(query)
    # Assigning a new value makes the HTML widget show the new iframe
    graphoutput.value = f'<iframe src={graphurl} width=900 height=600></iframe>'
# Layout: title on top, the two captioned selection lists side by side below
# it, and the graph iframe at the bottom
a_selector = VBox(children=[Label(value='Artists'), selector])
i_selector = VBox(children=[Label(value='Institutions'), institution_selector])
topbox = HBox(children=[a_selector, i_selector])
headerbox = VBox(children=[textheader, topbox])
bottombox = HBox(children=[graphoutput])
bigbox = VBox(children=[headerbox, bottombox])
display(bigbox)
# Redraw whenever the selected value of either list changes
selector.observe(handle_creators, 'value')
institution_selector.observe(handle_institutions, 'value')