This notebook queries the FREYA PID Graph via DataCite's GraphQL API to retrieve people affiliated with an organization. It takes a ROR URL as input, which is used internally to retrieve the GRID ID from the ROR API and the Ringgold ID from Wikidata; these identifiers are then used to find ORCID record holders at the institution. From the resulting list of people we output the ORCID iDs.
# Prerequisites:
import requests # dependency to make HTTP calls
from benedict import benedict # dependency for dealing with json
The input for this notebook is a ROR URL, e.g. 'https://ror.org/021k10z87'.
# input parameter: ROR URL of the organization whose affiliated people are looked up
example_ror = "https://ror.org/021k10z87"
We use it to query Datacite's GraphQL API for the organization's metadata and all people connected to it. Since the API uses pagination, we need to loop through all pages to get the complete result set.
# DataCite's GraphQL endpoint for the FREYA PID Graph; every query in this notebook is POSTed to this URL
DATACITE_GRAPHQL_API = "https://api.datacite.org/graphql"
# GraphQL query to retrieve an organization and all its affiliated people.
# Variables: $ror (ID!, required) selects the organization; $after (String) is the pagination cursor.
# Each page asks for up to 1000 people and returns totalCount, pageInfo (endCursor, hasNextPage)
# and, for every person node, its id and name.
QUERY_ORGA2PEOPLE = """query organization($ror :ID!, $after:String){
organization(id: $ror) {
people(first: 1000, after: $after) {
totalCount
pageInfo {
endCursor
hasNextPage
}
nodes {
id
name
}
}
}
}"""
# query the FREYA PID Graph for all people connected to the given ROR
def query_freya_for_orga2people(ror, timeout=60):
    """Yield every result page of people affiliated with an organization.

    POSTs QUERY_ORGA2PEOPLE to DATACITE_GRAPHQL_API once per page, feeding the
    cursor reported by next_cursor() back in as the ``after`` variable until
    has_next_page() is no longer truthy. This is a generator, so no request is
    sent until the caller starts iterating.

    Args:
        ror: Identifier of the organization, sent as the ``ror`` query variable.
        timeout: Seconds to wait for each HTTP request before giving up.
            requests applies no timeout by default, so without this a stalled
            connection would block the generator indefinitely.

    Yields:
        The decoded JSON body of each page, as returned by ``response.json()``.

    Raises:
        requests.exceptions.HTTPError: If the HTTP status signals an error, or
            if the response body contains a top-level ``errors`` entry.
        requests.exceptions.Timeout: If a request exceeds ``timeout``.
    """
    cursor = ""
    continue_paginating = True
    while continue_paginating:
        # named `variables` rather than `vars` to avoid shadowing the builtin
        variables = {'ror': ror, 'after': cursor}
        response = requests.post(
            url=DATACITE_GRAPHQL_API,
            json={'query': QUERY_ORGA2PEOPLE, 'variables': variables},
            headers={'Accept': 'application/json'},
            timeout=timeout,
        )
        response.raise_for_status()
        result = response.json()
        # GraphQL errors are reported in the response body and are not
        # caught by raise_for_status(), so surface them explicitly
        if 'errors' in result:
            raise requests.exceptions.HTTPError(result)
        # check if next page exists and set cursor to next page
        continue_paginating = has_next_page(result)
        cursor = next_cursor(result)
        yield result
# check if there is another page with results to query
def has_next_page(response_data):
    """Return the ``hasNextPage`` flag found in one page of query results.

    Args:
        response_data: One result page of the organization/people query.

    Returns:
        The value stored under ``data.organization.people.pageInfo.hasNextPage``
        (presumably None when that path is absent; confirm against the
        python-benedict keypath documentation).
    """
    flag_path = "data.organization.people.pageInfo.hasNextPage"
    return benedict.from_json(response_data).get(flag_path)
# set cursor to next value
def next_cursor(response_data):
    """Return the ``endCursor`` found in one page of query results.

    Args:
        response_data: One result page of the organization/people query.

    Returns:
        The value stored under ``data.organization.people.pageInfo.endCursor``,
        which the caller passes as ``after`` when requesting the next page.
    """
    cursor_path = "data.organization.people.pageInfo.endCursor"
    return benedict.from_json(response_data).get(cursor_path)
#--- example execution
# query_freya_for_orga2people is a generator function: this call only creates
# the generator, and the pages are requested once it is iterated
list_of_pages = query_freya_for_orga2people(example_ror)
From the returned pages we extract the list of people and for each person we extract and print out their name and ORCID iD.
# from the result pages we get from the GraphQL API, extract the data about the people
def extract_people_from_page(page):
    """Return the people listed on one result page as a new list.

    Args:
        page: One result page of the organization/people query.

    Returns:
        A list with the entries found under ``data.organization.people.nodes``;
        an empty list when that lookup yields nothing.
    """
    nodes = benedict.from_json(page).get('data.organization.people.nodes')
    return list(nodes or [])
# extract ORCID iD and name from person
def extract_orcid(person):
    """Return the bare ORCID iD and the name of a person record.

    Args:
        person: A person entry with an ``id`` and a ``name`` field.

    Returns:
        A tuple ``(orcid, name)`` where ``orcid`` is the ``id`` value with
        every occurrence of "https://orcid.org/" removed.
    """
    record = benedict.from_json(person)
    orcid_url = record.get('id')
    return orcid_url.replace("https://orcid.org/", ""), record.get('name')
#--- example execution
# walk through every result page and print one "<orcid>, <name>" line per person
for page in list_of_pages or []:
    people = extract_people_from_page(page)
    for person in people or []:
        orcid, name = extract_orcid(person)
        print(f"{orcid}, {name}")
0000-0002-3783-6130, Irene Weipert-Fenner 0000-0002-5452-0488, Hans-Joachim Spanger 0000-0002-4621-9687, Simone Schnabel 0000-0001-6731-5304, Julia Eckert 0000-0001-6746-1248, Anton Peez 0000-0003-1575-9688, Hendrik Simon 0000-0002-1712-2624, Julian Junk 0000-0003-0035-5840, Raphael Oidtmann 0000-0002-5925-043X, Ariadne Natal 0000-0002-7012-6739, Peter Kreuzer 0000-0001-7843-4480, Dirk Peters 0000-0001-6823-6819, Janna Lisa Chalmovsky 0000-0003-1940-8877, Mikhail Polianskii 0000-0001-7302-444X, Katja Freistein 0000-0002-8739-2486, Elvira Rosert 0000-0001-7286-3575, Paul Chambers 0000-0003-0039-9827, Eldad Ben Aharon 0000-0002-4259-6071, Felix S. Bethke