This notebook displays, in a human-friendly way, all of the connections embedded in a machine-actionable data management plan (maDMP). By the end of this notebook, you will be able to succinctly display the essential components of the maDMP vision using persistent identifiers (PIDs): Open Researcher and Contributor IDs (ORCIDs), funder IDs, organisation IDs, and dataset IDs (DOIs). To demonstrate this we use an example DMP, viz. https://doi.org/10.4124/test/.879w8. The notebook fetches all the PIDs associated with this DMP and displays them in a tree diagram. See below. The diagram puts the DMP at the centre, with five main branches: datasets, publications, funders, organisations, and people. Each branch fans out into the individual entities of that branch, for example the names of all the people who contributed to the DMP.
The process of displaying the DMP visualisation is very simple. First, after an initial setup, we fetch all the data we need from the DataCite GraphQL API. Then, we transform this data into a data structure that can be used for visualisation. Finally, we take the transformed data and supply it to a Vega visualisation specification to generate the chart you can see above.
%%capture
# Install required Python packages
!pip install dfply altair altair_saver vega altair_viewer
import json
import pandas as pd
import numpy as np
from dfply import *
import altair.vega.v5 as alt
from altair_saver import save
# alt.renderers.enable('notebook')
# Prepare the GraphQL client
import requests
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
# Transport pointing at the public DataCite GraphQL endpoint; requests are
# sent as JSON (use_json=True).
_transport = RequestsHTTPTransport(url='https://api.datacite.org/graphql', use_json=True)
# Client used by get_data(); the schema is retrieved through the transport
# (fetch_schema_from_transport=True).
client = Client(transport=_transport, fetch_schema_from_transport=True)
from IPython.display import display, Markdown
import ipywidgets as widgets
# DOI picker for the plan to visualise.
# Every DOI URL is listed once and without surrounding whitespace, so the
# selected value can be passed to the API exactly as shown.
f = widgets.Dropdown(
    options=[
        'https://doi.org/10.48321/D1F88S',
        'https://doi.org/10.48321/D17G67',
        'https://doi.org/10.48321/D1H59R',
        'https://doi.org/10.1575/1912/bco-dmo.775500.1',
        'https://doi.org/10.48321/D1G59F',
        'https://doi.org/10.48321/D14S38',
        'https://doi.org/10.48321/D1101N',
        'https://doi.org/10.48321/D1W88T',
        'https://doi.org/10.48321/D1RG6W',
        'https://doi.org/10.48321/D1MS3M',
        'https://doi.org/10.48321/D1H010',
        'https://doi.org/10.48321/D1C885',
        'https://doi.org/10.48321/D13S3Z',
        'https://doi.org/10.48321/D1001B',
        'https://doi.org/10.48321/D1V88H',
        'https://doi.org/10.48321/D1QG6K',
        'https://doi.org/10.48321/D1KS39',
        'https://doi.org/10.48321/D1G01P',
    ],
    # Pre-selected entry; must be one of the options above.
    value='https://doi.org/10.48321/D17G67',
    description='Choose DOI:',
    disabled=False,
)
# GraphQL query for a single data management plan, looked up by its DOI ($id).
# It requests the plan's first title, the datasets and text publications
# reached through `citations`, the "Producer" contributors, the funding
# references, the creators, and the "ProjectLeader" and "DataCurator"
# contributors (the last three with their affiliations).
# Field aliases (id, name, award) give the different record types the same
# key names in the response.
# Earlier parameter set, kept for reference only (not used by the query):
# query_params = {
# "id" : f.value,
# "maxOutputs": 100,
# "minViews" : 100
# }
query = gql("""query getOutputs($id: ID!)
{
dataManagementPlan(id: $id) {
id
name: titles(first:1) {
title
}
datasets: citations(query:"types.resourceTypeGeneral:Dataset") {
totalCount
nodes {
id: doi
name: titles(first:1) {
title
}
}
}
publications: citations(query:"types.resourceTypeGeneral:Text") {
totalCount
nodes {
id: doi
name: titles(first:1) {
title
}
}
}
producer: contributors(contributorType: "Producer") {
id
name
contributorType
}
fundingReferences {
id: funderIdentifier
name: funderName
award: awardUri
}
creators {
id
name
type
affiliation{
id
name
}
}
pis: contributors(contributorType: "ProjectLeader") {
id
name
contributorType
affiliation{
id
name
}
}
curators: contributors(contributorType: "DataCurator") {
id
name
type
affiliation{
id
name
}
}
}
}
""")
def get_data(doi):
    """Fetch the metadata of one data management plan from the GraphQL API

    Parameters:
    doi (str): DOI of the plan, in the form used by the dropdown options;
    surrounding whitespace is ignored

    Returns:
    dict:The "dataManagementPlan" entry of the query result
    """
    # The query declares exactly one variable ($id).  Variables are passed as
    # a mapping, which is the documented form for `variable_values`; the
    # client/transport takes care of serialising them.
    query_params = {"id": doi.strip()}
    result = client.execute(query, variable_values=query_params)
    return result["dataManagementPlan"]
def get_affiliation(series_element):
    """Gives the name of the first affiliation in a list of affiliations

    Parameters:
    series_element (list): Affiliation records, each holding a 'name' key

    Returns:
    str:Name of the first record, or the string "None" when the list is empty
    """
    has_affiliation = len(series_element) > 0
    return series_element[0]['name'] if has_affiliation else "None"
def add_node_attributes(dataframe, parent=2):
    """Adds the columns the node visualisation needs to every item

    Parameters:
    dataframe (dataframe): A dataframe with one row per item and an 'id' column
    parent (int): The id of the parent node

    Returns:
    dataframe:The piped dataframe with 'tooltip' (copy of 'id') and 'parent'
    columns added, or an empty dataframe when None is given
    """
    # Guard clause: nothing to decorate
    if dataframe is None:
        return pd.DataFrame()
    # 'id' is re-assigned to itself, 'tooltip' mirrors it and 'parent' is the
    # same value on every row
    decorate = mutate(
        id=X.id,
        tooltip=X.id,
        parent=parent,
    )
    return dataframe >> decorate
def create_node(array=None, parent=2):
    """Creates the chart nodes for a list of items and formats them

    Parameters:
    array (list): Item dicts; the keys of the first item define the columns
    parent (int): The id of the parent node

    Returns:
    list:One dict per item with the node attributes added. An empty dict is
    returned when there are no items (kept for backward compatibility;
    callers only test it for emptiness)
    """
    # A missing list (None, e.g. a null field in the API response) is handled
    # like an empty one instead of failing on len()
    if array is None or len(array) == 0:
        return {}
    frame = pd.DataFrame(array, columns=list(array[0].keys()))
    return add_node_attributes(frame, parent).to_dict(orient='records')
def merge_nodes(dmpTitle, id, dataset=None, references=None, funders=None, orgs=None, people=None):
    """Merges the root, the branch nodes and all the item nodes into one list

    Parameters:
    dmpTitle (str): label of the root (DMP) node
    id: identifier of the root node; every branch node uses it as parent
    dataset (array): dataset nodes (children of branch 2)
    references (array): publication nodes (children of branch 6)
    funders (array): funders nodes (children of branch 3)
    orgs (array): orgs nodes (children of branch 4)
    people (array): people nodes (children of branch 5)

    Returns:
    list:The root node, the five branch nodes, then the item nodes in the
    order dataset, references, funders, orgs, people
    """
    root = {"id": id, "name": dmpTitle}
    branches = [
        {"id": 2, "name": "Datasets", "parent": id},
        {"id": 6, "name": "Publications", "parent": id},
        {"id": 3, "name": "Funders", "parent": id},
        {"id": 4, "name": "Organisations", "parent": id},
        {"id": 5, "name": "People", "parent": id},
    ]
    nodes_list = [root] + branches
    for group in (dataset, references, funders, orgs, people):
        # An empty group may arrive as [], as {} (what create_node returns
        # for no items) or as None; all of them contribute nothing
        if group is not None and len(group) > 0:
            nodes_list.extend(group)
    return nodes_list
def get_title(series_element):
    """Gives the first title in a list of titles

    Parameters:
    series_element (list): Title records, each holding a 'title' key

    Returns:
    str:Title of the first record, or the string "None" when the list is empty
    """
    has_title = len(series_element) > 0
    return series_element[0]['title'] if has_title else "None"
def extract_titles(list):
    """Replaces the 'name' of every record by its first title

    Parameters:
    list (array): Records whose 'name' holds a list of title records

    Returns:
    array:The records as dicts, with 'name' reduced by get_title; an empty
    list when there are no records
    """
    if not len(list):
        return []
    frame = pd.DataFrame(list)
    # 'name' arrives as a list of {'title': ...}; keep only the first title
    with_titles = frame >> mutate(name=X.name.apply(get_title))
    return with_titles.to_dict('records')
def generate_chart(doi):
    """Builds the radial tree chart of a data management plan

    Parameters:
    doi (str): DOI of the plan, passed on to get_data

    Returns:
    The object returned by alt.vega for the filled-in Vega specification
    """
    data = get_data(doi)
    # Branch ids (2..6) match the branch nodes created by merge_nodes
    datasets = create_node(extract_titles(data["datasets"]["nodes"]), 2)
    references = create_node(extract_titles(data["publications"]["nodes"]), 6)
    # `or []` keeps a missing (None) list from breaking the concatenation
    orgs = create_node(data["producer"] or [], 4)
    contributors = (data["creators"] or []) + (data["pis"] or []) + (data["curators"] or [])
    people = create_node(contributors, 5)
    funders = create_node(data["fundingReferences"] or [], 3)
    # The title is spliced into the JSON template, so it has to be a valid
    # JSON string: json.dumps adds the quotes and escapes quotes/backslashes
    # that a title may contain
    titles = data["name"] or []
    dmp_title = json.dumps(titles[0]["title"] if titles else "")
    dmp_id = data["id"]
    # The root node is labelled with a blank; the title is shown through the
    # "Chart" signal of the template instead
    nodes = merge_nodes(" ", dmp_id, datasets, references, funders, orgs, people)
    return alt.vega(json.loads(vega_template(json.dumps(nodes), dmp_title)))
def vega_template(data, dmp_title):
    """Fills the Vega v5 specification of the radial tree with the chart data

    Parameters:
    data (str): JSON text of the node array; becomes the values of the
    "tree" data set
    dmp_title (str): JSON text (already quoted) that becomes the value of
    the "Chart" signal

    Returns:
    str:The specification as JSON text
    """
    # Everything up to the value of the "Chart" signal
    spec_head = """
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"description": "An example of a radial layout for a node-link diagram of hierarchical data.",
"width": 1024,
"height": 720,
"padding": 5,
"autosize": "none",
"signals": [
{"name": "Chart", "value": """
    # Remaining signals, up to the values of the "tree" data set
    spec_signals = """, "bind": {"input": "url", "size":100}},
{"name": "labels", "value": true, "bind": {"input": "checkbox"}},
{
"name": "radius",
"value": 280,
"bind": {"input": "range", "min": 20, "max": 600}
},
{
"name": "extent",
"value": 360,
"bind": {"input": "range", "min": 0, "max": 360, "step": 1}
},
{
"name": "rotate",
"value": 0,
"bind": {"input": "range", "min": 0, "max": 360, "step": 1}
},
{
"name": "layout",
"value": "cluster",
"bind": {"input": "radio", "options": ["tidy", "cluster"]}
},
{
"name": "links",
"value": "orthogonal",
"bind": {
"input": "select",
"options": ["line", "curve", "diagonal", "orthogonal"]
}
},
{"name": "originX", "update": "width / 2"},
{"name": "originY", "update": "height / 2"}
],
"data": [
{
"name": "tree",
"values": """
    # Transforms, scales and marks
    spec_tail = """,
"transform": [
{"type": "stratify", "key": "id", "parentKey": "parent"},
{
"type": "tree",
"method": {"signal": "layout"},
"size": [1, {"signal": "radius"}],
"as": ["alpha", "radius", "depth", "children"]
},
{
"type": "formula",
"expr": "(rotate + extent * datum.alpha + 270) % 360",
"as": "angle"
},
{"type": "formula", "expr": "PI * datum.angle / 180", "as": "radians"},
{
"type": "formula",
"expr": "inrange(datum.angle, [90, 270])",
"as": "leftside"
},
{
"type": "formula",
"expr": "originX + datum.radius * cos(datum.radians)",
"as": "x"
},
{
"type": "formula",
"expr": "originY + datum.radius * sin(datum.radians)",
"as": "y"
}
]
},
{
"name": "links",
"source": "tree",
"transform": [
{"type": "treelinks"},
{
"type": "linkpath",
"shape": {"signal": "links"},
"orient": "radial",
"sourceX": "source.radians",
"sourceY": "source.radius",
"targetX": "target.radians",
"targetY": "target.radius"
}
]
}
],
"scales": [
{
"name": "color",
"type": "linear",
"range": {"scheme": "viridis"},
"domain": {"data": "tree", "field": "depth"},
"zero": true
}
],
"marks": [
{
"type": "path",
"from": {"data": "links"},
"encode": {
"update": {
"x": {"signal": "originX"},
"y": {"signal": "originY"},
"path": {"field": "path"},
"stroke": {"value": "#ccc"}
}
}
},
{
"type": "symbol",
"from": {"data": "tree"},
"encode": {
"enter": {
"size": {"value": 300}, "stroke": {"value": "#fff"}
},
"update": {
"x": {"field": "x"},
"y": {"field": "y"},
"fill": {"scale": "color", "field": "depth"}
}
}
},
{
"type": "text",
"from": {"data": "tree"},
"encode": {
"enter": {
"text": {"field": "name"},
"fontSize": {"value": 12},
"baseline": {"value": "middle"},
"tooltip": {"signal":
"{'Identifier': datum.tooltip, 'Affiliation': datum.affiliation, 'Contribution': datum.contributorType, 'Award': datum.award}"}
},
"update": {
"x": {"field": "x"},
"y": {"field": "y"},
"dx": {"signal": "(datum.leftside ? -1 : 1) * 12"},
"align": {"signal": "datum.leftside ? 'right' : 'left'"},
"opacity": {"signal": "labels ? 1 : 0"}
}
}
}
]
}
"""
    return "".join((spec_head, dmp_title, spec_signals, data, spec_tail))
# Show the DOI dropdown; its current value (f.value) is what gets charted.
display(f)
Dropdown(description='Choose DOI:', options=('https://doi.org/10.48321/D1F88S ', 'https://doi.org/10.48321/D17…
# Build the chart for the DOI currently selected in the dropdown.
chart = generate_chart(f.value)
<Vega 5 object> If you see this message, it means the renderer has not been properly enabled for the frontend that you are using. For more information, see https://altair-viz.github.io/user_guide/troubleshooting.html