#!/usr/bin/env python
# coding: utf-8

# # Create a network graph visualisation of Australian government departments
#
# This notebook visualises changes in Australian government departments over time, using data from Wikidata. It creates a hierarchically-ordered network graph where each agency is represented as a node whose position and colour is determined by the decade in which the agency was created. The size of the node indicates how long the agency was in existence, while edges between nodes connect agencies to their successors. Earliest agencies will be at the top of the graph.
#
# You can [view the query](https://w.wiki/5tVh) used to generate this graph using the Wikidata Query Service.

# In[11]:


import json

import arrow
import pandas as pd
from IPython.display import IFrame, display
from pyvis.network import Network
from SPARQLWrapper import JSON, SPARQLWrapper


# In[12]:


sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
# NOTE: the line breaks inside the query matter -- SPARQL `#` comments run to
# the end of the line, so each comment below must sit on a line of its own.
sparql.setQuery(
    """
SELECT ?item ?label ?id ?start_date ?end_date ?after_id
WHERE {
  ?item wdt:P31 wd:Q57605562;
    wdt:P10856 ?id;
    wdt:P571 ?start_date;
    rdfs:label ?agency_label.
  OPTIONAL { ?item wdt:P576 ?end_date. }
  OPTIONAL { ?item wdt:P1366 ?after.
             ?after wdt:P10856 ?after_id. }
  FILTER (lang(?agency_label) = "en").
  # Combine start and end year into a single string, setting end date to "" if it doesn't exist
  BIND(concat(xsd:string(YEAR(?start_date)), "-", COALESCE(xsd:string(YEAR(?end_date)), "")) as ?date_range)
  # Combine dept name and date range into a single string
  BIND(concat(?agency_label, " (", ?date_range, ")") as ?label)
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""
)


# In[13]:


sparql.setReturnFormat(JSON)
results = sparql.query().convert()
# Flatten each binding into columns named `<variable>_value`, `<variable>_type`, ...
df = pd.json_normalize(results["results"]["bindings"], sep="_")


# In[14]:


df.head()


# In[15]:


# Tableau style colours from http://tableaufriction.blogspot.com/2012/11/finally-you-can-use-tableau-data-colors.html
# Each entry is an "R.G.B" triple with the channels separated by dots.
rgb = [
    "255.187.120",
    "255.127.14",
    "174.199.232",
    "44.160.44",
    "31.119.180",
    "255.152.150",
    "214.39.40",
    "197.176.213",
    "152.223.138",
    "148.103.189",
    "247.182.210",
    "227.119.194",
    "196.156.148",
    "140.86.75",
    "127.127.127",
    "219.219.141",
    "199.199.199",
    "188.189.34",
    "158.218.229",
    "23.190.207",
]


def make_darker(colour, factor=0.75):
    """
    Darken colour by given factor.

    `colour` is a sequence of channel values (strings or ints); each channel
    is multiplied by `factor` and returned as a rounded string.
    """
    return [str(round(int(c) * factor)) for c in colour]


def make_lighter(colour, factor=0.75):
    """
    Lighten colour by given factor.

    `colour` is a sequence of channel values (strings or ints); each channel
    is moved `factor` of the way towards 255 and returned as a rounded string.
    """
    return [str(round((255 - int(c)) * factor) + int(c)) for c in colour]


# List of Tableau style colours
colours = [f'rgb({",".join(r.split("."))})' for r in rgb]

# List of darkened colours
borders = [f'rgb({",".join(make_darker(r.split(".")))})' for r in rgb]

# List of lightened colours
highlights = [f'rgb({",".join(make_lighter(r.split(".")))})' for r in rgb]

# Create groups for each decade in the date range, assigning a different colour for each group
# The keys are the first three digits of a year, "190" (1900s) through "202" (2020s)
decades = [str(d) for d in range(190, 203)]
decade_groups = {
    d: {
        "color": {
            "background": colours[i],
            "border": borders[i],
            "highlight": {"background": highlights[i], "border": borders[i]},
        }
    }
    for i, d in enumerate(decades)
}

# Calculate the possible range of values for the length of an agency's existence
max_days = (arrow.utcnow() - arrow.get("1901-01-01")).days
min_days = 1
current_range = max_days - min_days


def calculate_size(start, end, current_range=current_range, biggest=150, smallest=30):
    """
    Calculate the size of nodes based on each agency's length of existence.
    Adjust value to fall within the desired range (`smallest` to `biggest`).
    See: https://stackoverflow.com/a/929107

    `start` and `end` are date values that arrow can parse. If `end` is
    missing or can't be parsed, the agency is treated as still existing and
    the current time is used instead.
    """
    start_date = arrow.get(start)
    try:
        end_date = arrow.get(end)
    except (ValueError, TypeError):
        end_date = arrow.utcnow()
    delta = end_date - start_date
    # Linear rescale: (value - old_min) * new_range / old_range + new_min
    return (((delta.days - min_days) * (biggest - smallest)) / current_range) + smallest


# In[31]:


net = Network(notebook=True, cdn_resources="remote")


# In[32]:


# Loop through the agency data, creating a node for each agency
for agency in df.itertuples():
    net.add_node(
        agency.id_value,
        label=agency.id_value,
        # Text combining the agency identifier and its dated label
        title=f"{agency.id_value}, {agency.label_value}",
        # Assign to a group based on the decade in which it was created
        # This will colour the nodes by decade
        group=agency.start_date_value[:3],
        # Assign a level based on the year in which it was created
        # This will help to position the agency hierarchically by creation date
        level=int(agency.start_date_value[:4]),
        # Size the node according to the length of time the agency existed
        size=calculate_size(agency.start_date_value, agency.end_date_value),
    )


# In[33]:


# Add edges between a node and its successors
# Successors that were not returned as agencies in their own right have no
# node, and add_edge() refuses unknown node ids, so those edges are skipped
known_ids = set(net.get_nodes())
for agency in df.dropna(subset=["after_id_value"]).itertuples():
    if agency.after_id_value in known_ids:
        net.add_edge(agency.id_value, agency.after_id_value)


# In[34]:


# Network graph configuration
# It's easier to manage this in a Python dict then convert to JSON for PyVis
options = {
    "configure": {"enabled": False},
    "layout": {
        # A hierarchical layout with levels based on start date will order the agencies by time
        "hierarchical": {
            "enabled": True,
            "sortMethod": "directed",
            "shakeTowards": "leaves",
            "nodeSpacing": 20,
            "levelSeparation": 40,
            "treeSpacing": 20,
        }
    },
    "physics": {"hierarchicalRepulsion": {"avoidOverlap": 1, "nodeDistance": 180}},
    "nodes": {"font": {"size": 15}},
    # Assign colours by decade
    "groups": decade_groups,
    "edges": {
        "arrows": {"to": {"enabled": True, "scaleFactor": 0.5}},
        "arrowStrikethrough": False,
        "smooth": {"enabled": True},
        "color": {"color": "#b0bec5", "inherit": True},
    },
}


# In[35]:

# Hand the configuration to PyVis as a JavaScript `var options = {...}` assignment
net.set_options(f"var options = {json.dumps(options)}")

# Doing this rather than net.show() gives better results and predictable sizes
net.write_html("agencies-network.html", notebook=True)
display(IFrame("agencies-network.html", height=800, width="100%"))


# ----
#
# Created by [Tim Sherratt](https://timsherratt.org) for the [GLAM Workbench](https://glam-workbench.net/).
#
# The development of the Wikidata section of the GLAM Workbench was supported by [Wikimedia Australia](https://wikimedia.org.au/wiki/Wikimedia_Australia).