#!/usr/bin/env python
# coding: utf-8

# # Network Visualisation
# 
# Some examples of visualising networks in Jupyter notebooks.

# ## Create Network
# 
# We can create a network from a simple edgelist, which is to say, a data object with two columns representing undirected edges between values in the two columns, or directed edges going from the entry in one column to the entry in another column.

# In[ ]:


#%pip install --upgrade pandas
import pandas as pd

# Toy edge list: one (from, to) pair per row.
df = pd.DataFrame(
    [('a', 'b'), ('b', 'c'), ('c', 'd'), ('c', 'a')],
    columns=['from', 'to'],
)
df


# We can write that data to a CSV file:

# In[ ]:


# Save the edge list; index=False keeps the row index out of the file so it
# holds only the two edge columns.
df.to_csv('dummy_graph.csv', index=False)
# Notebook shell escape (the exported form of `!head`): preview the file that
# was just written. `get_ipython` is only defined when running under IPython.
get_ipython().system(' head dummy_graph.csv')


# And read it back in again:

# In[ ]:


# Clear the in-memory frame first so the reload below is demonstrably coming
# from the file rather than from the earlier cell.
df = None

# Rebuild the edge list from the CSV file.
df = pd.read_csv('dummy_graph.csv')
df


# Create a graph object using `networkx` graphing package.

# In[202]:


#%pip install --upgrade networkx
import networkx as nx

# If we don't set the `create_using` parameter, we generate an undirected graph.
# 'from' and 'to' name the source and target columns of `df`.
DG = nx.from_pandas_edgelist(df, 'from', 'to', create_using=nx.DiGraph())


# `networkx` has some built-in plotting functions, but the diagrams they generate often look quite scrappy. Various third-party layout packages can generate prettier diagrams.

# In[203]:


# This force atlas layout algorithm often generates "reliable" layouts...

#https://github.com/bhargavchippada/forceatlas2
#%pip install fa2
from fa2 import ForceAtlas2
import matplotlib.pyplot as plt

# Layout engine settings, grouped the same way as in the fa2 README.
forceatlas2 = ForceAtlas2(
    # --- Behaviour alternatives ---
    outboundAttractionDistribution=True,  # dissuade hubs
    linLogMode=False,  # NOT IMPLEMENTED
    adjustSizes=False,  # prevent overlap (NOT IMPLEMENTED)
    edgeWeightInfluence=1.0,
    # --- Performance ---
    jitterTolerance=1.0,  # tolerance
    barnesHutOptimize=True,
    barnesHutTheta=1.2,
    multiThreaded=False,  # NOT IMPLEMENTED
    # --- Tuning ---
    scalingRatio=2.0,
    strongGravityMode=False,
    gravity=5.0,
    # --- Logging ---
    verbose=True,
)


# We can use the `forceatlas2` layout algorithm to generate layout co-ordinates for the nodes in the `networkx` graph:

# In[204]:


# Run 2000 layout iterations with no initial positions supplied (pos=None).
# The result is passed as the position argument of the networkx drawing calls.
positions = forceatlas2.forceatlas2_networkx_layout(DG, pos=None, iterations=2000)


# We can then draw the graph using the positions computed by the force atlas algorithm:

# In[205]:


# Open a new, wide figure for the drawing calls that follow
plt.figure(figsize=(40, 20))

# Nodes: semi-transparent blue markers
nx.draw_networkx_nodes(DG, positions, node_color="blue", alpha=0.4)

# Labels: no mapping is passed, so the default labelling is used
nx.draw_networkx_labels(DG, positions)

# Edges: the trailing ';' stops the notebook echoing the call's return value
nx.draw_networkx_edges(
    DG,
    positions,
    edge_color="green",  # edge colour
    alpha=0.5,  # edge transparency
    connectionstyle='arc3,rad=0.2',  # edge "bendiness"
);
# connectionstyle requires directed graph
# Maybe also as an alternative: https://github.com/beyondbeneath/bezier-curved-edges-networkx


# One of the easiest ways to weight node sizes is by degree. We can obtain the degree of each node directly from the graph:

# In[206]:


# Materialise the graph's degree view as a plain {node: degree} dict
d = dict(DG.degree)
d


# Scale the size by degree:

# In[207]:


# One marker size per node, in the dict's iteration order: 500 per unit of degree
sizes = [degree * 500 for degree in d.values()]


# Now render the graph with the size set:

# In[208]:


# Draw nodes
nx.draw_networkx_nodes(DG, positions, node_size=sizes, node_color="blue", alpha=0.4)
# Draw labels
nx.draw_networkx_labels(DG, positions)

# Draw edges
nx.draw_networkx_edges(DG, positions, edge_color="green", alpha=0.5, connectionstyle='arc3,rad=0.2');


# We can use alternative labels based on a lookup from each node.

# In[209]:


# Lookup table pairing each node id with the text to display for it
df_labels = pd.DataFrame(
    [('d', 'D'), ('b', 'B'), ('c', 'C'), ('a', 'A')],
    columns=['node', 'label'],
)
df_labels


# We need to pass this as a `dict`:

# In[210]:


# Index by node id, then export the `label` column as {node: label}
labels_map = df_labels.set_index('node')['label'].to_dict()
labels_map


# Plot using these labels:

# In[211]:


# Nodes, scaled by the per-node entries of `sizes`
nx.draw_networkx_nodes(
    DG,
    positions,
    node_size=sizes,
    node_color='blue',
    alpha=0.4,
)
# Labels: display text comes from the labels_map lookup
nx.draw_networkx_labels(DG, positions, labels=labels_map)

# Curved edges; the trailing ';' stops the notebook echoing the return value
nx.draw_networkx_edges(
    DG,
    positions,
    edge_color="green",
    alpha=0.5,
    connectionstyle='arc3,rad=0.2',
);


# We can colour nodes if we pass in a list of colours in the node order:

# In[212]:


# Colour assigned to each node id
node_colors = {'a': 'red', 'b': 'blue', 'c': 'green', 'd': 'yellow'}

# The colours have to be supplied as a list in node order, so build the list by
# walking the graph's own node sequence. A comprehension also avoids leaving a
# stray `node` loop variable behind in the notebook namespace.
node_colour_map = [node_colors[node] for node in DG.nodes()]
node_colour_map


# In[213]:


# Nodes: per-node sizes and per-node colours (both lists follow node order)
nx.draw_networkx_nodes(
    DG,
    positions,
    node_size=sizes,
    node_color=node_colour_map,
    alpha=0.4,
)
# Labels: display text comes from the labels_map lookup
nx.draw_networkx_labels(DG, positions, labels=labels_map)

# Curved edges; the trailing ';' stops the notebook echoing the return value
nx.draw_networkx_edges(
    DG,
    positions,
    edge_color="green",
    alpha=0.5,
    connectionstyle='arc3,rad=0.2',
);


# The label layout is not ideal. We can offset the labels using an algorithm that will offset the label, and also try to prevent overlapping labels.

# In[214]:


# TO DO - this uses the node value as the label; at the moment, I don't know if / how we can pass arbitrary label list

#https://github.com/Phlya/adjustText
# Takes a long time to run
#%pip install adjustText
from adjustText import adjust_text

def plot_adjusted_labels(DG, adjust=True, resizer=1):
    """Add node labels to the current matplotlib figure.

    This function reads the notebook-level globals ``positions`` (node ->
    coordinates) and ``sizes`` (one number per node, indexed in the iteration
    order of ``positions``). Both must already have been computed for the
    graph being drawn.

    Args:
        DG: Graph being labelled; only used when ``adjust`` is false.
        adjust: When true, create one text item per node (the node key is the
            label text) and hand them to ``adjust_text`` for repositioning,
            passing arrow properties for a thin black connector line. When
            false, fall back to ``nx.draw_networkx_labels``.
        resizer: Multiplier applied to each entry of ``sizes`` to obtain the
            font size of the corresponding label.

    Returns:
        None.
    """
    if not adjust:
        nx.draw_networkx_labels(DG, positions)
        return

    texts = [
        plt.text(
            positions[node][0],
            positions[node][1],
            node,
            fontsize=sizes[i] * resizer,
        )
        for i, node in enumerate(positions)
    ]
    # No trailing comma here: the original line ended in one, which wrapped the
    # call's result in a throwaway one-element tuple.
    adjust_text(texts, arrowprops=dict(arrowstyle="-", color='k', lw=0.5))


# *The following is not a great example!*

# In[215]:


# Nodes, scaled by the per-node entries of `sizes`
nx.draw_networkx_nodes(
    DG,
    positions,
    node_size=sizes,
    node_color="blue",
    alpha=0.4,
)

# Curved edges; the trailing ';' stops the notebook echoing the return value
nx.draw_networkx_edges(
    DG,
    positions,
    edge_color="green",
    alpha=0.5,
    connectionstyle='arc3,rad=0.2',
);

# Labels via adjustText; font size is each `sizes` entry multiplied by 0.02
plot_adjusted_labels(DG, resizer=0.02)


# ## Trying it with real data

# In[216]:


# TO DO


# ## Additional Tweaks
# 
# In a large graph, we may want to limit the plotting of nodes to nodes above a certain degree. The following will create a filtered graph containing just the nodes with large degree in the original graph:

# In[217]:


def filter_graph_by_degree(DG, mindegree=1):
    """Return a new directed graph keeping only edges between high-degree nodes.

    An edge of ``DG`` is kept when both of its endpoints have a degree, measured
    in ``DG``, of at least ``mindegree``. The result is built from those edges
    only, so a node with no surviving edge does not appear in it, and only the
    endpoint pairs (not edge attributes) are carried over.

    Args:
        DG: Source graph.
        mindegree: Minimum degree both endpoints of an edge must have.

    Returns:
        A new ``nx.DiGraph`` containing the kept edges.
    """
    # Take the degrees once instead of rebuilding the degree view for each
    # endpoint of every edge.
    degree = dict(DG.degree())

    kept_edges = [
        (u, v)
        for u, v in DG.edges()
        if degree[u] >= mindegree and degree[v] >= mindegree
    ]

    DF = nx.DiGraph()
    DF.add_edges_from(kept_edges)
    return DF


# # Example Influence Network from Wikipedia/DBpedia
# 
# The following is an example influence network using data from DBpedia.

# First some utility functions for running queries:

# In[218]:


#%pip install --upgrade SPARQLWrapper
#from linkeddataquery import SPARQLWrapper, dfResults, runQuery
# Import the necessary packages
from SPARQLWrapper import SPARQLWrapper, JSON

# Add some helper functions

# A function that will return the results of running a SPARQL query with 
# a defined set of prefixes over a specified endpoint.
# It follows the same five-step process apart from creating the query, which 
# is provided as an argument to the function.
def runQuery(endpoint, prefix, q):
    """Send ``prefix + q`` to a SPARQL endpoint and return the converted result.

    The return format is set to JSON before the query is issued.
    """
    client = SPARQLWrapper(endpoint)
    # The prefix declarations are simply prepended to the query text
    client.setQuery(prefix + q)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()

# Import pandas to provide facilities for creating a DataFrame to hold results
import pandas as pd

# Function to convert query results into a DataFrame
# The results are assumed to be in JSON format and therefore the Python dictionary will have  
# the results indexed by 'results' and then 'bindings'. 
def dict2df(results):
    """Flatten SPARQL JSON results into a DataFrame with one row per binding.

    ``results`` is indexed by 'results' and then 'bindings'. For each variable
    in a binding, only its 'value' entry is kept.
    """
    rows = [
        {var: cell['value'] for var, cell in binding.items()}
        for binding in results["results"]["bindings"]
    ]
    return pd.DataFrame(rows)

# Function to run a query and return results in a DataFrame
def dfResults(endpoint, prefix, q):
    """Run a prefixed SPARQL query against ``endpoint`` and return a DataFrame.

    This combines ``runQuery`` (fetch) with ``dict2df`` (flatten).
    """
    raw_results = runQuery(endpoint, prefix, q)
    return dict2df(raw_results)

# Print a limited number of results of a query
def printQuery(results, limit=''):
    """Print SPARQL JSON results as ``name: value`` lines.

    Each binding is followed by a blank line. The default ``limit`` of ``''``
    prints every binding; any other value is used as the end of a slice over
    the bindings.
    """
    bindings = results["results"]["bindings"]
    shown = bindings if limit == '' else bindings[:limit]
    for binding in shown:
        for name, cell in binding.items():
            print('{0}: {1}'.format(name, cell['value']))
        print()

# Run a query and print out a limited number of results
def printRunQuery(endpoint, prefix, q, limit=''):
    """Run a prefixed SPARQL query and print its results.

    ``limit`` is forwarded unchanged to ``printQuery``.
    """
    printQuery(runQuery(endpoint, prefix, q), limit)


# Define the endpoint:

# In[219]:


# SPARQL endpoint URL handed to the query helpers
endpoint="http://dbpedia.org/sparql"
# NOTE(review): runQuery() builds its own SPARQLWrapper from `endpoint`; this
# instance is not referenced anywhere else in the visible code.
sparql = SPARQLWrapper(endpoint)


# Set up some handy prefixes:

# In[220]:


# Namespace declarations that runQuery() prepends to the query text. The
# philosopher query in this file only uses `foaf:`; it spells out the ontology
# IRIs in full instead of using `dbo:`.
prefix='''
prefix gephi:<http://gephi.org/>
prefix foaf: <http://xmlns.com/foaf/0.1/>
prefix dct: <http://purl.org/dc/terms/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX dbo:    <http://dbpedia.org/ontology/>
'''


# Create a query over the influence network of philosophers:

# In[221]:


# Each result row pairs the name of a philosopher with the name of a
# philosopher they were influenced by; both ends must be typed as Philosopher.
# The projected variables are separated by whitespace only: the SPARQL 1.1
# grammar has no comma in a SELECT list, so a comma there only works on
# endpoints that tolerate it.
q_philosophers = '''
SELECT ?philosopherName ?influenceName WHERE {
  ?philosopher a
  <http://dbpedia.org/ontology/Philosopher> .
  ?influence a
  <http://dbpedia.org/ontology/Philosopher> .
  ?philosopher <http://dbpedia.org/ontology/influencedBy> ?influence.
  ?philosopher foaf:name ?philosopherName.
  ?influence foaf:name ?influenceName.
} LIMIT 10000
'''


# Run the query:

# In[222]:


# Fetch the influence pairs as a DataFrame; this issues a live request to the
# endpoint. Columns are named after the query's variables.
df2 = dfResults(endpoint, prefix, q_philosophers)
df2


# In[223]:


# Build the directed influence graph: each edge runs from 'philosopherName'
# to 'influenceName', i.e. from a philosopher to someone who influenced them.
# Note that this rebinds DG, replacing the toy graph.
DG = nx.from_pandas_edgelist(
    df2,
    source='philosopherName',
    target='influenceName',
    create_using=nx.DiGraph(),
)

# The graph is quite large, so simplify it to just philosophers with significant degree
DF = filter_graph_by_degree(DG, mindegree=40)

DF.size()


# Render the significant influence network:

# In[227]:


# TO DO - ipywidget thing to let us interact with useful ForceAtlas parameters

# Layout engine settings, repeated in this cell so they can be tuned for the
# influence network independently of the toy example.
forceatlas2 = ForceAtlas2(
    # --- Behaviour alternatives ---
    outboundAttractionDistribution=True,  # dissuade hubs
    linLogMode=False,  # NOT IMPLEMENTED
    adjustSizes=False,  # prevent overlap (NOT IMPLEMENTED)
    edgeWeightInfluence=1.0,
    # --- Performance ---
    jitterTolerance=1.0,  # tolerance
    barnesHutOptimize=True,
    barnesHutTheta=1.2,
    multiThreaded=False,  # NOT IMPLEMENTED
    # --- Tuning ---
    scalingRatio=2.0,
    strongGravityMode=False,
    gravity=5.0,
    # --- Logging ---
    verbose=True,
)

# Node sizing: one entry per node of the filtered graph, equal to its degree
# there (the `* 1` is the scale factor to tweak).
d = dict(DF.degree)
sizes = [degree * 1 for degree in d.values()]

# Node locations
positions = forceatlas2.forceatlas2_networkx_layout(DF, pos=None, iterations=2000)

# Give ourselves a reasonable plot size to work with
plt.figure(figsize=(20, 20))

# `with_labels` is an option of nx.draw()/nx.draw_networkx(), not of
# draw_networkx_nodes(). networkx versions whose draw_networkx_nodes() has no
# catch-all keyword arguments reject it with a TypeError, so it is not passed.
# Labels are drawn by plot_adjusted_labels() at the end of this cell.
nx.draw_networkx_nodes(DF, positions, node_size=sizes, node_color="blue", alpha=0.4)
nx.draw_networkx_edges(
    DF,
    positions,
    edge_color="green",
    alpha=0.2,
    connectionstyle='arc3,rad=0.2',
)

# Position adjusted labels - this step may take some time
# Tweak the previous filter step to reduce graph size and speed things up
# NOTE(review): this call used to be plot_adjusted_labels(DF, 0.3). The 0.3 was
# bound positionally to `adjust` (truthy) and never reached `resizer`, so the
# labels were effectively drawn at 1 x degree. The call below spells out those
# effective values; set resizer=0.3 if smaller labels were intended.
plot_adjusted_labels(DF, adjust=True, resizer=1)


# In[ ]: