#!/usr/bin/env python
# coding: utf-8

# ## Pathfinding
# 
# Different ontologies exhibit different degrees of latticeyness. Highly latticed ontologies will have a combinatorial explosion of paths to a root node.
# 
# This notebook has an analysis of path counts for the HPO
# 

# In[3]:


## We use a Factory object in the ontobio library
from ontobio import OntologyFactory


# In[5]:


## Get the HPO using default method (currently OntoBee SPARQL)
## This may take 5-10s the first time you run it; afterwards it is cached
## 'hp' is the ontology handle handed to the factory
ofa = OntologyFactory()
ont = ofa.create('hp')


# In[9]:


## The OWL version of HPO (used here) has many interesting relationship types;
## for now we just care about is-a (subClassOf between named classes)
## NOTE: this rebinds `ont`, so every later cell works on the filtered
## sub-ontology rather than the full one created above
ont = ont.subontology(relations='subClassOf')


# In[13]:


## Get the root of the abnormality subset
## The single-element unpacking raises ValueError unless the search
## result contains exactly one item
[root] = ont.search('Phenotypic abnormality')
root


# In[15]:


## Arbitrary term
## (same exactly-one-result unpacking as for the root)
[t] = ont.search('Clinodactyly of the 3rd finger')
t


# In[18]:


## We use the standard python networkx library for pathfinding here
## This is easily extracted from an ontology object
## NOTE: use the canonical import form. `from networkx import nx` only worked
## on older networkx releases and raises ImportError on current ones.
import networkx as nx
G = ont.get_graph()
G


# ### Use networkx to find all paths from an arbitrary term
# 
# See https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.simple_paths.all_simple_paths.html

# In[21]:


## Count the paths between the root and our term
## (in networkx terms the root is the source and the descendant is the target)
sum(1 for _ in nx.all_simple_paths(G, root, t))


# In[22]:


## nx produces the paths one at a time; each path is a list
## Examine the first 2 -- islice stops after two paths instead of
## building the complete list of paths just to slice it
from itertools import islice
list(islice(nx.all_simple_paths(G, root, t), 2))


# ## We (heart) pandas
# 
# Pandas are cute.
# 
# We use a DataFrame object, which we will construct by making a table of terms plus their pathstats

# In[45]:


def get_pathstats(nodes):
    """
    Build one table row of path statistics for each node in ``nodes``.

    For every node, the simple paths from the module-level ``root`` to that
    node in the module-level graph ``G`` are enumerated with networkx.

    :param nodes: iterable of node ids to analyse
    :return: list of dicts, one per node and in input order, with keys
        ``id`` (the node), ``label`` (``ont.label`` of the node),
        ``pathcount`` (number of paths found) and ``longest`` (``len()`` of
        the longest path; 0 when no path exists)
    """
    items = []
    for n in nodes:
        pathcount = 0
        longest = 0
        # Consume the path generator one path at a time: highly latticed
        # terms can have a very large number of paths, so holding them all
        # in a list is wasteful. Starting from 0 also means a node without
        # any path from the root yields a row instead of making max() fail
        # on an empty sequence.
        for path in nx.all_simple_paths(G, root, n):
            pathcount += 1
            longest = max(longest, len(path))
        items.append({'id': n,
                      'label': ont.label(n),
                      'pathcount': pathcount,
                      'longest': longest})
    return items

## Try it on a small sample: the first 20 descendants of the root
sample = list(ont.descendants(root))[:20]
items = get_pathstats(sample)
items[:3]


# In[46]:


## Look at same table in pandas
## Each dict in `items` becomes one row; the dict keys become the columns
import pandas as pd
df = pd.DataFrame(items)
df


# In[49]:


## Basic aggregate stats (over our small sample, which may not be representative)
## Here: the mean of the 'pathcount' column
df['pathcount'].mean()


# ### Plotting with plotly
# 
# Let's do a simple barchart showing distribution of pathcounts for our sample

# In[50]:


import plotly.plotly as py
import plotly.graph_objs as go


# In[51]:


## One bar per term in the sample
data = [
    go.Bar(
        x=df['label'], # x axis: the 'label' column
        y=df['pathcount'] # y axis: the 'pathcount' column
    )
]

# IPython notebook
py.iplot(data, filename='pandas-bar-chart')

# use this in non-notebook context
# url = py.plot(data, filename='pandas-bar-chart')


# ## Summarizing over whole ontology
# 
# __warning__ this can take over an hour; if you are running interactively, be patient!
# 
# __help wanted__ is there a way to make Jupyter show a progress bar for cases like this?
# 

# In[52]:


## Path stats for every descendant of the root, not just a small sample
## NOTE: this rebinds `sample` and `items`
sample = list(ont.descendants(root))
items = get_pathstats(sample)
items[0:3]


# In[53]:


## Number of rows produced (one per entry in `sample`)
len(items)


# In[54]:


## Rebuild the DataFrame from the new `items`; this replaces the earlier `df`
df = pd.DataFrame(items)


# In[55]:


## Mean of the 'pathcount' column
df['pathcount'].mean()


# In[56]:


## Maximum of the 'pathcount' column
df['pathcount'].max()


# ### Plotting all HP terms
# 

# In[57]:


## One bar per row of the current `df`
data = [
    go.Bar(
        x=df['label'], # x axis: the 'label' column
        y=df['pathcount'] # y axis: the 'pathcount' column
    )
]

# IPython notebook
py.iplot(data, filename='pandas-bar-chart-all')


# In[59]:


## Scatter plot: 'longest' column on x against 'pathcount' column on y,
## drawn as markers only
data = [
    go.Scatter(
        x=df['longest'],
        y=df['pathcount'],
        mode='markers',
    )
]

# IPython notebook
py.iplot(data, filename='pandas-longest-vs-numpaths')


# In[61]:


## Collect the ids of all rows whose pathcount equals the maximum
max_num_paths = df['pathcount'].max()
nodes_with_max = [x['id'] for x in items if x['pathcount'] == max_num_paths]
nodes_with_max


# In[62]:


## Labels for those ids
[ont.label(n) for n in nodes_with_max]


# In[70]:


## How many ids share the maximum pathcount
len(nodes_with_max)


# In[71]:


## Pick an arbitrary term from list
## NOTE: this rebinds `t`, replacing the term chosen earlier in the notebook
t = nodes_with_max[0]


# In[77]:


## Ancestors of the term (reflexive=True presumably includes the term
## itself -- confirm against ontobio), keeping only ids that start with 'HP:'
ancs = ont.ancestors(t, reflexive=True)
ancs = [a for a in ancs if a.startswith('HP:')]
len(ancs)


# In[80]:


## Make a sub-ontology with just term and ancestors
subont = ont.subontology(ancs)


# In[83]:


## Take one path from the root to the term as a sample.
## `t` was picked because it has the largest number of paths, so fetch only
## the first path from the generator instead of listing all of them.
sample_path = next(nx.all_simple_paths(G, root, t))
sample_path


# In[84]:


## Render the sub-ontology,
## highlighting a sample path
import os
from ontobio.io.ontol_renderers import GraphRenderer
w = GraphRenderer.create('png')
w.outfile = 'output/multipath.png'
## Make sure the target directory exists before the renderer writes into it
os.makedirs('output', exist_ok=True)
w.write(subont,query_ids=sample_path)


# ![img](output/multipath.png)

# In[ ]: