#!/usr/bin/env python
# coding: utf-8

# ## Summarizing annotations to a term and descendants
# 
# This notebook demonstrates summarizing annotation counts for a term and its descendants.
# 
# An example use of this is a GO annotator exploring refactoring a subtree in GO
# 
# Of course, if this were a regular thing we would make a command line or even web interface,
# but keeping as a notebook gives us some flexibility in logic, and anyway is intended largely
# as a demonstration

# ### boilerplate
# 
#  * importing relevant ontobio libraries
#  * set up key objects

# In[1]:


import pandas as pd

## Create an ontology factory in order to fetch GO.
## The factory is instantiated with no arguments here; the ontology itself is only
## loaded later, when ofactory.create("go") is called.
from ontobio.ontol_factory import OntologyFactory
ofactory = OntologyFactory()

## GOLR queries
from ontobio.golr.golr_query import GolrAssociationQuery

## rendering ontologies
from ontobio import GraphRenderer


# In[2]:


## Load GO through the factory. The resulting `ont` object is what the later cells
## use for descendant lookup, label lookup and rendering.
## Note the first time this runs Jupyter will show '*' - be patient
ont = ofactory.create("go")  


# ### Finding descendants
# 
# Here we are using the in-memory ontology object, no external service calls are executed
# 
# Change the value of `term_id` to what you like

# In[3]:


## Root term of the subtree to summarize; change this to explore another term
term_id = "GO:0009070" ## serine family amino acid biosynthetic process
## Collect the descendants of term_id, following only the two listed relations.
## NOTE(review): reflexive=True presumably includes term_id itself in the result, and
## 'BFO:0000050' is presumably the part-of relation - confirm against the ontobio docs.
descendants = ont.descendants(term_id, reflexive=True, relations=['subClassOf', 'BFO:0000050'])


# In[4]:


## Bare expression: in a notebook this displays the value of `descendants`
descendants


# ### rendering subtrees
# 
# We use the good-old-fashioned Tree renderer
# 
# (this doesn't scale well for lattice-like sub-ontologies)

# In[5]:


## Ask GraphRenderer for its 'tree' renderer
renderer = GraphRenderer.create('tree')


# In[6]:


## Render the part of `ont` restricted to the `descendants` nodes and print the result
print(renderer.render_subgraph(ont, nodes=descendants))


# ### summarizing annotations
# 
# We write a short procedure to wrap calling Golr and returning a summary dict
# 
# The dict is keyed by facet value - by default taxon label, evidence label and `assigned_by` source. We also include an entry for `ALL` (the total number found)
# 

# In[58]:


DEFAULT_FACET_FIELDS = ['taxon_subset_closure_label', 'evidence_label', 'assigned_by']


def summarize(t: str,
              evidence_closure='ECO:0000269',  ## restrict to experimental
              facet_fields=None) -> dict:
    """
    Summarize the annotations to a single term as a flat dict of counts.

    Runs one Golr association query with `t` as the query object and `rows=0`,
    so only the total (`numFound`) and the facet counts of the result are used.
    The query is filtered to `evidence_closure_label` = 'experimental evidence'.

    Args:
        t: ID of the term to summarize.
        evidence_closure: kept for backward compatibility. NOTE: this value is
            not used by the query; the evidence filter is hard-coded (see above).
        facet_fields: Golr fields to facet on. Defaults to DEFAULT_FACET_FIELDS.

    Returns:
        A dict whose first entry is 'ALL' (the `numFound` of the result),
        followed by the entries of each requested facet field that is present
        in the result's `facet_counts`. All facets are merged into the same
        dict, so an identical key coming from two different fields is
        overwritten by the later field.
    """
    if facet_fields is None:
        # Copy, so the shared module-level default can never be mutated downstream
        facet_fields = list(DEFAULT_FACET_FIELDS)
    q = GolrAssociationQuery(object=t, rows=0, object_category='function',
                             fq={'evidence_closure_label': 'experimental evidence'},
                             facet_fields=facet_fields)
    # Debugging aid: print(q.solr_params()) to inspect the generated query parameters
    result = q.exec()
    fc = result['facet_counts']
    item = {'ALL': result['numFound']}  ## make sure this is the first entry
    for ff in facet_fields:
        if ff in fc:
            item.update(fc[ff])
    return item


# In[59]:


## Try the helper on the root term alone and print the resulting summary dict
print(summarize(term_id))


# In[63]:


def summarize_set(ids, facet_fields=None) -> pd.DataFrame:
    """
    Summarize annotations for a collection of terms, one row per term.

    Each row holds the term ID (`id`), its label as returned by `ont.label`
    (`name`), and every count returned by `summarize` for that term. Columns
    that are missing for a given term are filled with 0.

    Args:
        ids: iterable of term IDs.
        facet_fields: passed through unchanged to `summarize`.

    Returns:
        A DataFrame sorted by the `ALL` column, largest first. When `ids` is
        empty, an empty frame with the columns `id`, `name` and `ALL` is
        returned instead of failing on the missing sort column.
    """
    rows = [
        {
            'id': term,
            'name': ont.label(term),
            **summarize(term, facet_fields=facet_fields),
        }
        for term in ids
    ]
    if not rows:
        # Without any row there is no 'ALL' column to sort on
        return pd.DataFrame(columns=['id', 'name', 'ALL'])
    df = pd.DataFrame(rows).fillna(0)
    # sort using total number
    return df.sort_values('ALL', axis=0, ascending=False)


# ## Summarize GO term and descendants
# 
# More advanced visualizations are easy with plotly etc. We leave this as an exercise for the reader...
# 
# As an example, for the first query we bundle all facets (species, evidence, assigned by) together

# In[64]:


## Show every column when a frame is displayed (None lifts the pandas column limit)
pd.options.display.max_columns = None
## No facet_fields given, so summarize() falls back to DEFAULT_FACET_FIELDS
df = summarize_set(descendants)
## Bare expression: in a notebook this displays the frame
df


# ## Summary by assigned by
# 
# 

# In[67]:


## Facet on `assigned_by` only; the returned frame is displayed as the cell output
summarize_set(descendants, facet_fields=['assigned_by'])


# ### Summarize by species
# 
# use `taxon_subset_closure_label` facet

# In[69]:


## Facet on `taxon_subset_closure_label` only; the returned frame is displayed as the cell output
summarize_set(descendants, facet_fields=['taxon_subset_closure_label'])


# In[ ]: