#!/usr/bin/env python
# coding: utf-8

# ## Perform enrichment test using phenotype ontology
# 
# Given a file of genes:
# 
# ```
# $ head data/rp-genes.tsv
# NCBIGene:6295   SAG
# NCBIGene:1258   CNGB1
# NCBIGene:3614   IMPDH1
# NCBIGene:26121  PRPF31
# ```
# 
# The example file here is derived from a Monarch query for all retinitis pigmentosa genes.
# 
# We want to test each class in HPO to see if it is enriched for genes in this set.
# 
# First we need to parse the gene Ids in the file

# In[18]:


## Parse ids from file
file = open("data/rp-genes.tsv", "r")
gene_ids = [row.split("\t")[0] for row in file]

## show first 10 IDs:
gene_ids[:10]


# In[8]:


## Create an ontology factory in order to fetch HPO
from ontobio.ontol_factory import OntologyFactory

ofactory = OntologyFactory()
ont = ofactory.create("hp")  ## Load HP. Note the first time this runs Jupyter will show '*' - be patient


# In[9]:


## Create an association factory to get gene-phenotype associations
from ontobio.assoc_factory import AssociationSetFactory
afactory = AssociationSetFactory()
## Load Associations from Monarch. Note the first time this runs Jupyter will show '*' - be patient
aset = afactory.create(ontology=ont, subject_category='gene', object_category='phenotype', taxon='NCBITaxon:9606')


# In[12]:


## Run enrichment tests using all classes in ontology
enr = aset.enrichment_test(subjects=gene_ids, threshold=0.00005, labels=True)


# In[19]:


## Show first 20 results
for r in enr[:20]:
        print("{:8.3g} {} {:40s}".format(r['p'],r['c'],str(r['n'])))


# Given that the initial gene set is for retinitis pigmentosa genes, it's not surprising that enriched phenotype
# terms are related to retinal degeneration

# ## Viewing Results
# 
# We can use different visualization options to see the enriched terms. First we will show a simple tree view
# 

# In[14]:


## Get all enriched class Ids
terms = [r['c'] for r in enr]


# In[15]:


## Create a minimal slim of HPO consisting of enriched terms,
## with non-informative intermediate nodes removed
from ontobio.slimmer import get_minimal_subgraph
g = get_minimal_subgraph(ont.get_graph(), terms)


# In[17]:


## Render as ascii-tree
from ontobio.graph_io import GraphRenderer
w = GraphRenderer.create('tree')
w.write(g, query_ids=terms)


# ## visualization
# 
# Now we will show enriched terms in a graph using graphviz

# In[22]:


terms = [r['c'] for r in enr[:30]]
g = get_minimal_subgraph(ont.get_graph(), terms)
w = GraphRenderer.create('png')
w.outfile = "output/enr.png"
w.write(g, query_ids=terms)


# ## Image
# 
# ![title](output/enr.png)

# In[ ]: