#!/usr/bin/env python # coding: utf-8 # ## Perform enrichment test using phenotype ontology # # Given a file of genes: # # ``` # $ head data/rp-genes.tsv # NCBIGene:6295 SAG # NCBIGene:1258 CNGB1 # NCBIGene:3614 IMPDH1 # NCBIGene:26121 PRPF31 # ``` # # The example file here is derived from a Monarch query for all retinitis pigmentosa genes. # # We want to test each class in HPO to see if it is enriched for genes in this set. # # First we need to parse the gene Ids in the file # In[18]: ## Parse ids from file file = open("data/rp-genes.tsv", "r") gene_ids = [row.split("\t")[0] for row in file] ## show first 10 IDs: gene_ids[:10] # In[8]: ## Create an ontology factory in order to fetch HPO from ontobio.ontol_factory import OntologyFactory ofactory = OntologyFactory() ont = ofactory.create("hp") ## Load HP. Note the first time this runs Jupyter will show '*' - be patient # In[9]: ## Create an association factory to get gene-phenotype associations from ontobio.assoc_factory import AssociationSetFactory afactory = AssociationSetFactory() ## Load Associations from Monarch. Note the first time this runs Jupyter will show '*' - be patient aset = afactory.create(ontology=ont, subject_category='gene', object_category='phenotype', taxon='NCBITaxon:9606') # In[12]: ## Run enrichment tests using all classes in ontology enr = aset.enrichment_test(subjects=gene_ids, threshold=0.00005, labels=True) # In[19]: ## Show first 20 results for r in enr[:20]: print("{:8.3g} {} {:40s}".format(r['p'],r['c'],str(r['n']))) # Given that the initial gene set is for retinitis pigmentosa genes, it's not surprising that enriched phenotype # terms are related to retinal degeneration # ## Viewing Results # # We can use different visualization options to see the enriched terms. First we will show a simple tree view # # In[14]: ## Get all enriched class Ids terms = [r['c'] for r in enr] # In[15]: ## Create a minimal slim of HPO consisting of enriched terms, ## with non-informative intermediate nodes removed from ontobio.slimmer import get_minimal_subgraph g = get_minimal_subgraph(ont.get_graph(), terms) # In[17]: ## Render as ascii-tree from ontobio.graph_io import GraphRenderer w = GraphRenderer.create('tree') w.write(g, query_ids=terms) # ## visualization # # Now we will show enriched terms in a graph using graphviz # In[22]: terms = [r['c'] for r in enr[:30]] g = get_minimal_subgraph(ont.get_graph(), terms) w = GraphRenderer.create('png') w.outfile = "output/enr.png" w.write(g, query_ids=terms) # ## Image # # ![title](output/enr.png) # In[ ]: