#!/usr/bin/env python
# coding: utf-8

# ## Exercises 3 – answers
# 
# 1\. Write a script to look up the gene called *ESPN* in human and print the stable ID of this gene.

# In[ ]:


import requests, sys, json
from pprint import pprint

def fetch_endpoint(server, request, content_type):
    """Send a GET request to a REST endpoint and return the decoded body.

    Args:
        server: Base URL of the server; it is concatenated directly with
            ``request``, so the caller supplies any separating slash.
        request: Endpoint path (and query string) appended to ``server``.
        content_type: Value sent in the ``Accept`` header.

    Returns:
        The parsed JSON body when ``content_type`` is ``'application/json'``,
        otherwise the response body as text.

    Raises:
        requests.exceptions.HTTPError: If the response has an error status.
    """
    r = requests.get(server + request, headers={"Accept": content_type})

    # raise_for_status() does nothing for a successful response and raises
    # for an error status, so no separate r.ok check is needed; the
    # sys.exit() that used to follow it could never be reached.
    r.raise_for_status()

    if content_type == 'application/json':
        return r.json()
    return r.text

# Gene symbol to look up (hard-coded for this exercise)
gene_name = "ESPN"

# REST server, requested response format and the symbol lookup endpoint
server = "http://rest.ensembl.org/"
con = "application/json"
ext_get_lookup = "".join(["lookup/symbol/homo_sapiens/", gene_name, "?"])

# Query the endpoint, then pull the stable ID out of the decoded response
get_lookup = fetch_endpoint(server, ext_get_lookup, con)
stable_id = get_lookup['id']

print(stable_id)


# 2\. Get all variants that are associated with the phenotype 'Coffee consumption'. For each variant print:
# 
#    a. the p-value for the association
#    
#    b. the PMID for the publication which describes the association between that variant and ‘Coffee consumption’
#    
#    c. the risk allele and the associated gene.

# In[ ]:


import requests, sys, json
from pprint import pprint

def fetch_endpoint(server, request, content_type):
    """Send a GET request to a REST endpoint and return the decoded body.

    Args:
        server: Base URL of the server; it is concatenated directly with
            ``request``, so the caller supplies any separating slash.
        request: Endpoint path (and query string) appended to ``server``.
        content_type: Value sent in the ``Accept`` header.

    Returns:
        The parsed JSON body when ``content_type`` is ``'application/json'``,
        otherwise the response body as text.

    Raises:
        requests.exceptions.HTTPError: If the response has an error status.
    """
    r = requests.get(server + request, headers={"Accept": content_type})

    # raise_for_status() does nothing for a successful response and raises
    # for an error status, so no separate r.ok check is needed; the
    # sys.exit() that used to follow it could never be reached.
    r.raise_for_status()

    if content_type == 'application/json':
        return r.json()
    return r.text

# Header row of the tab-separated table printed below
print("Variant\tp-value\tPub-med ID\tRisk allele\tGene")

# define the general URL parameters
server = "http://rest.ensembl.org/"
# No leading slash here: server already ends with one, and joining the two
# would otherwise put a doubled slash into the request URL.
ext_phen = "phenotype/term/homo_sapiens/coffee consumption?"
con = "application/json"

# submit the query
get_phen = fetch_endpoint(server, ext_phen, con)

for variant in get_phen:
    attributes = variant['attributes']

    # Named variant_id rather than id so the builtin id() is not shadowed.
    variant_id = variant['Variation']

    # Every attribute is read with .get() so that a record lacking one of
    # them prints "None" in that column instead of aborting the whole loop
    # with a KeyError.
    pv = str(attributes.get('p_value'))
    pmid = str(attributes.get('external_reference'))
    risk = str(attributes.get('risk_allele'))
    gene = str(attributes.get('associated_gene'))

    print("\t".join([str(variant_id), pv, pmid, risk, gene]))


# 3\. Get the mouse homologue of the human *BRCA2* gene and print the ID and the aligned sequence of both genes.
# 
# Note that the JSON for the endpoint you need is several layers deep, containing nested lists (shown in square brackets [ ] in the JSON) and sets of key-value pairs (shown in curly brackets { } in the JSON). Pretty print (pprint) is very useful at the intermediate stage, when you're trying to work out the structure of the JSON.

# In[ ]:


import requests, sys, json
from pprint import pprint

def fetch_endpoint(server, request, content_type):
    """Send a GET request to a REST endpoint and return the decoded body.

    Args:
        server: Base URL of the server; it is concatenated directly with
            ``request``, so the caller supplies any separating slash.
        request: Endpoint path (and query string) appended to ``server``.
        content_type: Value sent in the ``Accept`` header.

    Returns:
        The parsed JSON body when ``content_type`` is ``'application/json'``,
        otherwise the response body as text.

    Raises:
        requests.exceptions.HTTPError: If the response has an error status.
    """
    r = requests.get(server + request, headers={"Accept": content_type})

    # raise_for_status() does nothing for a successful response and raises
    # for an error status, so no separate r.ok check is needed; the
    # sys.exit() that used to follow it could never be reached.
    r.raise_for_status()

    if content_type == 'application/json':
        return r.json()
    return r.text

# Gene symbol whose mouse homologue is requested
gene = "BRCA2"

# REST server, homology endpoint (restricted to mouse) and response format
server = "http://rest.ensembl.org/"
ext_hom = "".join(["homology/symbol/human/", gene, "?target_species=mouse"])
con = "application/json"

# submit the query
get_hom = fetch_endpoint(server, ext_hom, con)

# The response nests a list of homologies inside each entry of 'data'
for datum in get_hom['data']:
    for homology in datum['homologies']:
        # Source side of the pair: ID, species and aligned sequence
        source = homology['source']
        source_id = source['id']
        source_species = source['species']
        source_seq = source['align_seq']

        # Target side of the pair
        target = homology['target']
        target_id = target['id']
        target_seq = target['align_seq']
        target_species = target['species']

        # Each record is a "> id species" header line followed by its sequence
        source_record = "\n".join([" ".join([">", source_id, source_species]), source_seq])
        target_record = "\n".join([" ".join([">", target_id, target_species]), target_seq])

        print(source_record, target_record, sep="\n")


# ![3.3_Python.png](http://ftp.ebi.ac.uk/pub/databases/ensembl/training/images_for_REST/3.3_Python.png)

# [Next page: Other content types](4_Other_content_types.ipynb)