Notebook

Exercises 3 – answers

1. Write a script to look up the gene called ESPN in human and print the stable ID of this gene.

In [ ]:

import requests, sys, json
from pprint import pprint

def fetch_endpoint(server, request, content_type, timeout=60):
    """Send a GET request to ``server + request`` and return the response body.

    Args:
        server: Base URL of the REST server; it is concatenated directly
            with ``request``, so exactly one of them should supply the
            separating slash.
        request: Endpoint path (and query string) appended to ``server``.
        content_type: Value sent in the ``Accept`` header.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``, so a
            stalled connection fails instead of hanging indefinitely.

    Returns:
        The decoded JSON document when ``content_type`` is
        ``'application/json'``; otherwise the response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(
        server + request,
        headers={"Accept": content_type},
        timeout=timeout,
    )

    # raise_for_status() raises on exactly the responses for which r.ok is
    # False and does nothing otherwise, so no separate r.ok check (or a
    # sys.exit() after it, which could never be reached) is needed.
    r.raise_for_status()

    if content_type == 'application/json':
        return r.json()
    return r.text

# Shared request settings for the Ensembl REST server.
server = "http://rest.ensembl.org/"
con = "application/json"

# Symbol of the gene to look up (hard-coded for this exercise).
gene_name = "ESPN"

# Endpoint path: symbol lookup restricted to human.
ext_get_lookup = f"lookup/symbol/homo_sapiens/{gene_name}?"

# Run the lookup and print the stable ID found under the 'id' key.
get_lookup = fetch_endpoint(server, ext_get_lookup, con)
print(get_lookup['id'])

2. Get all variants that are associated with the phenotype 'Coffee consumption'. For each variant print:

a. the p-value for the association

b. the PMID for the publication which describes the association between that variant and ‘Coffee consumption’

c. the risk allele and the associated gene.

In [ ]:

import requests, sys, json
from pprint import pprint

def fetch_endpoint(server, request, content_type, timeout=60):
    """Send a GET request to ``server + request`` and return the response body.

    Args:
        server: Base URL of the REST server; it is concatenated directly
            with ``request``, so exactly one of them should supply the
            separating slash.
        request: Endpoint path (and query string) appended to ``server``.
        content_type: Value sent in the ``Accept`` header.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``, so a
            stalled connection fails instead of hanging indefinitely.

    Returns:
        The decoded JSON document when ``content_type`` is
        ``'application/json'``; otherwise the response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(
        server + request,
        headers={"Accept": content_type},
        timeout=timeout,
    )

    # raise_for_status() raises on exactly the responses for which r.ok is
    # False and does nothing otherwise, so no separate r.ok check (or a
    # sys.exit() after it, which could never be reached) is needed.
    r.raise_for_status()

    if content_type == 'application/json':
        return r.json()
    return r.text

# Column headings for the tab-separated table printed below.
print("Variant\tp-value\tPub-med ID\tRisk allele\tGene")

# Define the general URL parameters.
server = "http://rest.ensembl.org/"
# No leading slash here: `server` already ends with one, and a second slash
# would produce a '//' in the request URL.
ext_phen = "phenotype/term/homo_sapiens/coffee consumption?"
con = "application/json"

# Submit the query.
get_phen = fetch_endpoint(server, ext_phen, con)

for variant in get_phen:
    attributes = variant.get('attributes', {})

    # Every attribute is optional: use .get() for all of them so one record
    # lacking e.g. 'associated_gene' prints "None" in that column instead of
    # aborting the whole table with a KeyError.
    columns = (
        variant['Variation'],
        attributes.get('p_value'),
        attributes.get('external_reference'),
        attributes.get('risk_allele'),
        attributes.get('associated_gene'),
    )

    print("\t".join(str(column) for column in columns))

3. Get the mouse homologue of the human BRCA2 gene and print the ID and the aligned sequence of both.

Note that the JSON for the endpoint you need is several layers deep, containing nested lists (which appear as square brackets [ ] in the JSON) and key-value sets (which appear as curly brackets { } in the JSON). Pretty print (pprint) is very useful here for the intermediate stage, when you're trying to work out the structure of the JSON.

In [ ]:

import requests, sys, json
from pprint import pprint

def fetch_endpoint(server, request, content_type, timeout=60):
    """Send a GET request to ``server + request`` and return the response body.

    Args:
        server: Base URL of the REST server; it is concatenated directly
            with ``request``, so exactly one of them should supply the
            separating slash.
        request: Endpoint path (and query string) appended to ``server``.
        content_type: Value sent in the ``Accept`` header.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``, so a
            stalled connection fails instead of hanging indefinitely.

    Returns:
        The decoded JSON document when ``content_type`` is
        ``'application/json'``; otherwise the response body as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = requests.get(
        server + request,
        headers={"Accept": content_type},
        timeout=timeout,
    )

    # raise_for_status() raises on exactly the responses for which r.ok is
    # False and does nothing otherwise, so no separate r.ok check (or a
    # sys.exit() after it, which could never be reached) is needed.
    r.raise_for_status()

    if content_type == 'application/json':
        return r.json()
    return r.text

# Human gene symbol whose mouse homologue is requested.
gene = "BRCA2"

# Define the general URL parameters.
server = "http://rest.ensembl.org/"
con = "application/json"
ext_hom = f"homology/symbol/human/{gene}?target_species=mouse"

# Submit the query.
get_hom = fetch_endpoint(server, ext_hom, con)

# The response nests the homologies inside a 'data' list; each homology has
# a 'source' and a 'target' entry, and both are printed as a header line
# ("> id species") followed by the aligned sequence.
for datum in get_hom['data']:
    for homology in datum['homologies']:
        lines = []
        for side in ('source', 'target'):
            member = homology[side]
            lines.append("> " + member['id'] + " " + member['species'])
            lines.append(member['align_seq'])

        print("\n".join(lines))

3.3_Python.png

Next page: Other content types