#!/usr/bin/env python
# coding: utf-8

# ## Exercises 3 – answers
#
# Each answer below is a small function built on the shared helpers defined
# in the first cell. Running this file as a script executes all three
# answers in order.

# In[ ]:


import json
import sys
from pprint import pprint
from urllib.parse import quote

# NOTE: json, sys and pprint are not used by the answers themselves.
# pprint(...) is handy for inspecting a decoded response while you are
# working out the structure of an endpoint's JSON.

# General URL parameters shared by every exercise.
SERVER = "https://rest.ensembl.org/"
JSON_CONTENT_TYPE = "application/json"

# requests waits indefinitely unless a timeout is given, so bound every call.
REQUEST_TIMEOUT = 60  # seconds


def build_url(server, request):
    """Join *server* and *request* with exactly one slash between them.

    Args:
        server: Base URL, with or without a trailing slash.
        request: Endpoint path (plus optional query string), with or without
            a leading slash.

    Returns:
        The full URL as a string.
    """
    return server.rstrip("/") + "/" + request.lstrip("/")


def fetch_endpoint(server, request, content_type):
    """Send a GET request to a REST endpoint and return the decoded body.

    Args:
        server: Base URL of the REST server.
        request: Endpoint path (plus optional query string).
        content_type: Value sent in the ``Accept`` header.

    Returns:
        The parsed JSON when *content_type* is ``application/json``,
        otherwise the response body as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Imported here rather than at module level so the URL and formatting
    # helpers can be imported (and tested) without the third-party package.
    import requests

    response = requests.get(
        build_url(server, request),
        headers={"Accept": content_type},
        timeout=REQUEST_TIMEOUT,
    )
    # Raises for any 4xx/5xx answer; nothing below runs for a failed request.
    response.raise_for_status()

    if content_type == "application/json":
        return response.json()
    return response.text


# 1\. Write a script to lookup the gene called *ESPN* in human and print the stable ID of this gene.

# In[ ]:


def lookup_request(symbol, species="homo_sapiens"):
    """Return the ``lookup/symbol`` endpoint path for a gene symbol."""
    return "lookup/symbol/{}/{}".format(species, quote(symbol, safe=""))


def print_gene_id(symbol="ESPN", species="homo_sapiens"):
    """Look up *symbol* in *species* and print the ``id`` of the result."""
    get_lookup = fetch_endpoint(
        SERVER, lookup_request(symbol, species), JSON_CONTENT_TYPE
    )
    print(get_lookup["id"])


# 2\. Get all variants that are associated with the phenotype 'Coffee consumption'. For each variant print:
#
#    a. the p-value for the association
#
#    b. the PMID for the publication which describes the association between that variant and ‘Coffee consumption’
#
#    c. the risk allele and the associated gene.

# In[ ]:


def phenotype_request(term, species="homo_sapiens"):
    """Return the ``phenotype/term`` endpoint path for a phenotype term.

    The term is percent-encoded because it is placed in the URL path and may
    contain spaces (e.g. ``coffee consumption``).
    """
    return "phenotype/term/{}/{}".format(species, quote(term, safe=""))


def format_variant_row(variant):
    """Format one phenotype record as a tab-separated output row.

    The columns are, in order: ``Variation``, then ``p_value``,
    ``external_reference``, ``risk_allele`` and ``associated_gene`` taken
    from the record's ``attributes``. A value that is absent from the record
    is rendered as ``None`` instead of raising ``KeyError``.
    """
    attributes = variant.get("attributes") or {}
    columns = (
        variant.get("Variation"),
        attributes.get("p_value"),
        attributes.get("external_reference"),
        attributes.get("risk_allele"),
        attributes.get("associated_gene"),
    )
    # str() so that numeric or missing (None) values can be joined as text.
    return "\t".join(str(column) for column in columns)


def print_phenotype_variants(term="coffee consumption", species="homo_sapiens"):
    """Print a header line and one row per record returned for *term*."""
    print("Variant\tp-value\tPub-med ID\tRisk allele\tGene")

    get_phen = fetch_endpoint(
        SERVER, phenotype_request(term, species), JSON_CONTENT_TYPE
    )
    for variant in get_phen:
        print(format_variant_row(variant))


# 3\. Get the mouse homologue of the human BRCA2 and print the ID and the aligned sequence of both.
#
# Note that the JSON for the endpoint you need is several layers deep, containing nested lists (appear as square brackets [ ] in the JSON) and key value sets (appear as curly brackets { } in the JSON). Pretty print (pprint) comes in very useful here for the intermediate stage when you're trying to work out the json.

# In[ ]:


def homology_request(symbol, source_species="human", target_species="mouse"):
    """Return the ``homology/symbol`` endpoint path and query string."""
    return "homology/symbol/{}/{}?target_species={}".format(
        source_species,
        quote(symbol, safe=""),
        quote(target_species, safe=""),
    )


def format_homology(homology):
    """Format one homology as two header/sequence records.

    For the ``source`` member and then the ``target`` member, emits a line
    ``> <id> <species>`` followed by a line holding ``align_seq``.
    """
    records = []
    for side in ("source", "target"):
        member = homology[side]
        records.append(
            "> {} {}\n{}".format(
                member["id"], member["species"], member["align_seq"]
            )
        )
    return "\n".join(records)


def print_homologies(symbol="BRCA2", source_species="human", target_species="mouse"):
    """Print the aligned source/target sequences of every homology found."""
    get_hom = fetch_endpoint(
        SERVER,
        homology_request(symbol, source_species, target_species),
        JSON_CONTENT_TYPE,
    )
    # The payload nests the homologies: data -> [datum] -> homologies -> [homology].
    for datum in get_hom["data"]:
        for homology in datum["homologies"]:
            print(format_homology(homology))


# ![3.3_Python.png](http://ftp.ebi.ac.uk/pub/databases/ensembl/training/images_for_REST/3.3_Python.png)

# [Next page: Other content types](4_Other_content_types.ipynb)

# In[ ]:


def main():
    """Run the answers to exercises 1, 2 and 3 in order."""
    print_gene_id("ESPN")
    print_phenotype_variants("coffee consumption")
    print_homologies("BRCA2")


if __name__ == "__main__":
    main()