1. Write a script to look up the gene called ESPN in human and print the stable ID of this gene.
library(httr)
library(jsonlite)
# Send a GET request to a REST endpoint and return the response body.
#
# server:       base URL; it is concatenated directly with `request`
# request:      endpoint path (plus any query string) appended to `server`
# content_type: value sent in the Accept header. When it is exactly
#               "application/json" the body is parsed with fromJSON();
#               otherwise the body is returned as text.
fetch_endpoint <- function(server, request, content_type) {
  url <- paste0(server, request)
  response <- GET(url, accept(content_type))
  # Raise an R error if the server answered with an HTTP error status.
  stop_for_status(response)
  body <- content(response, "text", encoding = "UTF-8")
  if (content_type != "application/json") {
    return(body)
  }
  fromJSON(body)
}
# General URL parameters: REST server and requested response format
server <- "http://rest.ensembl.org/"
con <- "application/json"
# Symbol of the gene to look up
gene_name <- "ESPN"
# Build the lookup-by-symbol request for human and send it
ext_get_lookup <- paste0("lookup/symbol/homo_sapiens/", gene_name, "?")
get_lookup <- fetch_endpoint(server, ext_get_lookup, con)
# Pull the "id" field out of the parsed response and print it
stable_id <- get_lookup$id
cat(stable_id)
2. Get all variants that are associated with the phenotype 'Coffee consumption'. For each variant print:
a. the p-value for the association
b. the PMID for the publication which describes the association between that variant and 'Coffee consumption'
c. the risk allele and the associated gene.
Note that R cannot cope with spaces in your URL extension, so you will need to write the phenotype as 'coffee%20consumption'.
library(httr)
library(jsonlite)
# Send a GET request to a REST endpoint and return the response body.
#
# server:       base URL; it is concatenated directly with `request`
# request:      endpoint path (plus any query string) appended to `server`
# content_type: value sent in the Accept header. When it is exactly
#               "application/json" the body is parsed with fromJSON();
#               otherwise the body is returned as text.
fetch_endpoint <- function(server, request, content_type) {
  url <- paste0(server, request)
  response <- GET(url, accept(content_type))
  # Raise an R error if the server answered with an HTTP error status.
  stop_for_status(response)
  body <- content(response, "text", encoding = "UTF-8")
  if (content_type != "application/json") {
    return(body)
  }
  fromJSON(body)
}
# Query the phenotype/term endpoint for 'Coffee consumption' in human and show,
# for each returned record, the variant together with its p-value, external
# reference, risk allele and associated gene.

# define the URL parameters
server <- "http://rest.ensembl.org/"
con <- "application/json"
# `server` already ends in "/", so the extension must not start with one;
# otherwise the request URL contains "//". The space in the phenotype name is
# percent-encoded as %20.
ext_phen <- "phenotype/term/homo_sapiens/coffee%20consumption?"
get_phen <- fetch_endpoint(server, ext_phen, con)
# Flatten the nested columns of the parsed response into top-level
# "attributes.*" columns. The jsonlite:: prefix guards against another attached
# package masking flatten().
flat_get_phen <- jsonlite::flatten(get_phen, recursive = TRUE)
# Auto-printed at top level: one row per record, only the requested columns.
flat_get_phen[, c(
  "Variation",
  "attributes.p_value",
  "attributes.external_reference",
  "attributes.risk_allele",
  "attributes.associated_gene"
)]
3. Get the mouse homologue of the human BRCA2 and print the ID and the aligned sequence of both.
Note that the JSON for the endpoint you need is several layers deep, containing nested lists (which appear as square brackets [ ] in the JSON) and key-value sets (which appear as curly brackets { } in the JSON).
library(httr)
library(jsonlite)
# Send a GET request to a REST endpoint and return the response body.
#
# server:       base URL; it is concatenated directly with `request`
# request:      endpoint path (plus any query string) appended to `server`
# content_type: value sent in the Accept header. When it is exactly
#               "application/json" the body is parsed with fromJSON();
#               otherwise the body is returned as text.
fetch_endpoint <- function(server, request, content_type) {
  url <- paste0(server, request)
  response <- GET(url, accept(content_type))
  # Raise an R error if the server answered with an HTTP error status.
  stop_for_status(response)
  body <- content(response, "text", encoding = "UTF-8")
  if (content_type != "application/json") {
    return(body)
  }
  fromJSON(body)
}
# Fetch the mouse homologue(s) of a human gene and print, for every
# source/target pair, the ID, species and aligned sequence of both members.

gene <- "BRCA2"
# define the URL parameters
server <- "http://rest.ensembl.org/"
con <- "application/json"
ext_hom <- paste0("homology/symbol/human/", gene, "?target_species=mouse")
get_hom <- fetch_endpoint(server, ext_hom, con)
homologies <- get_hom$data$homologies
# NOTE(review): assumes fromJSON() has simplified each element of `homologies`
# into a table whose `source` and `target` columns hold one entry per
# homologue. Confirm against a live response.
for (homology in homologies) {
  source_id <- homology$source$id
  source_seq <- homology$source$align_seq
  source_species <- homology$source$species
  target_id <- homology$target$id
  target_seq <- homology$target$align_seq
  target_species <- homology$target$species
  # Print one record per homologue. Passing the whole vectors to cat() would
  # print all IDs, then all species, and so on, whenever more than one
  # homologue is returned. seq_along() also makes an empty result print
  # nothing instead of bare ">" markers.
  for (i in seq_along(source_id)) {
    cat(
      ">", source_id[i], source_species[i], "\n", source_seq[i],
      "\n>", target_id[i], target_species[i], "\n", target_seq[i],
      "\n" # trailing newline so consecutive records do not run together
    )
  }
}