Denis Torre
September 20, 2017
This notebook explains how to extract data from the Datasets2Tools API using Python. The notebook can be downloaded at the following GitHub page: https://github.com/denis-torre/datasets2tools/tree/master/api.
The Datasets2Tools API can be used to search three types of objects: canned analyses (`canned_analysis`), datasets (`dataset`), and tools (`tool`).
A more detailed explanation of how to search each of these object types is given below.
Here is an example of search results for the analyses endpoint.
# Import modules
import json
import requests
import pandas as pd
# Search endpoint of the Datasets2Tools API
url = 'http://amp.pharm.mssm.edu/datasets2tools/api/search'

# Query: canned analyses, with page_size set to 5
data = dict(object_type='canned_analysis', page_size=5)

# Send the query as URL parameters of a POST request
response = requests.post(url, params=data)

# Decode the JSON body of the response
results = json.loads(response.text)

# Build a DataFrame from the decoded results and display it
results_dataframe = pd.DataFrame(data=results)
results_dataframe
For convenience, we define a function to search the API and return a pandas DataFrame.
# Import modules
import json
import requests
import pandas as pd
def search_datasets2tools(search_options):
    """Search the Datasets2Tools API and return the results as a DataFrame.

    Parameters
    ----------
    search_options : dict
        Query parameters sent to the search endpoint. It must contain an
        ``'object_type'`` key: its value, suffixed with ``'_accession'``,
        names the column that becomes the index of the returned DataFrame.

    Returns
    -------
    pandas.DataFrame or str
        The search results indexed by ``<object_type>_accession``. If the
        response cannot be decoded, converted to a DataFrame, or indexed,
        the string ``'Sorry, there has been an error.'`` is returned
        instead, so callers should check the type before using the result.
    """
    # Get API URL
    url = 'http://amp.pharm.mssm.edu/datasets2tools/api/search'

    # Get response (errors raised by the request itself are not caught here)
    response = requests.post(url, params=search_options)

    try:
        # Read response
        results_dict = json.loads(response.text)

        # Convert to dataframe
        results_dataframe = pd.DataFrame(results_dict)

        # Set index
        results_dataframe.set_index(search_options['object_type'] + '_accession', inplace=True)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return 'Sorry, there has been an error.'

    return results_dataframe
# Keyword search over canned analyses for 'prostate cancer'
results = search_datasets2tools(dict(object_type='canned_analysis', q='prostate cancer'))
results.head()
Search all canned analyses associated with GEO dataset GSE775.
# Canned analyses filtered by dataset accession GSE775
results = search_datasets2tools(dict(object_type='canned_analysis', dataset_accession='GSE775'))
results.head()
Search all canned analyses generated by Enrichr.
# Canned analyses filtered by tool name Enrichr
results = search_datasets2tools(dict(object_type='canned_analysis', tool_name='Enrichr'))
results.head()
Search all canned analyses with the colon cancer disease name.
# Canned analyses filtered by disease name 'colon cancer'
results = search_datasets2tools(dict(object_type='canned_analysis', disease_name='colon cancer'))
results.head()
Search all analyses generated by Enrichr on dataset GSE31106, where the geneset is upregulated.
# Combine several filters: tool name, dataset accession and geneset
results = search_datasets2tools(dict(
    object_type='canned_analysis',
    tool_name='Enrichr',
    dataset_accession='GSE31106',
    geneset='upregulated',
))
results.head()
# Datasets filtered by dataset accession GSE775
results = search_datasets2tools(dict(object_type='dataset', dataset_accession='GSE775'))
results.head()
Search all datasets which contain the keyword asthma.
# Keyword search over datasets for 'asthma'
results = search_datasets2tools(dict(object_type='dataset', q='asthma'))
results.head()
Search all datasets which have been analyzed by Enrichr.
# Datasets filtered by tool name Enrichr
results = search_datasets2tools(dict(object_type='dataset', tool_name='Enrichr'))
results.head()
Search all datasets which have been used to generate canned analysis DCA00000002.
# Datasets filtered by canned analysis accession DCA00000002
results = search_datasets2tools(dict(object_type='dataset', canned_analysis_accession='DCA00000002'))
results.head()
# Tools filtered by tool name ARCHS4
results = search_datasets2tools(dict(object_type='tool', tool_name='ARCHS4'))
results.head()
Search all tools which contain the keyword enrichment.
# Keyword search over tools for 'enrichment'
results = search_datasets2tools(dict(object_type='tool', q='enrichment'))
results.head()
Search all tools which have analyzed GEO dataset GSE775.
# Tools filtered by dataset accession GSE775
results = search_datasets2tools(dict(object_type='tool', dataset_accession='GSE775'))
results.head()
Search all tools which have been used to generate canned analysis DCA00000002.
# Tools filtered by canned analysis accession DCA00000002
results = search_datasets2tools(dict(object_type='tool', canned_analysis_accession='DCA00000002'))
results.head()