#!/usr/bin/env python
# coding: utf-8

# ### Retrieve compound data from Metabolomics Workbench using REST API

# Import Python modules...

# In[ ]:


from __future__ import print_function

import os
import sys
import time
import re

import requests

from IPython import __version__ as ipyVersion

# Record which interpreter and IPython release produced the output below.
print(
    "Python: %s.%s.%s"
    % (sys.version_info[0], sys.version_info[1], sys.version_info[2])
)
print("IPython: %s" % (ipyVersion,))

# Blank separator line, then the local time at which this run started.
print()
print(time.asctime())


# **The URL PATH**
# 
# The MW REST URL consists of three main parts, separated by forward slashes, after the common prefix specifying the invariant base URL (https://www.metabolomicsworkbench.org/rest/):
# 
# https://www.metabolomicsworkbench.org/rest/context/input_specification/output_specification
#     
# Part 1: The context determines the type of data to be accessed from the Metabolomics Workbench, such as metadata or results related to the submitted studies, data from metabolites, genes/proteins and analytical chemistry databases as well as other services related to mass spectrometry and metabolite identification:
# 
# context = study | compound | refmet | gene | protein | moverz | exactmass
# 
# Part 2: The input specification consists of two required parameters describing the REST request:
# 
# input_specification = input_item/input_value
# 
# Part 3: The output specification consists of two parameters describing the output generated by the REST request:
# 
# output_specification = output_item/(output_format)
# 
# The first parameter is required in most cases. The second parameter is optional. The input and output specifications are context sensitive. The context determines the values allowed for the remaining parameters in the input and output specifications as detailed in the sections below.
# 

# Setup MW REST base URL...

# In[ ]:


# Invariant prefix of every MW REST request. It carries no trailing slash:
# the request URLs below append their "/context/input/output" path to it.
MWBaseURL = (
    "https://www.metabolomicsworkbench.org/rest"
)


# **The “compound” context**
# 
# The “compound” context provides services for the Metabolomics Workbench Metabolite Database which contains structures and annotations of biologically relevant metabolites. The database contains over 64,000 entries, collected from public repositories such as LIPID MAPS, ChEBI, HMDB, BMRB, PubChem, and KEGG, as well as from literature sources. This context provides access to many structural features including molfile, SMILES, InChIKey, exact mass, formula, common and systematic names, chemical classification and cross-references to other databases.
# 
# context = compound
# 
# input_item = regno | formula | inchi_key | lm_id | pubchem_cid | hmdb_id | kegg_id | chebi_id | metacyc_id | abbrev
# 
# input_value = input_item_value
# 
# output_item = all | regno | formula | exactmass | inchi_key | name | sys_name | smiles | lm_id | pubchem_cid | hmdb_id | kegg_id | chebi_id | metacyc_id | classification | molfile | png | regno,formula,exactmass,...
# 
# output_format = txt | json (Default: json)
# 
# Only one input item and input value may be specified in the REST URL.
# 
# The “all” output item is automatically expanded to include the following items: regno, formula, exactmass, inchi_key, name, sys_name, lm_id, pubchem_cid, hmdb_id, kegg_id, chebi_id, metacyc_id, smiles. These output items should not be individually specified with the “all” output item.
# 
# The “classification” output item is automatically expanded to include the following items: regno, name, sys_name, cf_superclass, cf_class, cf_subclass, cf_direct_parent, cf_alternative_parents, lm_category, lm_main_class, lm_sub_class, lm_class_level4. These output items should not be individually specified with the “classification” output item. The “cf” and “lm” correspond to ClassyFire and LIPID MAPS classification systems respectively. 
# 

# 
# **Retrieve and process compound data in JSON format**
# 

# Setup REST URL to retrieve name for a compound using MW regno...

# In[ ]:


# compound context / input: regno 34361 / output item: name (JSON by default).
MWDataURL = "".join((MWBaseURL, "/compound/regno/34361/name"))


# Execute REST request using "requests" module...

# In[ ]:


print("Initiating request: %s" % MWDataURL)

# requests applies no timeout by default, so an unresponsive server would
# block this script forever. Bounding the wait makes it raise
# requests.exceptions.Timeout instead.
Response = requests.get(MWDataURL, timeout=60)


# Check "request" status...

# In[ ]:


# Always echo the HTTP status; anything other than 200 is reported as a failure.
print("\nStatus Code: %d" % Response.status_code)

if not Response.status_code == 200:
    print("Request failed: status_code: %d" % (Response.status_code,))


# Process JSON results...

# In[ ]:


# Only decode the body of a successful request. A failed request has already
# been reported by the status check, and its body is not the expected JSON
# payload, so decoding it would just raise a confusing error.
if Response.status_code == 200:
    print("\nAvailable name data for a compound using regno:\n")

    # The decoded payload is walked as a mapping of output item name -> value.
    Results = Response.json()

    for ResultType in Results:
        ResultValue = Results[ResultType]
        print("%s: %s" % (ResultType, ResultValue))


# Retrieve and process all data for a compound using regno...

# In[ ]:


# compound context / input: regno 34361 / output item: all (JSON by default).
MWDataURL = MWBaseURL + "/compound/regno/34361/all"

print("Initiating request: %s" % MWDataURL)

# requests applies no timeout by default; bound the wait so an unresponsive
# server raises requests.exceptions.Timeout instead of hanging the script.
Response = requests.get(MWDataURL, timeout=60)

print("\nStatus Code: %d" % (Response.status_code))
if Response.status_code != 200:
    print("Request failed: status_code: %d" % Response.status_code)
else:
    # Decode and print the payload only for a successful request; the body of
    # a failed one is not the expected JSON document.
    print("\nAll available data for a compound using regno:\n")

    # The decoded payload is walked as a mapping of output item name -> value.
    Results = Response.json()

    for ResultType in Results:
        ResultValue = Results[ResultType]
        print("%s: %s" % (ResultType, ResultValue))


# Retrieve and process classification data for a compound using PUBCHEM_CID...

# In[ ]:


# compound context / input: pubchem_cid 5997 / output item: classification.
MWDataURL = MWBaseURL + "/compound/pubchem_cid/5997/classification"

print("Initiating request: %s" % MWDataURL)

# requests applies no timeout by default; bound the wait so an unresponsive
# server raises requests.exceptions.Timeout instead of hanging the script.
Response = requests.get(MWDataURL, timeout=60)

print("\nStatus Code: %d" % (Response.status_code))
if Response.status_code != 200:
    print("Request failed: status_code: %d" % Response.status_code)
else:
    # Decode and print the payload only for a successful request; the body of
    # a failed one is not the expected JSON document.
    print("\nAll available classification data for a compound using PUBCHEM_CID:\n")

    # The decoded payload is walked as a mapping of output item name -> value.
    Results = Response.json()

    for ResultType in Results:
        ResultValue = Results[ResultType]
        print("%s: %s" % (ResultType, ResultValue))


# **Retrieve and process compound data in text format**

# Retrieve and process all data for a compound using regno...

# In[ ]:


# compound context / input: regno 34361 / output item: all / format: txt.
MWDataURL = MWBaseURL + "/compound/regno/34361/all/txt"

print("Initiating request: %s" % MWDataURL)

# requests applies no timeout by default; bound the wait so an unresponsive
# server raises requests.exceptions.Timeout instead of hanging the script.
Response = requests.get(MWDataURL, timeout=60)

print("\nStatus Code: %d" % (Response.status_code))
if Response.status_code != 200:
    print("Request failed: status_code: %d" % Response.status_code)
else:
    # Parse and print the body only for a successful request.
    print("\nAll available data for a compound using regno:\n")

    Results = Response.text
    for Result in Results.split("\n"):
        # Each data line is handled as "<item>\t<value>"; any line that does
        # not split into exactly two tab-separated fields is skipped.
        Words = Result.split("\t")
        if len(Words) != 2:
            continue

        # Reuse the fields already split above instead of splitting again.
        ResultType, ResultValue = Words
        print("%s: %s" % (ResultType, ResultValue))