# First of all we have to import some useful libraries.

# json for converting Python dicts to JSON objects and back
import json

# lxml.etree for pretty-printing XML documents
from lxml import etree

# IPython helpers for displaying images, SVG and JavaScript output
from IPython.display import Image, Javascript, SVG, display

# By default the Python client uses the public instance of the web services,
# which requires an internet connection. We want to use the local instance
# provided with myChEMBL, so some additional configuration is needed.
# Skip this override when using the client outside of myChEMBL.
from chembl_webresource_client.settings import Settings

Settings.Instance().UTILS_SPORE_URL = 'http://127.0.0.1:80/utils/spore'

# Finally, import the utils (aka Beaker) part of the ChEMBL web services.
# NOTE(review): this import deliberately stays below the Settings override,
# as in the original ordering -- presumably the URL is read when the utils
# module is set up; confirm before moving it to the top of the file.
import chembl_webresource_client.utils as utils_mod

utils = utils_mod.utils

# List everything the utils object exposes.
print(dir(utils))

# We will start with converting SMILES to a molfile.
# Let's take the SMILES of aspirin:
smiles = 'O=C(Oc1ccccc1C(=O)O)C'

# And this is how we do the conversion, simple!
ctab = utils.smiles2ctab(smiles)

# And here we can see the result:
print(ctab)

# OK, now having our molfile (ctab), let's convert it back to SMILES.
# By default, the computed SMILES will be canonical:
smi_file = utils.ctab2smiles(ctab)
print(smi_file)

# The result is a *.smi file with a header, so in order to get only the
# SMILES we have to extract the relevant part (the third whitespace-separated
# token of the response):
canonical_smiles = smi_file.split()[2]
print(canonical_smiles)

# Having our aspirin molfile (ctab), we can compute the InChI:
inchi = utils.ctab2inchi(ctab)
print(inchi)

# And, of course, an InChIKey from the InChI:
inchiKey = utils.inchi2inchiKey(inchi)
print(inchiKey)

# It's also possible to convert the InChI back to a molfile (ctab2):
ctab2 = utils.inchi2ctab(inchi)

# And a molfile to SMILES:
smiles2 = utils.ctab2smiles(ctab2).split()[2]

# Let's check if we've got the same canonical SMILES after this round trip.
# The comparison is printed explicitly: a bare `a == b` expression is only
# echoed by a notebook cell and is silently discarded in a script.
print(canonical_smiles == smiles2)

# Again, let's start from the SMILES for aspirin
smiles = 'O=C(Oc1ccccc1C(=O)O)C'

# We will convert it to *.mrv format; the service answers with a JSON
# document whose 'structure' entry holds the converted structure.
mrv = json.loads(utils.molExport(structure=smiles, parameters="mrv"))['structure']

# Since *.mrv files are XML-based we can pretty-print it:
root = etree.fromstring(mrv).getroottree()
print(etree.tostring(root, pretty_print=True))

# OK, now let's do the opposite.
# Starting with some mrv (cml) file, let's compute stereo information:
# NOTE(review): the literal below contains only ' W ', which is not an XML
# document -- it looks like the markup of the original mrv/cml input was lost.
# It is kept unchanged here; restore the full document before relying on the
# call below. TODO confirm against the original notebook.
cml = ''' W '''

# According to the Marvin 4 JS WS specification, the result has to be JSON:
stereo_info = json.loads(utils.cipStereoInfo(structure=cml))
print(stereo_info)

# The first method provided by the standardiser breaks bonds to Group I and II
# metal atoms. Before using it, we have to convert our input SMILES string
# to ctab:
mol = utils.smiles2ctab("[Na]OC(=O)c1ccccc1")

# Now we can apply the function
br = utils.breakbonds(mol)

# In order to get our result back in SMILES format we have to make a
# conversion (third whitespace-separated token of the *.smi response):
smiles = utils.ctab2smiles(br).split()[2]

# And here is the result:
print(smiles)

# We can even use Beaker to render input and output.
# Plain display() calls are used instead of building a throw-away list.
display(Image(utils.smiles2image("[Na]OC(=O)c1ccccc1")))
display(Image(utils.smiles2image("[Na+].O=C([O-])c1ccccc1")))

# The second method neutralises charges by adding/removing protons.
# Again, we have to convert SMILES to ctab first, then apply the method and
# convert the result back to SMILES:
mol = utils.smiles2ctab("C(C(=O)[O-])(Cc1n[n-]nn1)(C[NH3+])(C[N+](=O)[O-])")
ne = utils.neutralise(mol)
smiles = utils.ctab2smiles(ne).split()[2]

# Now we can print the result
print(smiles)

# And render input and output
display(Image(utils.smiles2image("C(C(=O)[O-])(Cc1n[n-]nn1)(C[NH3+])(C[N+](=O)[O-])")))
display(Image(utils.smiles2image("NCC(Cc1nn[nH]n1)(C[N+](=O)[O-])C(=O)O")))

# The third method applies many structure-normalisation transformations.
# Invoking it in the standard way:
mol = utils.smiles2ctab("Oc1nccc2cc[nH]c(=N)c12")
ru = utils.rules(mol)
smiles = utils.ctab2smiles(ru).split()[2]

# Printing the results:
print(smiles)

# Rendering input and output:
display(Image(utils.smiles2image("Oc1nccc2cc[nH]c(=N)c12")))
display(Image(utils.smiles2image("Nc1nccc2cc[nH]c(=O)c12")))

# The fourth method can be used to discard any salt/solvate components.
# We already know what to do:
mol = utils.smiles2ctab("[Na+].OC(=O)Cc1ccc(CN)cc1.OS(=O)(=O)C(F)(F)F")
un = utils.unsalt(mol)
smiles = utils.ctab2smiles(un).split()[2]

# Printing results:
print(smiles)

# Rendering input and output.
# Plain display() calls are used instead of building a throw-away list.
display(Image(utils.smiles2image("[Na+].OC(=O)Cc1ccc(CN)cc1.OS(=O)(=O)C(F)(F)F")))
display(Image(utils.smiles2image("NCc1ccc(CC(=O)O)cc1")))

# The last method from the Standardiser package aggregates the four previous
# ones into one:
mol = utils.smiles2ctab("[Na]OC(=O)Cc1ccc(C[NH3+])cc1.c1nnn[n-]1.O")
st = utils.standardise(mol)
smiles = utils.ctab2smiles(st).split()[2]
print(smiles)
display(Image(utils.smiles2image("[Na]OC(=O)Cc1ccc(C[NH3+])cc1.c1nnn[n-]1.O")))
display(Image(utils.smiles2image("NCc1ccc(CC(=O)O)cc1")))

# We will now calculate a number of chemical descriptors.
# As previously, we will start with the aspirin SMILES:
aspirin = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')

# Each descriptor call answers with a JSON list; the first element is the
# value for our (single) input molecule.

# The first descriptor will be the number of heavy atoms:
num_atoms = json.loads(utils.getNumAtoms(aspirin))[0]
print("num atoms = %s" % num_atoms)

# Molecular weight:
mol_wt = json.loads(utils.molWt(aspirin))[0]
print("mol_wt = %s" % mol_wt)

# Log_p:
log_p = json.loads(utils.logP(aspirin))[0]
print("log_p = %s" % log_p)

# TPSA:
tpsa = json.loads(utils.tpsa(aspirin))[0]
print("tpsa = %s" % tpsa)

# Or we can just calculate all those descriptors (and more!) at once:
descriptors = json.loads(utils.descriptors(aspirin))[0]
print(descriptors)

# As well as descriptors we can compute fingerprints.
# The output will be in FPS format. You can use the optional "type" argument
# to choose the type of fingerprints.
# This can be "morgan", "pair" or "maccs". Default is "morgan".
aspirin = utils.smiles2ctab('O=C(Oc1ccccc1C(=O)O)C')
fingerprints = utils.sdf2fps(aspirin)
print(fingerprints)

# In addition to computing compound images in raster format (png), Beaker
# supports vector formats as well.
# We will first introduce a JSON-based format. You can for example use the
# `smiles2json` method to generate a JSON object
# describing the visual representation.
# In order to render it, you can use the raphael.js library and its
# `paper.add` method:
aspirin = 'O=C(Oc1ccccc1C(=O)O)C'
print(utils.smiles2json(aspirin))

# JavaScript template; the single %s placeholder receives the JSON drawing
# description. Only the whitespace between statements was restored here,
# which is not significant to JavaScript.
code = """
window.define = undefined;
$.getScript('https://cdnjs.cloudflare.com/ajax/libs/raphael/2.1.0/raphael-min.js', function(){
    var target = $(':focus').parent('div');
    var paper = Raphael(target, 320, 200);
    paper.add(%s);
    $(paper.canvas).delay( 2000 ).fadeOut( 400 );
});
"""
# display() is called explicitly: a bare Javascript(...) expression is only
# rendered when it is the last expression of a notebook cell.
display(Javascript(code % utils.smiles2json(aspirin)))

# The most popular vector graphics format is XML-based SVG; this is how we can
# render a compound as an SVG image:
benzene = 'c1ccccc1'
svg = utils.smiles2svg(benzene)

# Pretty-printing the SVG, just to prove this is a vector graphic:
root = etree.fromstring(svg).getroottree()
print(etree.tostring(root, pretty_print=True))

# And finally displaying it:
display(SVG(svg))

# And finally our old friends - raster images:
aspirin = 'O=C(Oc1ccccc1C(=O)O)C'
img = utils.smiles2image(aspirin)
display(Image(img))

# This is how to find a maximum common substructure (MCS) of three molecules:
smiles = ["O=C(NCc1cc(OC)c(O)cc1)CCCC/C=C/C(C)C",
          "CC(C)CCCCCC(=O)NCC1=CC(=C(C=C1)O)OC",
          "c1(C=O)cc(OC)c(O)cc1"]

# Converting our molecules' SMILES to molfiles:
mols = [utils.smiles2ctab(smile) for smile in smiles]

# Joining the molfiles to create an SDF file:
sdf = ''.join(mols)

# And finally computing the MCS
result = utils.mcs(sdf)

# And displaying results:
print(result)

# It's very easy to compute a molfile with 3D coordinates:
aspirin = 'O=C(Oc1ccccc1C(=O)O)C'
mol_3D = utils.smiles23D(aspirin)
print(mol_3D)

# Traditionally, let's start with the aspirin SMILES:
aspirin = 'CC(=O)Oc1ccccc1C(=O)O'

# Let's convert it to an image:
im = utils.smiles2image(aspirin)

# And use OSRA to convert the image to a molfile:
mol = utils.image2ctab(im)

# We can now convert the molfile to SMILES:
smiles = utils.ctab2smiles(mol).split()[2]

# And check if we get the same SMILES string.
# Printed explicitly: a bare comparison is discarded outside a notebook cell.
print(smiles == aspirin)

# Last piece of Beaker functionality is kekulisation:
# This time we will start with
# molfile (a six-membered carbon ring whose six bonds all have bond type 4,
# i.e. aromatic):
# NOTE(review): the original literal had been flattened onto a single line,
# which is not a valid V2000 molfile (the format is line- and column-based).
# The line breaks and fixed-width columns below were rebuilt from the visible
# tokens following the V2000 layout -- verify against the original molfile.
aromatic = '''
  Mrv0541 08191414212D

  6  6  0  0  0  0            999 V2000
   -1.7679    1.5616    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.4823    1.1491    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.4823    0.3241    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.7679   -0.0884    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.0534    0.3241    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.0534    1.1491    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  4  0  0  0  0
  1  6  4  0  0  0  0
  2  3  4  0  0  0  0
  3  4  4  0  0  0  0
  4  5  4  0  0  0  0
  5  6  4  0  0  0  0
M  END
'''

# Kekulising is trivial:
kek = utils.kekulize(aromatic)

# Rendering the result.
# display() is called explicitly: a bare Image(...) expression is only
# rendered when it is the last expression of a notebook cell.
display(Image(utils.ctab2image(kek)))