#!/usr/bin/env python
# coding: utf-8

# **An Introduction to the RDKit for Cheminformatics**

# Import the necessary Python libraries

# In[1]:


from rdkit import Chem  # RDKit Chemistry
from rdkit.Chem.Draw import IPythonConsole  # RDKit drawing
from rdkit.Chem import Draw  # RDKit drawing
# A few settings to improve the quality of structures
from rdkit.Chem import rdDepictor
IPythonConsole.ipython_useSVG = True
rdDepictor.SetPreferCoordGen(True)
from rdkit.Chem import PandasTools  # Add the ability to add a molecule to a dataframe
import mols2grid  # The mols2grid library provides a convenient way of displaying molecules in a grid


# Create a molecule (benzene) from a SMILES string

# In[2]:


mol = Chem.MolFromSmiles("c1ccccc1")


# In[3]:


# A bare expression: in a notebook this displays the molecule; in a plain
# script it has no visible effect.
mol


# Get SMILES for Gleevec from ChEMBL
#
# - [From ChEMBL](https://www.ebi.ac.uk/chembl/compound/inspect/CHEMBL941)

# In[4]:


glvc = Chem.MolFromSmiles("CN1CCN(Cc2ccc(cc2)C(=O)Nc3ccc(C)c(Nc4nccc(n4)c5cccnc5)c3)CC1")


# In[5]:


glvc


# Read a set of molecules from an SD file

# In[6]:


# SDMolSupplier yields None for any record it cannot parse; drop those so the
# drawing and grid calls below only ever receive valid molecules.
mols = [x for x in Chem.SDMolSupplier("example_compounds.sdf") if x is not None]


# In[7]:


mols


# Draw these molecules as a grid

# In[8]:


Draw.MolsToGridImage(mols, molsPerRow=4, useSVG=True)


# We can use the mols2grid library to display molecules in a grid

# In[9]:


mols2grid.display(mols)


# In[10]:


# NOTE(review): presumably reports the molecules selected in the grid above;
# confirm this attribute still exists in the installed mols2grid version.
mols2grid.selection


# We can also read an SD file into a Pandas dataframe.

# In[11]:


df = PandasTools.LoadSDF("example_compounds.sdf")


# In[12]:


df.head()


# Let's add columns with molecular weight and LogP to the dataframe.

# In[13]:


from rdkit.Chem.Descriptors import MolWt
from rdkit.Chem.Crippen import MolLogP
# One descriptor value per molecule in the dataframe's ROMol column.
df['MW'] = [MolWt(x) for x in df.ROMol]
df['LogP'] = [MolLogP(x) for x in df.ROMol]


# In[14]:


df.head()


# We can use a boxplot to examine the distribution of molecular weight within the dataframe.

# In[15]:


import seaborn as sns


# In[16]:


ax = sns.boxplot(x=df.MW)


# In[ ]: