This feature is available since version 2.0.0.
NOTE: Since version 2.1.7 it is no longer necessary to specify protected_ids
to avoid MolDrug failures. However, in this tutorial we keep the workaround for users of older versions. It may still happen that the specified CReM parameters completely remove any similarity between a solution and the reference structure used for constraint docking; only in this specific scenario should you protect some atoms in order to preserve some maximum common substructure (MCS).
import tempfile, os, requests, gzip, shutil, yaml
from multiprocessing import cpu_count
from moldrug.data import ligands, boxes, receptor_pdbqt, receptor_pdb, constraintref
from moldrug import utils
tmp_path = tempfile.TemporaryDirectory()
import numpy as np
from rdkit import Chem
def atom_ids_list(smiles: str) -> list[int]:
    """Return the list of atom IDs of the molecule encoded by a SMILES string.

    Parameters
    ----------
    smiles : str
        The SMILES string of the molecule

    Returns
    -------
    list[int]
        Consecutive atom IDs, from 0 to the number of atoms (as reported
        by ``GetNumAtoms()``) minus one

    Raises
    ------
    ValueError
        If RDKit is not able to parse ``smiles``
    """
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles reports a parsing failure by returning None instead of raising
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    return list(range(mol.GetNumAtoms()))
# Working directory used for every file of this tutorial. Any other directory can be
# used instead, but it MUST be given as an absolute path, for example:
# os.makedirs('wd_tutorial', exist_ok=True)
# wd = os.path.abspath('wd_tutorial')
wd = tmp_path.name

# Example data shipped with MolDrug: the ligand and the docking box definition
lig = ligands.r_x0161
box = boxes.r_x0161

# Write the receptor (PDBQT and PDB) and the constraint reference (SDF) to the working directory
for file_name, content in (
    ('x0161.pdbqt', receptor_pdbqt.r_x0161),
    ('x0161.pdb', receptor_pdb.r_x0161),
    ('ref.sdf', constraintref.r_x0161),
):
    with open(os.path.join(wd, file_name), 'w') as f:
        f.write(content)
# Getting the CReM data base
url = "http://www.qsar4u.com/files/cremdb/replacements02_sc2.db.gz"
crem_dbgz_path = os.path.join(wd, 'crem.db.gz')
crem_db_path = os.path.join(wd, 'crem.db')

# Stream the compressed archive to disk in chunks instead of keeping it all in memory.
# raise_for_status() stops here on an HTTP error, so that an error page is never
# saved (and later decompressed) as if it were the data base.
with requests.get(url, allow_redirects=True, stream=True, timeout=60) as r:
    r.raise_for_status()
    with open(crem_dbgz_path, 'wb') as f_out:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            f_out.write(chunk)

# Decompress the archive into the data base file used by MolDrug (crem_db_path)
with gzip.open(crem_dbgz_path, 'rb') as f_in, open(crem_db_path, 'wb') as f_out:
    shutil.copyfileobj(f_in, f_out)
# Inspect the inputs: the ligand SMILES, its atom IDs and the docking box ...
for item in (lig, atom_ids_list(lig), box):
    print(item)
# ... and the files written so far to the working directory
print(wd, os.listdir(wd))
COC(=O)C=1C=CC(=CC1)S(=O)(=O)N [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] {'A': {'boxcenter': [12.11, 1.84, 23.56], 'boxsize': [22.5, 22.5, 22.5]}} /tmp/tmp7152x8eg ['x0161.pdb', 'ref.sdf', 'crem.db.gz', 'x0161.pdbqt', 'crem.db']
MolDrug version 2.0.4 tried to fix some issues with constraint docking (see the release description). If follow-up jobs are needed and `mutate_crem_kwargs` allows changes in heavy atoms, the keyword `protected_ids` MUST be specified in `mutate_crem_kwargs`. This is needed because `constraint_ref` is used as a fixed core during the generation of the constrained conformers; therefore, if a generated molecule does not contain this core, an error occurs. In version 2.0.0 the core was guessed based on the MCS, but some errors happened (see this RDKit bug). You then have two possible workarounds:

1. Use `mutate_crem_kwargs` that only allow grow operations (`min_size = max_size = 0`). In this way the core will be preserved.
2. Specify `protected_ids` inside `mutate_crem_kwargs`. Those IDs are the atom indexes of `seed_mol` that correspond to the atoms of `constraint_ref`.

In this tutorial we will use the second strategy. In this case `constraint_ref` is the same as `seed_mol` but in a specific conformational state. Therefore we can use the result of the `atom_ids_list` function (defined above) to get the `protected_ids` list of atoms.
# Number of parallel workers of the genetic algorithm. The available cores are
# shared among them; max(1, ...) guarantees that every Vina run gets at least
# one core, even on machines with fewer cores than workers.
njobs = 6

# Atom indexes of seed_mol that CReM is not allowed to modify. They are derived
# from the ligand itself instead of being hard-coded.
protected_ids = atom_ids_list(lig)

# The first entry is the main job; the following ones are follow-up jobs that only
# override the listed keywords. Every job gets its own copy of the protected_ids
# list, so that yaml.dump does not emit anchors/aliases for a shared object.
config = {
    "01_grow": {
        "type": "GA",
        "njobs": njobs,
        "seed_mol": lig,
        "AddHs": True,
        "costfunc": "Cost",
        "costfunc_kwargs": {
            "vina_executable": "vina",
            "receptor_pdbqt_path": os.path.join(wd, 'x0161.pdbqt'),
            "boxcenter": box['A']['boxcenter'],
            "boxsize": box['A']['boxsize'],
            "exhaustiveness": 4,
            "ncores": max(1, cpu_count() // njobs),
            "num_modes": 1,
            "constraint": True,
            "constraint_type": "score_only",  # the alternative is "local_only"
            "constraint_ref": os.path.join(wd, 'ref.sdf'),
            "constraint_receptor_pdb_path": os.path.join(wd, 'x0161.pdb'),
            "constraint_num_conf": 100,
            "constraint_minimum_conf_rms": 0.01,
        },
        "crem_db_path": crem_db_path,
        "maxiter": 5,
        "popsize": 25,
        "beta": 0.001,
        "pc": 1,
        "get_similar": False,
        "mutate_crem_kwargs": {
            "radius": 3,
            "min_size": 0,
            "max_size": 0,
            "min_inc": -5,
            "max_inc": 6,
            "protected_ids": list(protected_ids),
            "ncores": 12,
        },
        "save_pop_every_gen": 10,
        "deffnm": "01_grow",
    },
    "02_allow_grow": {
        "mutate_crem_kwargs": {
            "radius": 3,
            "min_size": 0,
            "max_size": 2,
            "min_inc": -5,
            "max_inc": 3,
            "protected_ids": list(protected_ids),
            "ncores": 12,
        },
        "maxiter": 5,
        "deffnm": "02_allow_grow",
    },
    "03_pure_mutate": {
        "mutate_crem_kwargs": {
            "radius": 3,
            "min_size": 1,
            "max_size": 8,
            "min_inc": -5,
            "max_inc": 3,
            "protected_ids": list(protected_ids),
            "ncores": 12,
        },
        "maxiter": 2,
        "deffnm": "03_pure_mutate",
    },
    "04_local": {
        "mutate_crem_kwargs": {
            "radius": 3,
            "min_size": 0,
            "max_size": 1,
            "min_inc": -1,
            "max_inc": 1,
            "protected_ids": list(protected_ids),
            "ncores": 12,
        },
        "maxiter": 2,
        "deffnm": "04_local",
    },
}
# Write the configuration to config.yml inside the working directory
config_path = os.path.join(wd, 'config.yml')
with open(config_path, 'w') as yaml_file:
    yaml.dump(config, yaml_file)
# Show the content of the working directory
os.listdir(wd)
['x0161.pdb', 'ref.sdf', 'crem.db.gz', 'x0161.pdbqt', 'config.yml', 'crem.db']
cwd = os.getcwd()
os.chdir(wd)
! moldrug config.yml
os.chdir(cwd)
os.listdir(wd)
os.chdir(cwd)
You are using moldrug: 2.0.5. The main job is being executed. Creating the first population with 25 members: 100%|███████████████████████████████████████████| 25/25 [00:16<00:00, 1.48it/s] Initial Population: Best individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) File 01_grow_pop.sdf was createad! Evaluating generation 1 / 5: 100%|███████████████████████████████████████████| 25/25 [00:28<00:00, 1.13s/it] Generation 1: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 2 / 5: 100%|███████████████████████████████████████████| 25/25 [00:34<00:00, 1.37s/it] Generation 2: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 3 / 5: 100%|███████████████████████████████████████████| 25/25 [00:28<00:00, 1.15s/it] Generation 3: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 4 / 5: 100%|███████████████████████████████████████████| 25/25 [00:39<00:00, 1.59s/it] Generation 4: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 5 / 5: 100%|███████████████████████████████████████████| 25/25 [00:32<00:00, 1.32s/it] File 01_grow_pop.sdf was createad! Generation 5: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 5 generations with a population of 25 individuals. A total number of 150 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Final Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) The cost function droped in 0.0 units. 
=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (5 generations): 218.39 (s). Finished at Sun Aug 28 20:42:35 2022. File 01_grow_pop.sdf was createad! The main job finished!. The follow job 02_allow_grow started. File 02_allow_grow_pop.sdf was createad! Evaluating generation 6 / 10: 100%|███████████████████████████████████████████| 25/25 [00:24<00:00, 1.02it/s] Generation 6: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 7 / 10: 100%|███████████████████████████████████████████| 25/25 [00:22<00:00, 1.12it/s] Generation 7: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 8 / 10: 100%|███████████████████████████████████████████| 22/22 [00:21<00:00, 1.04it/s] Generation 8: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 9 / 10: 100%|███████████████████████████████████████████| 25/25 [00:22<00:00, 1.09it/s] Generation 9: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 10 / 10: 100%|███████████████████████████████████████████| 23/23 [00:22<00:00, 1.04it/s] File 02_allow_grow_pop.sdf was createad! Generation 10: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 10 generations with a population of 25 individuals. A total number of 269 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Final Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) The cost function droped in 0.0 units. 
=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (10 generations): 141.84 (s). Finished at Sun Aug 28 20:44:57 2022. File 02_allow_grow_pop.sdf was createad! The job 02_allow_grow finished!. The follow job 03_pure_mutate started. File 03_pure_mutate_pop.sdf was createad! Note: The mutation on Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) did not work, it will be returned the same individual Evaluating generation 11 / 12: 70%|██████████████████████████████ | 14/20 [00:12<00:04, 1.48it/s]Command vina --receptor /tmp/tmp7152x8eg/x0161.pdbqt --center_x 12.11 --center_y 1.84 --center_z 23.56 --size_x 22.5 --size_y 22.5 --size_z 22.5 --cpu 2 --exhaustiveness 4 --num_modes 1 --score_only --ligand /tmp/costfunc5a5wcm9r/283_conf_0.pdbqt returned non-zero exit status 1 /home/ale/GITLAB/moldrug/moldrug/fitness.py:407: UserWarning: vina failed. Check: 283_conf_0_error.pbz2 file. warnings.warn(f"vina failed. Check: {Individual.idx}_conf_{conf.GetId()}_error.pbz2 file.") 100%|███████████████████████████████████████████| 20/20 [00:17<00:00, 1.16it/s] Generation 11: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 12 / 12: 100%|███████████████████████████████████████████| 22/22 [00:22<00:00, 1.02s/it] File 03_pure_mutate_pop.sdf was createad! Generation 12: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 12 generations with a population of 25 individuals. A total number of 311 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Final Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) The cost function droped in 0.0 units. 
=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (12 generations): 53.36 (s). Finished at Sun Aug 28 20:45:50 2022. File 03_pure_mutate_pop.sdf was createad! The job 03_pure_mutate finished!. The follow job 04_local started. File 04_local_pop.sdf was createad! Evaluating generation 13 / 14: 100%|███████████████████████████████████████████| 20/20 [00:17<00:00, 1.13it/s] Generation 13: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 14 / 14: 100%|███████████████████████████████████████████| 22/22 [00:15<00:00, 1.39it/s] File 04_local_pop.sdf was createad! Generation 14: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 14 generations with a population of 25 individuals. A total number of 352 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Final Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) The cost function droped in 0.0 units. =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (14 generations): 42.84 (s). Finished at Sun Aug 28 20:46:33 2022. File 04_local_pop.sdf was createad! The job 04_local finished!.
# List the content of the working directory after the MolDrug run
os.listdir(wd)
['x0161.pdb', '01_grow_pop.sdf', '283_conf_0_error.pbz2', '04_local_result.pbz2', 'ref.sdf', '02_allow_grow_pop.pbz2', '03_pure_mutate_result.pbz2', 'crem.db.gz', '03_pure_mutate_pop.sdf', '04_local_pop.pbz2', '01_grow_result.pbz2', '02_allow_grow_pop.sdf', '03_pure_mutate_pop.pbz2', '02_allow_grow_result.pbz2', '04_local_pop.sdf', 'x0161.pdbqt', 'config.yml', 'crem.db', '01_grow_pop.pbz2']
import nglview as nv
import parmed as pmd
from ipywidgets import IntSlider, VBox
# Read the population saved by the last job (04_local) as ParmEd structures; requires parmed and rdkit
pop_sdf_path = os.path.join(wd, '04_local_pop.sdf')
parms = pmd.rdkit.from_sdf(pop_sdf_path)
parms
[<Structure 23 atoms; 1 residues; 23 bonds; NOT parametrized>, <Structure 33 atoms; 1 residues; 33 bonds; NOT parametrized>, <Structure 33 atoms; 1 residues; 33 bonds; NOT parametrized>, <Structure 36 atoms; 1 residues; 36 bonds; NOT parametrized>, <Structure 33 atoms; 1 residues; 33 bonds; NOT parametrized>, <Structure 39 atoms; 1 residues; 39 bonds; NOT parametrized>, <Structure 36 atoms; 1 residues; 36 bonds; NOT parametrized>, <Structure 29 atoms; 1 residues; 29 bonds; NOT parametrized>, <Structure 28 atoms; 1 residues; 28 bonds; NOT parametrized>, <Structure 31 atoms; 1 residues; 31 bonds; NOT parametrized>, <Structure 35 atoms; 1 residues; 35 bonds; NOT parametrized>, <Structure 31 atoms; 1 residues; 32 bonds; NOT parametrized>, <Structure 23 atoms; 1 residues; 23 bonds; NOT parametrized>, <Structure 30 atoms; 1 residues; 30 bonds; NOT parametrized>, <Structure 39 atoms; 1 residues; 40 bonds; NOT parametrized>, <Structure 28 atoms; 1 residues; 29 bonds; NOT parametrized>, <Structure 33 atoms; 1 residues; 34 bonds; NOT parametrized>, <Structure 33 atoms; 1 residues; 34 bonds; NOT parametrized>, <Structure 36 atoms; 1 residues; 36 bonds; NOT parametrized>, <Structure 29 atoms; 1 residues; 30 bonds; NOT parametrized>, <Structure 35 atoms; 1 residues; 35 bonds; NOT parametrized>, <Structure 28 atoms; 1 residues; 28 bonds; NOT parametrized>, <Structure 29 atoms; 1 residues; 29 bonds; NOT parametrized>, <Structure 25 atoms; 1 residues; 25 bonds; NOT parametrized>, <Structure 33 atoms; 1 residues; 33 bonds; NOT parametrized>]
# Viewer widget plus a slider with one position per loaded structure
view = nv.NGLWidget()
last_index = len(parms) - 1
slider = IntSlider(max=last_index)


def show_one_ligand(change):
    """Show only the viewer component selected by the slider.

    ``change`` is the notification received from the slider; its ``'new'``
    entry is used as the index of the only component left visible.
    """
    view.show_only([change['new']])


# Call show_one_ligand every time the slider value changes
slider.observe(show_one_ligand, names='value')
VBox([view, slider])
VBox(children=(NGLWidget(), IntSlider(value=0, max=24)))
# Add every structure of the population to the viewer
for structure in parms:
    trajectory = nv.ParmEdTrajectory(structure)
    view.add_structure(trajectory)
# Start by displaying only the first one
view.show_only([0])
# Load the result of the last job (04_local) and summarize the Vina scores of its population
result = utils.decompress_pickle(os.path.join(wd, '04_local_result.pbz2'))
# Individuals whose vina_score is inf are left out of the statistics
valid_individuals_vina_score = [individual.vina_score for individual in result.pop if individual.vina_score != np.inf]
for label, stat in (("avg", np.average), ("min", min), ("max", max)):
    print(f"{label} = {stat(valid_individuals_vina_score)}")
avg = -2.2375708 min = -4.62608 max = 6.91252
# Save the new config as a yaml file
# NOTE: dict.copy() is shallow, so the nested dictionaries are shared with `config`
# and the change below is also visible through `config`. Use copy.deepcopy(config)
# instead if the original configuration must stay untouched.
new_config = config.copy()
new_config['01_grow']['costfunc_kwargs']['constraint_type'] = 'local_only'
with open(os.path.join(wd, 'new_config.yml'), 'w') as f:
    # Dump the configuration that was just modified, not the original variable
    yaml.dump(new_config, f)
os.listdir(wd)
['x0161.pdb', '01_grow_pop.sdf', '283_conf_0_error.pbz2', '04_local_result.pbz2', 'ref.sdf', '02_allow_grow_pop.pbz2', '03_pure_mutate_result.pbz2', 'crem.db.gz', '03_pure_mutate_pop.sdf', '04_local_pop.pbz2', '01_grow_result.pbz2', '02_allow_grow_pop.sdf', '03_pure_mutate_pop.pbz2', '02_allow_grow_result.pbz2', '04_local_pop.sdf', 'new_config.yml', 'x0161.pdbqt', 'config.yml', 'crem.db', '01_grow_pop.pbz2']
cwd = os.getcwd()
os.chdir(wd)
! moldrug new_config.yml
os.chdir(cwd)
os.listdir(wd)
os.chdir(cwd)
You are using moldrug: 2.0.5. The main job is being executed. Creating the first population with 25 members: 100%|███████████████████████████████████████████| 25/25 [00:24<00:00, 1.04it/s] Initial Population: Best individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 01_grow_pop.sdf was createad! Evaluating generation 1 / 5: 100%|███████████████████████████████████████████| 25/25 [00:37<00:00, 1.49s/it] Generation 1: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 2 / 5: 100%|███████████████████████████████████████████| 25/25 [00:38<00:00, 1.53s/it] Generation 2: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 3 / 5: 100%|███████████████████████████████████████████| 25/25 [00:40<00:00, 1.61s/it] Generation 3: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 4 / 5: 100%|███████████████████████████████████████████| 25/25 [00:40<00:00, 1.64s/it] Generation 4: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 5 / 5: 100%|███████████████████████████████████████████| 24/24 [00:37<00:00, 1.55s/it] Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 01_grow_pop.sdf was createad! 
Generation 5: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 5 generations with a population of 25 individuals. A total number of 148 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Final Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) The cost function droped in 0.0 units. =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (5 generations): 256.96 (s). Finished at Sun Aug 28 20:50:53 2022. Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 01_grow_pop.sdf was createad! The main job finished!. The follow job 02_allow_grow started. Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 02_allow_grow_pop.sdf was createad! Evaluating generation 6 / 10: 100%|███████████████████████████████████████████| 24/24 [00:26<00:00, 1.10s/it] Generation 6: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 7 / 10: 100%|███████████████████████████████████████████| 24/24 [00:28<00:00, 1.19s/it] Generation 7: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). 
Evaluating generation 8 / 10: 100%|███████████████████████████████████████████| 24/24 [00:26<00:00, 1.12s/it] Generation 8: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 9 / 10: 100%|███████████████████████████████████████████| 24/24 [00:30<00:00, 1.29s/it] Generation 9: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 10 / 10: 100%|███████████████████████████████████████████| 25/25 [00:30<00:00, 1.21s/it] Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 02_allow_grow_pop.sdf was createad! Generation 10: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 10 generations with a population of 25 individuals. A total number of 269 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Final Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) The cost function droped in 0.0 units. =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (10 generations): 174.68 (s). Finished at Sun Aug 28 20:53:48 2022. Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. 
File 02_allow_grow_pop.sdf was createad! The job 02_allow_grow finished!. The follow job 03_pure_mutate started. Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 03_pure_mutate_pop.sdf was createad! Evaluating generation 11 / 12: 100%|███████████████████████████████████████████| 22/22 [00:28<00:00, 1.30s/it] Generation 11: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Note: The mutation on Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) did not work, it will be returned the same individual Note: The mutation on Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) did not work, it will be returned the same individual Note: The mutation on Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) did not work, it will be returned the same individual Evaluating generation 12 / 12: 100%|███████████████████████████████████████████| 19/19 [00:17<00:00, 1.12it/s] Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 03_pure_mutate_pop.sdf was createad! Generation 12: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 12 generations with a population of 25 individuals. A total number of 310 Individuals were seen during the simulation. 
Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Final Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) The cost function droped in 0.0 units. =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (12 generations): 58.38 (s). Finished at Sun Aug 28 20:54:47 2022. Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 03_pure_mutate_pop.sdf was createad! The job 03_pure_mutate finished!. The follow job 04_local started. Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 04_local_pop.sdf was createad! Evaluating generation 13 / 14: 100%|███████████████████████████████████████████| 23/23 [00:24<00:00, 1.08s/it] Generation 13: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). Evaluating generation 14 / 14: 100%|███████████████████████████████████████████| 21/21 [00:22<00:00, 1.08s/it] Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 04_local_pop.sdf was createad! Generation 14: Best Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0). 
=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 14 generations with a population of 25 individuals. A total number of 353 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) Final Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) The cost function droped in 0.0 units. =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (14 generations): 56.61 (s). Finished at Sun Aug 28 20:55:43 2022. Individual(idx = 12, smiles = CCCC#CCOC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 1.0) does not have a valid pdbqt. Individual(idx = 17, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. Individual(idx = 20, smiles = CCCCOc1cc(S(N)(=O)=O)ccc1C(=O)OC, cost = 1.0) does not have a valid pdbqt. File 04_local_pop.sdf was createad! The job 04_local finished!.
# Read again the population of the last job (04_local), now written by the new run; requires parmed and rdkit
local_pop_sdf = os.path.join(wd, '04_local_pop.sdf')
parms = pmd.rdkit.from_sdf(local_pop_sdf)
parms
[<Structure 23 atoms; 1 residues; 23 bonds; NOT parametrized>, <Structure 32 atoms; 1 residues; 32 bonds; NOT parametrized>, <Structure 32 atoms; 1 residues; 32 bonds; NOT parametrized>, <Structure 31 atoms; 1 residues; 31 bonds; NOT parametrized>, <Structure 27 atoms; 1 residues; 27 bonds; NOT parametrized>, <Structure 31 atoms; 1 residues; 32 bonds; NOT parametrized>, <Structure 32 atoms; 1 residues; 32 bonds; NOT parametrized>, <Structure 33 atoms; 1 residues; 33 bonds; NOT parametrized>, <Structure 26 atoms; 1 residues; 26 bonds; NOT parametrized>, <Structure 33 atoms; 1 residues; 33 bonds; NOT parametrized>, <Structure 36 atoms; 1 residues; 36 bonds; NOT parametrized>, <Structure 30 atoms; 1 residues; 31 bonds; NOT parametrized>, <Structure 37 atoms; 1 residues; 37 bonds; NOT parametrized>, <Structure 24 atoms; 1 residues; 24 bonds; NOT parametrized>, <Structure 28 atoms; 1 residues; 28 bonds; NOT parametrized>, <Structure 34 atoms; 1 residues; 34 bonds; NOT parametrized>, <Structure 32 atoms; 1 residues; 32 bonds; NOT parametrized>, <Structure 34 atoms; 1 residues; 34 bonds; NOT parametrized>, <Structure 30 atoms; 1 residues; 30 bonds; NOT parametrized>, <Structure 34 atoms; 1 residues; 34 bonds; NOT parametrized>, <Structure 36 atoms; 1 residues; 36 bonds; NOT parametrized>, <Structure 25 atoms; 1 residues; 25 bonds; NOT parametrized>]
# Fresh viewer and slider for the new population (one slider position per structure)
view = nv.NGLWidget()
n_structures = len(parms)
slider = IntSlider(max=n_structures - 1)


def show_one_ligand(change):
    """Leave visible only the viewer component whose index is the new slider value."""
    selected = change['new']
    view.show_only([selected])


# Call show_one_ligand every time the slider value changes
slider.observe(show_one_ligand, names='value')
VBox([view, slider])
VBox(children=(NGLWidget(), IntSlider(value=0, max=21)))
# Add every structure of the new population to the viewer
for parm in parms:
    view.add_structure(nv.ParmEdTrajectory(parm))

# Start by displaying only the first one
view.show_only([0])
# Load the result of the last job (04_local) of the new run and summarize its Vina scores
result_path = os.path.join(wd, '04_local_result.pbz2')
result = utils.decompress_pickle(result_path)
# Individuals whose vina_score is inf are left out of the statistics
valid_individuals_vina_score = [
    individual.vina_score
    for individual in result.pop
    if individual.vina_score != np.inf
]
avg_score = np.average(valid_individuals_vina_score)
min_score = min(valid_individuals_vina_score)
max_score = max(valid_individuals_vina_score)
print(f"avg = {avg_score}")
print(f"min = {min_score}")
print(f"max = {max_score}")
avg = -4.386658636363636 min = -5.29661 max = 1.01658
As you can see, the fixed core now changes a little. In both examples (your results may differ when you run the tutorial yourself) MolDrug cannot optimize the cost function. Even so, the local_only strategy performs better with respect to the maximum and average Vina score. One of the main causes of this behavior is the desirability definition, which for vina_score is:
'vina_score': {
'w': 1,
'SmallerTheBest': {
'Target': -12,
'UpperLimit': -6,
'r': 1
}
}
This means that if the Vina score is not lower than -6, the cost function will always be 1. In this scenario we are not optimizing the "best" individuals over the generations. In other words, it is important to first check the behavior of your system (with one small simulation) and then tune the parameters accordingly. One possibility for this system is to change UpperLimit to, for example, -4. There are many other options that you could play around with (e.g. increasing the number of generations). It is important to remark that this example is just for the tutorial; in a real project you may need to increase the number of generations, tune the CReM keywords, etc. I leave it to you as a challenge. Happy simulations!
As a bonus, here is how to change the desirability definition.
import copy

# Save the tuned config as a yaml file.
# A deep copy is needed here: dict.copy() is shallow, so adding the 'desirability'
# entry to the nested dictionary would also change the configuration it was copied from.
tune_config = copy.deepcopy(new_config)
# Custom desirability definition. Compared with the vina_score definition discussed
# above (Target = -12, UpperLimit = -6), the limits are relaxed to -8 and -3.
tune_config['01_grow']['costfunc_kwargs']['desirability'] = {
    'qed': {
        'w': 1,
        'LargerTheBest': {
            'LowerLimit': 0.1,
            'Target': 0.75,
            'r': 1
        }
    },
    'sa_score': {
        'w': 1,
        'SmallerTheBest': {
            'Target': 3,
            'UpperLimit': 7,
            'r': 1
        }
    },
    'vina_score': {
        'w': 1,
        'SmallerTheBest': {
            'Target': -8,
            'UpperLimit': -3,
            'r': 1
        }
    }
}
with open(os.path.join(wd, 'tune_config.yml'), 'w') as f:
    # Dump the tuned configuration itself, not the original `config` variable
    yaml.dump(tune_config, f)
os.listdir(wd)
['x0161.pdb', '01_grow_pop.sdf', '283_conf_0_error.pbz2', '04_local_result.pbz2', 'ref.sdf', '02_allow_grow_pop.pbz2', '03_pure_mutate_result.pbz2', 'crem.db.gz', 'tune_config.yml', '03_pure_mutate_pop.sdf', '04_local_pop.pbz2', '01_grow_result.pbz2', '02_allow_grow_pop.sdf', '03_pure_mutate_pop.pbz2', '02_allow_grow_result.pbz2', '04_local_pop.sdf', 'new_config.yml', 'x0161.pdbqt', 'config.yml', 'crem.db', '01_grow_pop.pbz2']
cwd = os.getcwd()
os.chdir(wd)
! moldrug tune_config.yml
os.chdir(cwd)
os.listdir(wd)
os.chdir(cwd)
You are using moldrug: 2.0.5. The main job is being executed. Creating the first population with 25 members: 100%|███████████████████████████████████████████| 25/25 [00:21<00:00, 1.15it/s] Initial Population: Best individual: Individual(idx = 5, smiles = COC(=O)c1ccc(S(N)(=O)=O)c(Cl)c1, cost = 0.5458783343354592) File 01_grow_pop.sdf was createad! Evaluating generation 1 / 5: 100%|███████████████████████████████████████████| 25/25 [00:35<00:00, 1.41s/it] Generation 1: Best Individual: Individual(idx = 37, smiles = CCCN(CCCN(C)C)S(=O)(=O)c1ccc(C(=O)OC)cc1, cost = 0.5159527419264649). Evaluating generation 2 / 5: 100%|███████████████████████████████████████████| 25/25 [00:37<00:00, 1.51s/it] Generation 2: Best Individual: Individual(idx = 37, smiles = CCCN(CCCN(C)C)S(=O)(=O)c1ccc(C(=O)OC)cc1, cost = 0.5159527419264649). Evaluating generation 3 / 5: 100%|███████████████████████████████████████████| 24/24 [00:48<00:00, 2.03s/it] Generation 3: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). Evaluating generation 4 / 5: 100%|███████████████████████████████████████████| 25/25 [00:41<00:00, 1.66s/it] Generation 4: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). Evaluating generation 5 / 5: 100%|███████████████████████████████████████████| 25/25 [00:37<00:00, 1.51s/it] File 01_grow_pop.sdf was createad! Generation 5: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 5 generations with a population of 25 individuals. A total number of 149 Individuals were seen during the simulation. 
Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 0.6346613653952515) Final Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764) The cost function droped in 0.3016770103136751 units. =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (5 generations): 263.29 (s). Finished at Sun Aug 28 21:00:09 2022. File 01_grow_pop.sdf was createad! The main job finished!. The follow job 02_allow_grow started. File 02_allow_grow_pop.sdf was createad! Evaluating generation 6 / 10: 100%|███████████████████████████████████████████| 25/25 [00:36<00:00, 1.48s/it] Generation 6: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). Evaluating generation 7 / 10: 100%|███████████████████████████████████████████| 24/24 [00:39<00:00, 1.64s/it] Generation 7: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). Evaluating generation 8 / 10: 100%|███████████████████████████████████████████| 25/25 [00:29<00:00, 1.18s/it] Generation 8: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). Evaluating generation 9 / 10: 100%|███████████████████████████████████████████| 24/24 [00:34<00:00, 1.43s/it] Generation 9: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). Evaluating generation 10 / 10: 100%|███████████████████████████████████████████| 22/22 [00:31<00:00, 1.42s/it] File 02_allow_grow_pop.sdf was createad! Generation 10: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). 
=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 10 generations with a population of 25 individuals. A total number of 268 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 0.6346613653952515) Final Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764) The cost function droped in 0.3016770103136751 units. =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (10 generations): 220.88 (s). Finished at Sun Aug 28 21:03:50 2022. File 02_allow_grow_pop.sdf was createad! The job 02_allow_grow finished!. The follow job 03_pure_mutate started. File 03_pure_mutate_pop.sdf was createad! Evaluating generation 11 / 12: 100%|███████████████████████████████████████████| 25/25 [00:31<00:00, 1.24s/it] Generation 11: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). Evaluating generation 12 / 12: 100%|███████████████████████████████████████████| 25/25 [00:31<00:00, 1.25s/it] File 03_pure_mutate_pop.sdf was createad! Generation 12: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 12 generations with a population of 25 individuals. A total number of 317 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 0.6346613653952515) Final Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764) The cost function droped in 0.3016770103136751 units. 
=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (12 generations): 81.47 (s). Finished at Sun Aug 28 21:05:12 2022. File 03_pure_mutate_pop.sdf was createad! The job 03_pure_mutate finished!. The follow job 04_local started. File 04_local_pop.sdf was createad! Evaluating generation 13 / 14: 100%|███████████████████████████████████████████| 23/23 [00:28<00:00, 1.25s/it] Generation 13: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). Evaluating generation 14 / 14: 100%|███████████████████████████████████████████| 22/22 [00:21<00:00, 1.02it/s] File 04_local_pop.sdf was createad! Generation 14: Best Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764). =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ The simulation finished successfully after 14 generations with a population of 25 individuals. A total number of 360 Individuals were seen during the simulation. Initial Individual: Individual(idx = 0, smiles = COC(=O)c1ccc(S(N)(=O)=O)cc1, cost = 0.6346613653952515) Final Individual: Individual(idx = 89, smiles = CCC(C)COC(=O)c1ccc(S(=O)(=O)NCc2ccco2)c(Cl)c1, cost = 0.3329843550815764) The cost function droped in 0.3016770103136751 units. =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ Total time (14 generations): 65.53 (s). Finished at Sun Aug 28 21:06:18 2022. File 04_local_pop.sdf was createad! The job 04_local finished!.
# Load the result file that the listing above shows for the 04_local job.
result_path = os.path.join(wd, '04_local_result.pbz2')
result = utils.decompress_pickle(result_path)

# Collect the Vina score of every individual in the final population,
# skipping the ones whose score is np.inf.
valid_individuals_vina_score = []
for individual in result.pop:
    score = individual.vina_score
    if score == np.inf:
        continue
    valid_individuals_vina_score.append(score)

# Summary statistics over the retained scores.
print(f"avg = {np.average(valid_individuals_vina_score)}")
print(f"min = {min(valid_individuals_vina_score)}")
print(f"max = {max(valid_individuals_vina_score)}")
avg = -5.0644092 min = -5.68406 max = -4.43432
As you can see, the maximum is much lower than in the rest of the examples. So, we get some improvement by changing the desirability.