XMLObjects
Using the ref2015.wts
Scorefunction¶Warning: This notebook uses pyrosetta.distributed.viewer
code, which runs in jupyter notebook
and might not run if you're using jupyterlab
.
Note: This Jupyter notebook requires the PyRosetta distributed layer. Please make sure to activate the PyRosetta.notebooks
conda environment before running this notebook. The kernel is set to use this environment.
import logging
logging.basicConfig(level=logging.INFO)
import matplotlib
%matplotlib inline
import os
import pandas as pd
import pyrosetta
import pyrosetta.distributed.viewer as viewer
import seaborn
seaborn.set()
import sys
!pip install pyrosettacolabsetup
import pyrosettacolabsetup; pyrosettacolabsetup.install_pyrosetta()
import pyrosetta; pyrosetta.init()
Now we change the scorefunction to ref2015.wts
, the weights of which were optimized on ligands with AM1-BCC partial charges generated with Amber's antechamber
. Therefore, the Rosetta .params
file should ideally also have AM1-BCC partial charges generated with antechamber
.
ligand_params = "inputs/TPA.am1-bcc.fa.params"
flags = f"""
-ignore_unrecognized_res 1
-extra_res_fa {ligand_params}
"""
pyrosetta.distributed.init(flags)
pose = pyrosetta.io.pose_from_file(filename="inputs/test_lig.pdb")
scorefxn = pyrosetta.create_score_function("ref2015")
Ligand docking using XmlObjects
:
xml = pyrosetta.rosetta.protocols.rosetta_scripts.XmlObjects.create_from_string("""
<ROSETTASCRIPTS>
<SCOREFXNS>
<ScoreFunction name="fa_standard" weights="ref2015.wts"/>
</SCOREFXNS>
<RESIDUE_SELECTORS>
<Chain name="chX" chains="X"/>
</RESIDUE_SELECTORS>
<SIMPLE_METRICS>
<RMSDMetric name="rmsd_chX" residue_selector="chX" reference_name="store_native" residue_selector_ref="chX" robust="true" rmsd_type="rmsd_all" />
</SIMPLE_METRICS>
<SCORINGGRIDS ligand_chain="X" width="25">
<ClassicGrid grid_name="vdw" weight="1.0"/>
</SCORINGGRIDS>
<LIGAND_AREAS>
<LigandArea name="docking_sidechain_X" chain="X" cutoff="6.0" add_nbr_radius="true" all_atom_mode="true" minimize_ligand="10"/>
<LigandArea name="final_sidechain_X" chain="X" cutoff="6.0" add_nbr_radius="true" all_atom_mode="true"/>
<LigandArea name="final_backbone_X" chain="X" cutoff="7.0" add_nbr_radius="false" all_atom_mode="true" Calpha_restraints="0.3"/>
</LIGAND_AREAS>
<INTERFACE_BUILDERS>
<InterfaceBuilder name="side_chain_for_docking" ligand_areas="docking_sidechain_X"/>
<InterfaceBuilder name="side_chain_for_final" ligand_areas="final_sidechain_X"/>
<InterfaceBuilder name="backbone" ligand_areas="final_backbone_X" extension_window="3"/>
</INTERFACE_BUILDERS>
<MOVEMAP_BUILDERS>
<MoveMapBuilder name="docking" sc_interface="side_chain_for_docking" minimize_water="true"/>
<MoveMapBuilder name="final" sc_interface="side_chain_for_final" bb_interface="backbone" minimize_water="true"/>
</MOVEMAP_BUILDERS>
<MOVERS>
<SavePoseMover name="spm" restore_pose="0" reference_name="store_native"/>
<Transform name="transform" chain="X" box_size="20.0" move_distance="10" angle="360" initial_perturb="2" cycles="500" repeats="5" temperature="1000"/>
<HighResDocker name="high_res_docker" cycles="9" repack_every_Nth="3" scorefxn="fa_standard" movemap_builder="docking"/>
<FinalMinimizer name="final" scorefxn="fa_standard" movemap_builder="final"/>
</MOVERS>
<FILTERS>
<LigInterfaceEnergy name="interfE" scorefxn="fa_standard" energy_cutoff="0.0" confidence="0"/>
<SimpleMetricFilter name="rmsd_chX" metric="rmsd_chX" cutoff="999999." comparison_type="lt" confidence="0"/>
</FILTERS>
<PROTOCOLS>
<Add mover="spm"/>
<Add mover="transform"/>
<Add mover="high_res_docker"/>
<Add mover="final"/>
<Add filter="interfE"/>
<Add filter="rmsd_chX"/>
</PROTOCOLS>
</ROSETTASCRIPTS>
""").get_mover("ParsedProtocol")
Produce 5 global ligand docking trajectories using PyJobDistributor
:
if not os.getenv("DEBUG"):
working_dir = os.getcwd()
output_dir = "outputs"
if not os.path.exists(output_dir):
os.mkdir(output_dir)
os.chdir(output_dir)
jd = pyrosetta.toolbox.py_jobdistributor.PyJobDistributor(pdb_name="test_lig_XMLObjects",
nstruct=5,
scorefxn=scorefxn)
jd.native_pose = pose
df = pd.DataFrame()
while not jd.job_complete:
test_pose = pose.clone()
xml.apply(test_pose)
test_df = pd.DataFrame.from_records(dict(test_pose.scores), index=[jd.current_name])
df = df.append(test_df)
jd.output_decoy(test_pose)
os.chdir(working_dir)
Now that we have sampled some global ligand docking trajectories, let's plot the ligand binding energy landscape:
#Skip for tests (as DF is not present)
if not os.getenv("DEBUG"):
matplotlib.rcParams['figure.figsize'] = [12.0, 8.0]
seaborn.scatterplot(x="rmsd_chX", y="interfE", data=df)
We can check which .pdb
file has the lowest interfE
score:
#Skip for tests
if not os.getenv("DEBUG"):
df.sort_values(by="interfE")
Let's take a look at the pose with the lowest interfE
value that was generated:
#Skip for tests
if not os.getenv("DEBUG"):
lowest_energy_pdb_filename = os.path.join("expected_outputs", df.sort_values(by="interfE").head(1).index[0])
test_pose = pyrosetta.io.pose_from_file(filename=lowest_energy_pdb_filename)
chE = pyrosetta.rosetta.core.select.residue_selector.ChainSelector("E")
view = viewer.init(test_pose)
view.add(viewer.setStyle())
view.add(viewer.setStyle(command=({"hetflag": True}, {"stick": {"colorscheme": "brownCarbon", "radius": 0.2}})))
view.add(viewer.setSurface(residue_selector=chE, opacity=0.7, color='white'))
view.add(viewer.setHydrogenBonds())
view()
Exercise:
Re-run the above example with more sampling. Pretend that you have done enough sampling (i.e. ~2,000-10,000 global ligand docking trajectories depending on the surface area of the protein) and that the decoy with the lowest interfE
score is the "native" ligand binding mode. Re-plot the ligand binding energy landscape fixing that decoy to rmsd_chX
=0.0
Hint: You have all of the decoys saved as .pdb
files, so you need to re-score them using the command line flag -in:file:native
specifying the .pdb
file with the lowest interfE
score, that way all rmsd_chX
values correspond to the RMSD from that decoy, not the binding mode we started with above. Use the following cell to get started! The code below does not save the new scores to a scorefile, but if you would like to, make use of the pyrosetta.toolbox.py_jobdistributor.output_scorefile()
function.
Restart Jupyter Notebook kernel to properly re-initialize PyRosetta
import glob
import logging
logging.basicConfig(level=logging.INFO)
import matplotlib
%matplotlib inline
import os
import pandas as pd
import pyrosetta
import pyrosetta.distributed.viewer as viewer
import seaborn
seaborn.set()
import sys
!pip install pyrosettacolabsetup
import pyrosettacolabsetup; pyrosettacolabsetup.install_pyrosetta()
import pyrosetta; pyrosetta.init()
if not os.getenv("DEBUG"):
pdb_filenames = glob.glob("expected_outputs/test_lig_XMLObjects*.pdb")
ligand_params = "inputs/TPA.am1-bcc.fa.params"
native_pdb_filename = "expected_outputs/test_lig_XMLObjects_1.pdb"
flags = f"""
-extra_res_fa {ligand_params}
-in:file:native {native_pdb_filename}
-ignore_unrecognized_res 1
-mute all
"""
pyrosetta.distributed.init(flags)
scorefxn = pyrosetta.create_score_function("ref2015")
xml = pyrosetta.rosetta.protocols.rosetta_scripts.XmlObjects.create_from_string("""
<ROSETTASCRIPTS>
<SCOREFXNS>
<ScoreFunction name="fa_standard" weights="ref2015.wts"/>
</SCOREFXNS>
<RESIDUE_SELECTORS>
<Chain name="chX" chains="X"/>
</RESIDUE_SELECTORS>
<SIMPLE_METRICS>
<RMSDMetric name="rmsd_chX" use_native="true" residue_selector="chX" residue_selector_ref="chX" robust="true" rmsd_type="rmsd_all" />
</SIMPLE_METRICS>
<FILTERS>
<LigInterfaceEnergy name="interfE" scorefxn="fa_standard" energy_cutoff="0.0" confidence="0"/>
<SimpleMetricFilter name="rmsd_chX" metric="rmsd_chX" cutoff="999999." comparison_type="lt" confidence="0"/>
</FILTERS>
<PROTOCOLS>
<Add filter="interfE"/>
<Add filter="rmsd_chX"/>
</PROTOCOLS>
</ROSETTASCRIPTS>
""").get_mover("ParsedProtocol")
df = pd.DataFrame()
for pdb_filename in pdb_filenames:
test_pose = pyrosetta.io.pose_from_file(filename=pdb_filename)
xml.apply(test_pose)
test_df = pd.DataFrame.from_records(dict(test_pose.scores), index=[pdb_filename.split("/")[-1]])
df = df.append(test_df)
matplotlib.rcParams['figure.figsize'] = [12.0, 8.0]
seaborn.scatterplot(x="rmsd_chX", y="interfE", data=df)