#!/usr/bin/env python # coding: utf-8 # # *This notebook contains material from [PyRosetta](https://RosettaCommons.github.io/PyRosetta.notebooks); # content is available [on Github](https://github.com/RosettaCommons/PyRosetta.notebooks.git).* # # < [Fast Fourier Transform Based Docking via ZDOCK](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/07.01-Fast-Fourier-Transform-Based-Docking-via-ZDOCK.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [Ligand Refinement in PyRosetta (a.k.a. High-Resolution Local Docking) Using the `ligand.wts` Scorefunction](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/08.00-Ligand-Docking-PyRosetta.ipynb) >

# # Docking Moves in Rosetta # Keywords: SwitchResidueTypeMover(), fold_tree(), Jump, EDGE, RigidBodyPerturbMover(), DockingSlideIntoContact(), FaDockingSlideIntoContact(), PyJobDistributor(), output_decoy(), DockMCMProtocol(), DockingLowRes(), ReturnSidechainMover() # # ## Overview # Here, we will cover many types of Movers in Rosetta that can aid in docking, and cover the FoldTree used for docking. Note that the DockMCMProtocol is the general high-resolution docking algorithm used by Rosetta, while DockingLowRes is used for low-resolution dock moves. # In[ ]: get_ipython().system('pip install pyrosettacolabsetup') import pyrosettacolabsetup; pyrosettacolabsetup.install_pyrosetta() import pyrosetta; pyrosetta.init() # **Make sure you are in the directory with the pdb files:** # # `cd google_drive/MyDrive/student-notebooks/` # For the following exercises, download and clean the complex of colicin D and ImmD (1V74). Don't forget to import `pyrosetta` and initialize. Store three poses — a full-atom starting pose and centroid and full-atom “working” poses. # # ``` # from pyrosetta import * # from pyrosetta.teaching import * # from pyrosetta.toolbox import cleanATOM # # pyrosetta.init() # cleanATOM("1V74.pdb") # pose = pose_from_file("1V74.clean.pdb") # starting_pose = pose.clone() # cen_pose = pose.clone() # cen_switch = SwitchResidueTypeSetMover("centroid") # cen_switch.apply(cen_pose) # starting_cen_pose = cen_pose.clone() # ``` # In[1]: import os if not os.getenv("DEBUG"): ### BEGIN SOLUTION from pyrosetta import * from pyrosetta.teaching import * from pyrosetta.toolbox import cleanATOM pyrosetta.init() from pyrosetta.toolbox import pose_from_rcsb pose = pose_from_rcsb("1V74") starting_pose = pose.clone() cen_pose = pose.clone() cen_switch = SwitchResidueTypeSetMover("centroid") cen_switch.apply(cen_pose) starting_cen_pose = cen_pose.clone() ### END SOLUTION # ## Rigid-body transformations and Fold Trees # # The fundamental docking move is a rigid-body transformation consisting of a translation and rotation. Any rigid body move also needs to know which part moves and which part is fixed. In Rosetta, this division is known as a `Jump` and the set of protein segments and jumps are stored in an object attached to a pose called a `FoldTree`. # # ``` # print(pose.fold_tree()) # ``` # In[2]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION print(pose.fold_tree()) ### END SOLUTION # In the `FoldTree` printout, each three number sequence following the word `EDGE` is the beginning and ending residue number, then a code. The codes are -1 for stretches of protein and any positive integer for a `Jump`, which represents the `Jump` number. # # __Question:__ View the fold tree of your full-atom pose. How many `Jumps` are there in your pose? # In[3]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION print(starting_pose.fold_tree()) print("There's 1 Jump!") ### END SOLUTION # # By default, there is a `Jump` between the N-terminus of chain A and the N-terminus of chain B, but we can change this using the exposed method `setup_foldtree()`. # # ``` # from pyrosetta.rosetta.protocols.docking import setup_foldtree # print(pose.fold_tree()) # setup_foldtree(pose, "A_B", Vector1([1])) # setup_foldtree(starting_pose, "A_B", Vector1([1])) # print(pose.fold_tree()) # ``` # In[4]: if not os.getenv("DEBUG"): ### BEGIN SOLUITON from pyrosetta.rosetta.protocols.docking import setup_foldtree print(pose.fold_tree()) setup_foldtree(pose, "A_B", Vector1([1])) setup_foldtree(starting_pose, "A_B", Vector1([1])) print(pose.fold_tree()) ### END SOLUTION # The argument "A_B" tells Rosetta to make chain A the “rigid” chain and allow chain B to move. If there were more chains in the pdb structure, supplying "AB_C" would hold chains A and B rigid together as a single unit and allow chain C to move. (The third argument `Vector1([1]`) is required, it creates a Rosetta vector object — indexed from 1 — with one element that identifies the first `Jump` in the `FoldTree` for docking use.) # # __Question:__ The above command changed the `FoldTree` and prepared it for docking. What has changed? # In[5]: ### BEGIN SOLUTION print("We changed the Jump that was connecting the N-termini of A and B into a Jump that connects the centers of A and B.") ### END SOLUTION # # You can see the type of information in the `Jump` by printing it from the `pose`: # # ``` # jump_num = 1 # print(pose.jump(jump_num).get_rotation()) # rotation matrix # print(pose.jump(jump_num).get_translation()) # translation vector # ``` # In[6]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION jump_num = 1 print(pose.jump(jump_num).get_rotation()) print('\n') print(pose.jump(jump_num).get_translation()) ### END SOLUTION # ## RigidBody Movers # The two basic manipulations are translations and rotations. For translation, the change in x, y, and z coordinates are needed as well as the `Jump` number. A rotation requires a center and an axis about which to rotate. The rigid-body displacement can be altered directly with the `RigidBodyTransMover` for translations or the `RigidBodySpinMover` for rotations. # # # However, for structure prediction calculations, we have a `Mover` that is preconfigured to make random movements varying around set magnitudes (in this case, a mean of 8° rotation and 3 Å translation) located in the `rosetta.protocols.rigid` namespace, (which we will rename with an alias `rigid_moves` for our convenience) : # # ``` # import pyrosetta.rosetta.protocols.rigid as rigid_moves # pert_mover = rigid_moves.RigidBodyPerturbMover(jump_num, 8, 3) # ``` # In[7]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION import pyrosetta.rosetta.protocols.rigid as rigid_moves pert_mover = rigid_moves.RigidBodyPerturbMover(jump_num, 8, 3) ### END SOLUTION # Apply the `RigidBodyPerturbMover` to a full-atom pose and (optional) use a `PyMOLMover` to confirm that the motions are what you expect. # # __Question:__ What are the new rotation matrix and translation vector in the `Jump`? How many ångströms did the downstream protein move? # In[8]: if not os.getenv("DEBUG"): from pyrosetta import PyMOLMover pymol = PyMOLMover() pymol.apply(pose) # In[9]: if not os.getenv("DEBUG"): pert_mover.apply(pose) pymol.apply(pose) # Global perturbations are useful for making completely randomized starting structures. The following `Mover` will rotate a protein about its geometric center. The final orientation is equally distributed over the “globe”. # # ``` # randomize1 = rigid_moves.RigidBodyRandomizeMover(pose, jump_num, rigid_moves.partner_upstream) # randomize2 = rigid_moves.RigidBodyRandomizeMover(pose, jump_num, rigid_moves.partner_downstream) # ``` # # (`partner_upstream` and `partner_downstream` are predefined terms within the `pyrosetta.rosetta.protocols.rigid` namespace, which in our case refer to chains A and B, respectively.) # In[10]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION randomize1 = rigid_moves.RigidBodyRandomizeMover(pose, jump_num, rigid_moves.partner_upstream) randomize2 = rigid_moves.RigidBodyRandomizeMover(pose, jump_num, rigid_moves.partner_downstream) ### END SOLUTION # Apply both `Movers` to the starting structure, and view the structure in PyMOL. (You might view it along with the original pose). # # __Question:__ Does the new conformation look like a candidate docked structure yet? # In[11]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION randomize1.apply(pose) pymol.apply(pose) ### END SOLUTION # In[12]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION randomize2.apply(pose) pymol.apply(pose) ### END SOLUTION # Since proteins are not spherical, sometimes the random orientation creates severe clashes between the docking partners, and other times it places the partners so they are no longer touching. The `FaDockingSlideIntoContact` `Mover` will translate the downstream protein along the line of protein centers until the proteins are in contact. # # ``` # slide = DockingSlideIntoContact(jump_num) # for centroid mode # slide = FaDockingSlideIntoContact(jump_num) # for full-atom mode # slide.apply(pose) # ``` # In[13]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION slide = DockingSlideIntoContact(jump_num) # for centroid mode slide = FaDockingSlideIntoContact(jump_num) # for full-atom mode slide.apply(pose) pymol.apply(pose) ### END SOLUTION # The `MinMover`, which we have previously used to change torsion angles to find the nearest minimum in the score function, can also operate on the jump translation and rotation. It suffices to set the `Jump` variable as moveable in the `MoveMap`: # # ``` # movemap = MoveMap() # movemap.set_jump(jump_num, True) # min_mover = MinMover() # min_mover.movemap(movemap) # scorefxn = get_fa_scorefxn() # min_mover.score_function(scorefxn) # ``` # In[14]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION movemap = MoveMap() movemap.set_jump(jump_num, True) min_mover = MinMover() min_mover.movemap(movemap) scorefxn = get_fa_scorefxn() min_mover.score_function(scorefxn) ### END SOLUTION # Apply the above `MinMover` to the working pose # # ``` # scorefxn(pose) # min_mover.apply(pose) # print(pose.jump(jump_num).get_rotation()) # print(pose.jump(jump_num).get_translation()) # ``` # # __Question:__ How much does the score change? What are the new rotation matrix and translation vector in the `Jump`? How many Ångstroms did the downstream protein move? # In[15]: #Skip for tests if not os.getenv("DEBUG"): ### BEGIN SOLUTION scorefxn(pose) min_mover.apply(pose) print(pose.jump(jump_num).get_rotation()) print(pose.jump(jump_num).get_translation()) ### END SOLUTION # ## Low-Resolution Docking via RosettaDock # # # RosettaDock can also perform global docking runs, but it can require significant time. Typically, $10^{5}$ - $10^{6}$ decoys are needed in a global run. For this workshop, we will create a much smaller number and learn the tools needed to handle large runs. # # # Docking is available as a mover that completely encompasses the protocol. To use the `Mover`, you will need a starting pose with both chains and a jump defined. The structure must be in low-resolution (centroid) mode, and you will need the low-resolution docking score function: # # ``` # scorefxn_low = create_score_function("interchain_cen") # ``` # In[16]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION scorefxn_low = create_score_function("interchain_cen") ### END SOLUTION # Randomize your centroid version of the complex. Then, create low-resolution docking structures as follows: # # ``` # dock_lowres = DockingLowRes(scorefxn_low, jump_num) # print(cen_pose.fold_tree()) # setup_foldtree(cen_pose, "A_B", Vector1([1])) # print(cen_pose.fold_tree()) # dock_lowres.apply(cen_pose) # ``` # In[17]: #Skip for tests if not os.getenv("DEBUG"): ### BEGIN SOLUTION dock_lowres = DockingLowRes(scorefxn_low, jump_num) print(cen_pose.fold_tree()) setup_foldtree(cen_pose, "A_B", Vector1([1])) print(cen_pose.fold_tree()) dock_lowres.apply(cen_pose) ### END SOLUTION # You can compare structures by calculating the root-mean-squared deviation of all the Cα atoms, using the function `CA_rmsd(pose1, pose2)`. In docking, a more useful measure is the ligand RMSD, which is the deviation of the backbone Cα atoms of the ligand after superposition of the receptor protein backbones. You can calculate ligand RMSD with `calc_Lrmsd(pose1, pose2, Vector1([1])`). # # ``` # print(CA_rmsd(cen_pose, starting_cen_pose)) # print(calc_Lrmsd(cen_pose, starting_cen_pose, Vector1([1]))) # ``` # # __Question:__ Using both measures, how far did your pose move from the low-resolution search? # In[18]: if not os.getenv("DEBUG"): print(CA_rmsd(cen_pose, starting_cen_pose)) print(calc_Lrmsd(cen_pose, starting_cen_pose, Vector1([1]))) # Examine the created decoys in PyMOL directly. # ``` # pymol.keep_history(True) # pymol.apply(cen_pose) # pymol.apply(pose) # ``` # OR dump the pdbs to view in PyMOL. # ``` # cen_pose.dump_pdb("cen_pose.pdb") # pose.dump_pdb("pose.pdb") # ``` # # In[ ]: # __Question:__ Does it look like a reasonable structure for a protein-protein complex? Explain. # In[ ]: # ## Job Distributor # # # For exhaustive searches with Rosetta (docking, refinement, or folding), it is necessary to create a large number of candidate structures, termed “decoys”. This is often accomplished by spreading out the work over a large number of computers. Additionally, each decoy created needs to be individually labeled. The object that is responsible for managing the output is called a `PyJobDistributor`. Here, we will use a simple job distributor to create multiple structures. The following constructor sets the job distributor to create 10 decoys, with filenames like `output_1.pdb`, `output_2.pdb`, etc. The pdb files will also include scores according to the `ScoreFunction` provided. # # ``` # jd = PyJobDistributor("output", 10, scorefxn_low) # ``` # In[20]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION jd = PyJobDistributor("output", 10, scorefxn_low) ### END SOLUTION # It is also useful to compare each decoy to the native structure (if it is known; otherwise any reference structure can be used). The job distributor will do the RMSD calculation and final scoring upon output. To set the native pose: # # ``` # # your starting_cen_pose should be the native crystal structure # jd.native_pose = starting_cen_pose # ``` # In[21]: if not os.getenv("DEBUG"): # your starting_cen_pose should be the native crystal structure ### BEGIN SOLUTION jd.native_pose = starting_cen_pose ### END SOLUTION # Create a randomized starting pose, working pose, fold tree, score function, job distributor, and low-resolution docking mover. Now, run the low-resolution docking protocol to create a structure, and output a decoy: # # ``` # cen_pose.assign(starting_cen_pose) # dock_lowres.apply(cen_pose) # jd.output_decoy(cen_pose) # ``` # # Do this twice and confirm that you have two output files in your working directory. # In[22]: #Skip for tests if not os.getenv("DEBUG"): ### BEGIN SOLUTION cen_pose.assign(starting_cen_pose) dock_lowres.apply(cen_pose) jd.output_decoy(cen_pose) ### END SOLUTION # Whenever the `output_decoy()` method is called, the `current_num` variable of the `PyJobDistributor` is incremented, and it also outputs an updated score file: `output.fasc`. We can finish the set of 10 decoys by using the `PyJobDistributor` to set up a loop: # # ``` # while not jd.job_complete: # cen_pose.assign(starting_cen_pose) # dock_lowres.apply(cen_pose) # jd.output_decoy(cen_pose) # ``` # # Note the `jd.job_complete` Boolean variable that indicates whether all 10 decoys have been created. # In[23]: #Skip for tests if not os.getenv("DEBUG"): ### BEGIN SOLUTION while not jd.job_complete: cen_pose.assign(starting_cen_pose) dock_lowres.apply(cen_pose) jd.output_decoy(cen_pose) ### END SOLUTION # Run the loop to create 10 structures. The score file, `output.fasc` summarizes the energies and RMSDs of all structures created. # # __Question:__ Examine that file. What is the lowest score? What is the lowest energy? # In[ ]: # Reset the `PyJobDistributor` to create 100 decoys (or more or less, as the speed of your processor allows) by reconstructing it. Rerun the loop above to make 100 decoys. Use your score file to plot score versus RMSD. (Two easy ways to do this are to import the score file into Excel or to use the Linux command gnuplot.) # # ``` # jd = PyJobDistributor("output", 100, scorefxn_low) # while not jd.job_complete: # cen_pose.assign(starting_cen_pose) # dock_lowres.apply(cen_pose) # jd.output_decoy(cen_pose) # ``` # # __Question:__ Do you see an energy funnel? # In[24]: #Skip for tests if not os.getenv("DEBUG"): ### BEGIN SOLUTION jd = PyJobDistributor("output", 100, scorefxn_low) while not jd.job_complete: cen_pose.assign(starting_cen_pose) dock_lowres.apply(cen_pose) jd.output_decoy(cen_pose) ### END SOLUTION # ## High-Resolution Docking # # # The high-resolution stage of RosettaDock is also available as a `Mover`. This mover encompasses random rigid-body moves, side-chain packing, and gradient-based minimization in the rigid-body coordinates. High-resolution docking needs an all-atom score function. The optimized docking weights are available as a patch to the standard all-atom energy function. # # ``` # scorefxn_high = create_score_function("ref2015.wts", "docking") # dock_hires = DockMCMProtocol() # dock_hires.set_scorefxn(scorefxn_high) # dock_hires.set_partners("A_B") # make sure the FoldTree is set up properly # ``` # # __Note__ that unlike for `DockingLowRes`, we must supply the docking partners with `"A_B"` instead of `jump_num`. # In[ ]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION scorefxn_high = create_score_function("ref2015.wts", "docking") dock_hires = DockMCMProtocol() dock_hires.set_scorefxn(scorefxn_high) dock_hires.set_partners("A_B") # make sure the FoldTree is set up properly ### END SOLUTION # A high-resolution decoy needs side chains. One way to place the side chains is to call the `PackMover`, which will generate a conformation from rotamers. A second way is to copy the side chains from the original monomer structures. This is often helpful for docking calculations since the monomer crystal structures have good side chain positions. # # ``` # recover_sidechains = ReturnSidechainMover(starting_pose) # recover_sidechains.apply(pose) # ``` # In[ ]: if not os.getenv("DEBUG"): ### BEGIN SOLUTION recover_sidechains = ReturnSidechainMover(starting_pose) recover_sidechains.apply(pose) ### END SOLUTION # Load one of your low-resolution decoys, add the side chains from the starting pose, and refine the decoy using high-resolution docking. # # __Question:__ How far did the structure move during refinement? How much did the score improve? # In[ ]: # Starting from your lowest-scoring low-resolution decoy, create three high-resolution decoys. (You might use the `PyJobDistributor`.) Do the same starting from the native structure. # # __Questions:__ # # - How do the refined-native scores compare to the refined-decoy scores? # # - What is the RMSD of the refined native? Why is it not zero? # # - How much variation do you see in the refined native scores? In the refined decoy scores? Is the difference between the refined natives and the refined decoys significant? # In[ ]: # ## Docking Funnel # # Using a `PyJobDistributor` and `DockMCMProtocol`, create 10 decoys starting with a `RigidBodyRandomizeMover` perturbation of `partner_downstream`, 10 decoys starting from different local random perturbations (8°, 3 Å), 10 decoys starting from low-resolution decoys, and 10 starting from the native structure. Plot all of these points on a funnel plot. # # __Question:__ How is the sampling from each method? Does the scoring function discriminate good complexes? # In[ ]: # ## Programming Exercises # # # - Output a structure with a 10 Å translation and another with a 30° rotation (both starting from the same starting structure), and load them into PyMOL to confirm the motions are what you expect. # # # - Diffusion. Make a series of random rigid body perturbations and record the RMSD after each. Plot RMSD versus the number of moves. Does this process emulate diffusion? If it did, how would you know? (Hint: there is a way to plot these data to make them linear.) # # # - Starting from a low-resolution docking decoy, refine the structure in three separate ways: # # - side-chain packing # # - gradient-based minimization in the rigid-body coordinates # # - gradient-based minimization in the torsional coordinates # # - the docking high-resolution protocol # # For each, note the change in RMSD and the change in score. Which operations move the protein the most? Which make the most difference in the score? # # - Using the `MonteCarlo` object, the `RigidBodyMover`, `PackRotamers`, and the `MinMover`, create your own high-resolution docking protocol. Bonus: Can you tune it to beat the standard protocol? “Beating” the standard protocol could mean achieving lower energies, running in faster time, and/or being able to better predict complexes. # In[ ]: # # < [Fast Fourier Transform Based Docking via ZDOCK](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/07.01-Fast-Fourier-Transform-Based-Docking-via-ZDOCK.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [Ligand Refinement in PyRosetta (a.k.a. High-Resolution Local Docking) Using the `ligand.wts` Scorefunction](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/08.00-Ligand-Docking-PyRosetta.ipynb) >