#!/usr/bin/env python
# coding: utf-8
#
# *This notebook contains material from [PyRosetta](https://RosettaCommons.github.io/PyRosetta.notebooks);
# content is available [on Github](https://github.com/RosettaCommons/PyRosetta.notebooks.git).*
#
# < [Running Rosetta in Parallel](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.00-Running-PyRosetta-in-Parallel.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [Distributed computation example: miniprotein design](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.02-PyData-miniprotein-design.ipynb) >
# # Distributed analysis example: exhaustive ddG PSSM
#
# ## Notes
# This tutorial will walk you through how to generate an exhaustive ddG PSSM in PyRosetta using the PyData stack for analysis and distributed computing.
#
# This Jupyter notebook uses parallelization and is not meant to be executed within a Google Colab environment.
#
# ## Setup
# Please see setup instructions in Chapter 16.00
#
# ## Citation
# [Integration of the Rosetta Suite with the Python Software Stack via reproducible packaging and core programming interfaces for distributed simulation](https://doi.org/10.1002/pro.3721)
#
# Alexander S. Ford, Brian D. Weitzner, Christopher D. Bahl
#
# ## Manual
# Documentation for the `pyrosetta.distributed` namespace can be found here: https://nbviewer.jupyter.org/github/proteininnovation/Rosetta-PyData_Integration/blob/master/distributed_overview.ipynb
# In[ ]:
# Notebook bootstrap cell: install and initialize PyRosetta.
# NOTE(review): `get_ipython()` only exists inside an IPython/Jupyter kernel;
# this line fails in a plain Python interpreter.
get_ipython().system('pip install pyrosettacolabsetup')
# pyrosettacolabsetup fetches the (licensed) PyRosetta wheel appropriate for
# this environment; presumably a no-op if PyRosetta is already installed —
# verify against the pyrosettacolabsetup docs.
import pyrosettacolabsetup; pyrosettacolabsetup.install_pyrosetta()
# Initialize the Rosetta runtime once, before any pose/task objects are built.
import pyrosetta; pyrosetta.init()
# In[ ]:
import logging
# Root-logger at INFO so progress messages from the mutation/refinement
# steps below (logging.info calls) are visible in the notebook output.
logging.basicConfig(level=logging.INFO)
# In[ ]:
import pandas
import seaborn
import matplotlib
# In[ ]:
import Bio.SeqUtils
import Bio.Data.IUPACData as IUPACData
# In[ ]:
import pyrosetta
import pyrosetta.distributed.io as io
import pyrosetta.distributed.packed_pose as packed_pose
import pyrosetta.distributed.tasks.rosetta_scripts as rosetta_scripts
import pyrosetta.distributed.tasks.score as score
# In[ ]:
# One import per line (PEP 8). `os` and `sys` are used by later cells
# (DEBUG guards); `platform` is used here to record which interpreter
# version executed the notebook (last expression is echoed by Jupyter).
import os
import sys
import platform

platform.python_version()
# ## Create test pose, initialize rosetta and pack
# In[ ]:
# RosettaScripts XML describing the initial relax/pack protocol.
# NOTE(review): the XML body is empty in this export — it appears to have
# been stripped when the notebook was converted; confirm against the
# original notebook before running.
input_protocol = """
"""
# In[ ]:
# Wrap the XML in a task object that runs the protocol on a single pose.
input_relax = rosetta_scripts.SingleoutputRosettaScriptsTask(input_protocol)
# Syntax check via setup
input_relax.setup()
# In[ ]:
# Build a 10-residue test pose from sequence, score it, then relax/pack it.
# `input_pose` carries the baseline scores used later for delta_total_score.
raw_input_pose = score.ScorePoseTask()(io.pose_from_sequence("TESTESTEST"))
input_pose = input_relax(raw_input_pose)
# ## Perform exhaustive point mutation and pack
# In[ ]:
def mutate_residue(input_pose, res_index, new_aa, res_label=None):
    """Mutate residue `res_index` of `input_pose` to amino acid `new_aa`.

    Args:
        input_pose: A (packed) pose to mutate; the input is not modified.
        res_index: 1-based residue index to mutate.
        new_aa: Target amino acid, as a one-letter or three-letter code.
        res_label: Optional PDB reslabel to attach to the mutated residue,
            for use by the downstream RosettaScripts protocol.

    Returns:
        The pose produced by running the mutation/pack protocol.

    Raises:
        ValueError: If `new_aa` is not a recognized amino acid code.
    """
    import pyrosetta.rosetta.core.pose as pose

    work_pose = packed_pose.to_pose(input_pose)

    # Annotate structure with reslabel, for use in downstream protocol
    # Add parameters as score, for use in downstream analysis
    if res_label:
        work_pose.pdb_info().add_reslabel(res_index, res_label)
        pose.setPoseExtraScore(work_pose, "mutation_index", res_index)
        pose.setPoseExtraScore(work_pose, "mutation_aa", new_aa)

    # Normalize a one-letter code to the upper-case three-letter form that
    # RosettaScripts expects, then validate it against the canonical set.
    if len(new_aa) == 1:
        new_aa = Bio.SeqUtils.seq3(new_aa).upper()
    # Raise (not assert) so validation survives `python -O`.
    if new_aa not in {code.upper() for code in IUPACData.protein_letters_3to1}:
        raise ValueError("unrecognized amino acid code: %r" % (new_aa,))

    # NOTE(review): the protocol XML body is empty in this export — it
    # appears stripped by the notebook conversion; `% locals()` substitutes
    # res_index/new_aa into the original template. Confirm against the
    # original notebook.
    protocol = """
""" % locals()

    return rosetta_scripts.SingleoutputRosettaScriptsTask(protocol)(work_pose)
# In[ ]:
# RosettaScripts XML for refining each mutant.
# NOTE(review): the XML body is empty in this export — it appears to have
# been stripped when the notebook was converted; confirm against the
# original notebook before running.
refine = """
"""
# Task applied to every mutant pose in the dask section below.
refine_mutation = rosetta_scripts.SingleoutputRosettaScriptsTask(refine)
# # Mutation and pack
# ## Job distribution via `multiprocessing`
# In[ ]:
from multiprocessing import Pool
import itertools
# Quiet Rosetta's per-pose INFO chatter while the worker pool runs;
# our own logging.info progress messages still appear.
with pyrosetta.distributed.utility.log.LoggingContext(logging.getLogger("rosetta"), level=logging.WARN):
    with Pool() as p:
        # One work item per (residue position, amino acid) pair: the full
        # cartesian product of all residues (1-based) with all 20 one-letter
        # codes, each tagged with the "mutation" reslabel.
        work = [
            (input_pose, i, aa, "mutation")
            for i, aa in itertools.product(range(1, len(packed_pose.to_pose(input_pose).residues) + 1), IUPACData.protein_letters)
        ]
        logging.info("mutating")
        # starmap unpacks each tuple into mutate_residue's four parameters.
        mutations = p.starmap(mutate_residue, work)
# ## Job distribution via `dask`
# In[ ]:
# Refine every mutant via dask. Skipped when DEBUG is set (fast test runs).
if not os.getenv("DEBUG"):
    import dask.distributed
    # Single-worker local cluster; scale n_workers up on a real machine.
    cluster = dask.distributed.LocalCluster(n_workers=1, threads_per_worker=1)
    client = dask.distributed.Client(cluster)

    # Submit one refinement task per mutant pose; futures resolve lazily.
    refinement_tasks = [client.submit(refine_mutation, mutant) for mutant in mutations]

    logging.info("refining")
    # Block until all refinements finish, collecting results in order.
    refinements = [task.result() for task in refinement_tasks]

    client.close()
    cluster.close()
# ## Analysis of delta score
# In[ ]:
# Tabulate per-mutant scores and compute the score change vs. the input pose.
if not os.getenv("DEBUG"):
    # One row per refined mutant; columns come from each pose's score map
    # (including the mutation_index / mutation_aa extra scores set above).
    result_frame = pandas.DataFrame.from_records(packed_pose.to_dict(refinements))
    # ddG proxy: mutant total_score minus the relaxed input pose's total_score.
    result_frame["delta_total_score"] = result_frame["total_score"] - input_pose.scores["total_score"]
    # Extra scores round-trip as floats/strings; coerce back to int for pivoting.
    result_frame["mutation_index"] = list(map(int, result_frame["mutation_index"]))
# In[ ]:
# Render the ddG PSSM: amino acids on the y axis, residue positions on the
# x axis, colored by delta_total_score (blue = stabilizing, red = destabilizing).
if not os.getenv("DEBUG"):
    # Wide figure so every residue position fits legibly.
    matplotlib.rcParams['figure.figsize'] = [24.0, 8.0]
    seaborn.heatmap(
        # pivot's positional arguments were deprecated in pandas 1.1 and
        # removed in pandas 2.0 — pass them by keyword.
        result_frame.pivot(index="mutation_aa",
                           columns="mutation_index",
                           values="delta_total_score"),
        # Diverging palette centered at 0 (no score change); clip at +50
        # so a few extreme destabilizers don't wash out the scale.
        cmap="RdBu_r", center=0, vmax=50)
# In[ ]:
#
# < [Running Rosetta in Parallel](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.00-Running-PyRosetta-in-Parallel.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [Distributed computation example: miniprotein design](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.02-PyData-miniprotein-design.ipynb) >