#!/usr/bin/env python
# coding: utf-8
#
# *This notebook contains material from [PyRosetta](https://RosettaCommons.github.io/PyRosetta.notebooks);
# content is available [on Github](https://github.com/RosettaCommons/PyRosetta.notebooks.git).*
#
# < [Running Rosetta in Parallel](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.00-Running-PyRosetta-in-Parallel.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [Distributed computation example: miniprotein design](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.02-PyData-miniprotein-design.ipynb) >
# # Distributed analysis example: exhaustive ddG PSSM
#
# ## Notes
# This tutorial will walk you through how to generate an exhaustive ddG PSSM in PyRosetta using the PyData stack for analysis and distributed computing.
#
# This Jupyter notebook uses parallelization and is not meant to be executed within a Google Colab environment.
#
# ## Setup
# Please see setup instructions in Chapter 16.00
#
# ## Citation
# [Integration of the Rosetta Suite with the Python Software Stack via reproducible packaging and core programming interfaces for distributed simulation](https://doi.org/10.1002/pro.3721)
#
# Alexander S. Ford, Brian D. Weitzner, Christopher D. Bahl
#
# ## Manual
# Documentation for the `pyrosetta.distributed` namespace can be found here: https://nbviewer.jupyter.org/github/proteininnovation/Rosetta-PyData_Integration/blob/master/distributed_overview.ipynb
# In[ ]:
# Notebook bootstrap cell: install and initialize PyRosetta.
# NOTE(review): `get_ipython()` only exists inside an IPython/Jupyter kernel;
# this line fails in a plain Python interpreter.
get_ipython().system('pip install pyrosettacolabsetup')
# pyrosettacolabsetup fetches the (licensed) PyRosetta wheel appropriate for
# this environment; presumably a no-op if PyRosetta is already installed —
# verify against the pyrosettacolabsetup docs.
import pyrosettacolabsetup; pyrosettacolabsetup.install_pyrosetta()
# Initialize the Rosetta runtime once, before any pose/task objects are built.
import pyrosetta; pyrosetta.init()
# In[ ]:
import logging
# Root-logger at INFO so progress messages from the mutation/refinement
# steps below (logging.info calls) are visible in the notebook output.
logging.basicConfig(level=logging.INFO)
# In[ ]:
import pandas
import seaborn
import matplotlib
# In[ ]:
import Bio.SeqUtils
import Bio.Data.IUPACData as IUPACData
# In[ ]:
import pyrosetta
import pyrosetta.distributed.io as io
import pyrosetta.distributed.packed_pose as packed_pose
import pyrosetta.distributed.tasks.rosetta_scripts as rosetta_scripts
import pyrosetta.distributed.tasks.score as score
# In[ ]:
# One import per line (PEP 8). `os` and `sys` are used by later cells
# (DEBUG guards); `platform` is used here to record which interpreter
# version executed the notebook (last expression is echoed by Jupyter).
import os
import sys
import platform

platform.python_version()
# ## Create test pose, initialize rosetta and pack
# In[ ]:
# RosettaScripts XML describing the initial relax/pack protocol.
# NOTE(review): the XML body is empty in this export — it appears to have
# been stripped when the notebook was converted; confirm against the
# original notebook before running.
input_protocol = """
"""
# In[ ]:
# Wrap the XML in a task object that runs the protocol on a single pose.
input_relax = rosetta_scripts.SingleoutputRosettaScriptsTask(input_protocol)
# Syntax check via setup
input_relax.setup()
# In[ ]:
# Build a 10-residue test pose from sequence, score it, then relax/pack it.
# `input_pose` carries the baseline scores used later for delta_total_score.
raw_input_pose = score.ScorePoseTask()(io.pose_from_sequence("TESTESTEST"))
input_pose = input_relax(raw_input_pose)
# ## Perform exhaustive point mutation and pack
# In[ ]:
def mutate_residue(input_pose, res_index, new_aa, res_label=None):
    """Mutate residue `res_index` of `input_pose` to amino acid `new_aa`.

    Args:
        input_pose: A (packed) pose to mutate; the input is not modified.
        res_index: 1-based residue index to mutate.
        new_aa: Target amino acid, as a one-letter or three-letter code.
        res_label: Optional PDB reslabel to attach to the mutated residue,
            for use by the downstream RosettaScripts protocol.

    Returns:
        The pose produced by running the mutation/pack protocol.

    Raises:
        ValueError: If `new_aa` is not a recognized amino acid code.
    """
    import pyrosetta.rosetta.core.pose as pose

    work_pose = packed_pose.to_pose(input_pose)

    # Annotate structure with reslabel, for use in downstream protocol
    # Add parameters as score, for use in downstream analysis
    if res_label:
        work_pose.pdb_info().add_reslabel(res_index, res_label)
        pose.setPoseExtraScore(work_pose, "mutation_index", res_index)
        pose.setPoseExtraScore(work_pose, "mutation_aa", new_aa)

    # Normalize a one-letter code to the upper-case three-letter form that
    # RosettaScripts expects, then validate it against the canonical set.
    if len(new_aa) == 1:
        new_aa = Bio.SeqUtils.seq3(new_aa).upper()
    # Raise (not assert) so validation survives `python -O`.
    if new_aa not in {code.upper() for code in IUPACData.protein_letters_3to1}:
        raise ValueError("unrecognized amino acid code: %r" % (new_aa,))

    # NOTE(review): the protocol XML body is empty in this export — it
    # appears stripped by the notebook conversion; `% locals()` substitutes
    # res_index/new_aa into the original template. Confirm against the
    # original notebook.
    protocol = """
""" % locals()

    return rosetta_scripts.SingleoutputRosettaScriptsTask(protocol)(work_pose)
# In[ ]:
# RosettaScripts XML for refining each mutant.
# NOTE(review): the XML body is empty in this export — it appears to have
# been stripped when the notebook was converted; confirm against the
# original notebook before running.
refine = """
"""
# Task applied to every mutant pose in the dask section below.
refine_mutation = rosetta_scripts.SingleoutputRosettaScriptsTask(refine)
# # Mutation and pack
# ## Job distribution via `multiprocessing`
# In[ ]:
from multiprocessing import Pool
import itertools
# Quiet Rosetta's per-pose INFO chatter while the worker pool runs;
# our own logging.info progress messages still appear.
with pyrosetta.distributed.utility.log.LoggingContext(logging.getLogger("rosetta"), level=logging.WARN):
    with Pool() as p:
        # One work item per (residue position, amino acid) pair: the full
        # cartesian product of all residues (1-based) with all 20 one-letter
        # codes, each tagged with the "mutation" reslabel.
        work = [
            (input_pose, i, aa, "mutation")
            for i, aa in itertools.product(range(1, len(packed_pose.to_pose(input_pose).residues) + 1), IUPACData.protein_letters)
        ]
        logging.info("mutating")
        # starmap unpacks each tuple into mutate_residue's four parameters.
        mutations = p.starmap(mutate_residue, work)
# ## Job distribution via `dask`
# In[ ]:
# Refine every mutant via dask. Skipped when DEBUG is set (fast test runs).
if not os.getenv("DEBUG"):
    import dask.distributed
    # Single-worker local cluster; scale n_workers up on a real machine.
    cluster = dask.distributed.LocalCluster(n_workers=1, threads_per_worker=1)
    client = dask.distributed.Client(cluster)

    # Submit one refinement task per mutant pose; futures resolve lazily.
    refinement_tasks = [client.submit(refine_mutation, mutant) for mutant in mutations]

    logging.info("refining")
    # Block until all refinements finish, collecting results in order.
    refinements = [task.result() for task in refinement_tasks]

    client.close()
    cluster.close()
# ## Analysis of delta score
# In[ ]:
# Tabulate per-mutant scores and compute the score change vs. the input pose.
if not os.getenv("DEBUG"):
    # One row per refined mutant; columns come from each pose's score map
    # (including the mutation_index / mutation_aa extra scores set above).
    result_frame = pandas.DataFrame.from_records(packed_pose.to_dict(refinements))
    # ddG proxy: mutant total_score minus the relaxed input pose's total_score.
    result_frame["delta_total_score"] = result_frame["total_score"] - input_pose.scores["total_score"]
    # Extra scores round-trip as floats/strings; coerce back to int for pivoting.
    result_frame["mutation_index"] = list(map(int, result_frame["mutation_index"]))
# In[ ]:
# Render the ddG PSSM: amino acids on the y axis, residue positions on the
# x axis, colored by delta_total_score (blue = stabilizing, red = destabilizing).
if not os.getenv("DEBUG"):
    # Wide figure so every residue position fits legibly.
    matplotlib.rcParams['figure.figsize'] = [24.0, 8.0]
    seaborn.heatmap(
        # pivot's positional arguments were deprecated in pandas 1.1 and
        # removed in pandas 2.0 — pass them by keyword.
        result_frame.pivot(index="mutation_aa",
                           columns="mutation_index",
                           values="delta_total_score"),
        # Diverging palette centered at 0 (no score change); clip at +50
        # so a few extreme destabilizers don't wash out the scale.
        cmap="RdBu_r", center=0, vmax=50)
# In[ ]:
#
# < [Running Rosetta in Parallel](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.00-Running-PyRosetta-in-Parallel.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [Distributed computation example: miniprotein design](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/16.02-PyData-miniprotein-design.ipynb) >