pip install pdb-profiling
from tqdm import tqdm
from pdb_profiling import default_config
# Folder handed to default_config below; replace it with a path on your own machine.
your_output_folder = "C:/GitWorks/pdb-profiling/test/demo"
# NOTE(review): default_config is called before pdb_profiling.processors is
# imported; the ordering looks deliberate -- confirm before reordering.
default_config(your_output_folder)
from pdb_profiling.processors import SIFTS, SIFTSs, PDB, PDBs
from pdb_profiling.utils import DisplayPDB
# Set both SIFTS class-level filter attributes to empty strings
# (presumably this disables chain/entry filtering -- confirm in the pdb-profiling docs).
SIFTS.chain_filter, SIFTS.entry_filter = '', ''
# SIFTS handle for the identifier 'P21359'; the pipe_select_* calls below run on it.
demo = SIFTS('P21359')
pdb_id: PDB Entry ID
entity_id: the entity identifier of a PDB Entity (for what a PDB Entity is, please see the linked reference)
chain_id: the chain identifier of a PDB Chain
struct_asym_id: the chain identifier of a PDB Chain (unique across all PDB Entities)
Entry: UniProt Entry ID
UniProt: UniProt Isoform ID
is_canonical: whether the UniProt Isoform is the canonical sequence of that UniProt Entry
identity: sequence identity between the corresponding PDB Chain's Sequence (complete SEQRES) and the UniProt Isoform's Sequence
unp_range: mapped range of the UniProt Isoform's Sequence with its corresponding PDB Chain's Sequence (Index from 1)
pdb_range: mapped range of the PDB Chain's Sequence with its corresponding UniProt Isoform's Sequence (Index from 1)
new_unp_range: fixed (deals with InDels) mapped range of the UniProt Isoform's Sequence with its corresponding PDB Chain's Sequence (Index from 1)
new_pdb_range: fixed (deals with InDels) mapped range of the PDB Chain's Sequence with its corresponding UniProt Isoform's Sequence (Index from 1)
conflict_pdb_range: the residue indexes of the PDB Chain that conflict with the UniProt Isoform's Sequence within the mapped range (Index from 1)
select_tag: whether the chain is in the recommended representative set
select_rank: the rank among all the chains (1st denoted as the best)
%time df1 = demo.pipe_select_mo().result()
# NOTE: df1 contains every PDB chain matched to this UniProt entry
df1
# NOTE: the program recommends the rows whose select_tag is True, ranked by a series of scores; coverage is also taken into account, preferring structures that cover as much of this UniProt Isoform sequence as possible
Interactome3D MetaData
下面的代码只需要运行一次; 之后(包括重启或重新打开代码文件时)都不用再运行。
# One-off initialisation of the Interactome3D interaction metadata
# (the surrounding notes say this only ever needs to be run once).
from pdb_profiling.processors.i3d.api import Interactome3D
Interactome3D.pipe_init_interaction_meta().result()
# NOTE: if the code below has never been run, remove the leading # to uncomment it
# Only need to run once
# from pdb_profiling.processors.i3d.api import Interactome3D
# Interactome3D.pipe_init_interaction_meta().result()
Interaction metadata comes from Interactome3D
_1: denoted as the partner chain 1
_2: denoted as the partner chain 2
assembly_id
model_id: the model ID of the chain in the corresponding biological assembly PDB format file
unp_range_DSC: the Dice Similarity Coefficient of new_unp_range_1 & new_unp_range_2
interface_range_1: the range of the interaction's interface in the aspect of partner1 chain (Index from 1)
interface_range_2: the range of the interaction's interface in the aspect of partner2 chain (Index from 1)
unp_interface_range_1: the range of the interaction's interface in the aspect of partner1 chain (mapped to the UniProt Isoform)
unp_interface_range_2: the range of the interaction's interface in the aspect of partner2 chain (mapped to the UniProt Isoform)
i_select_tag: whether the interaction is in the recommended interaction representative set
i_select_rank: the rank among all the interacting-chains (1st denoted as the best)
%time df2 = demo.pipe_select_ho(run_as_completed=True, progress_bar=tqdm).result()
# NOTE: df2 contains every homomeric interaction chain pair matched to this UniProt entry; each row is one interaction pair (two interacting chains that belong to the same protein)
df2
# NOTE: the program recommends the rows whose i_select_tag is True, ranked by a series of scores; interface coverage is also taken into account, selecting as many different types of homomeric interaction structures as possible
Interaction metadata comes from Interactome3D
i_group: the Heteromeric Interaction Group
# NOTE: gives the proteins that interact with the target protein
# %time is an IPython magic, so this line only runs inside IPython/Jupyter.
%time df3 = demo.pipe_select_he(run_as_completed=True, progress_bar=tqdm).result()
# NOTE: df3 contains every heteromeric interaction chain pair matched to this UniProt entry; each row is one interaction pair (two interacting chains that belong to two different proteins)
df3
# NOTE: the program recommends the rows whose i_select_tag is True, ranked by a series of scores; interface coverage is also taken into account, selecting as many different interaction structures as possible within the same heteromeric interaction pair
# NOTE: the input UniProt Isoform may appear as either UniProt_1 or UniProt_2
# NOTE: the same heteromeric interaction pair can be identified by i_group
summary: pipe_select_mo给出目标蛋白的单体信息; pipe_select_ho、pipe_select_he分别给出目标蛋白的同聚体相互作用和异聚体相互作用的相关信息; 后两者的列与pipe_select_mo给出的列大多相同, 但每一行对应相互作用的两条链, 以_1和_2区分
# Show only the rows of df1 whose select_tag equals True.
df1[df1.select_tag.eq(True)]
# NOTE: pick one UniProt Isoform <-> PDB chain mapping
# NOTE: among the PDB chains in df1 whose select_tag is True, choose a structure that covers your mutation; check whether the new_unp_range interval covers your mutation position
# Row label 12 is what this demo uses; substitute the label of the row you chose.
record = df1.loc[12]
# show it
record
# NOTE: quick look at a static image of the structure
DisplayPDB(True).show(record['pdb_id'])
unp_residue_number: residue index in the aspect of the UniProt Isoform's Sequence (Index from 1)
residue_number: residue index in the aspect of the PDB Chain's Sequence (SEQRES, Index from 1)
author_residue_number: residue index in the aspect of the PDB Chain's Sequence but assigned by the author of this PDB Entry
summary: unp_residue_number就是unp对应序列的从1开始计数的索引位置; residue_number是对pdb链从1开始计数的索引; author_residue_number是pdb文件作者定义的索引
因为unp_residue_number和author_residue_number可能会不一致,而多数软件需要author_residue_number作为输入,所以要将位点统一转为author_residue_number再进行使用; (i.e. unp上的12号位在pdb链上不一定是12号)
一般没法事先知道pdb与unp的标号是否一致,这是一个索引映射的问题;比如unp是100长度,索引就是1,2,..100 而与这个unp匹配上的PDB晶体结构,它的对应匹配上的链是长度为91;作者给这条链的标号是66,67,...156
# Build the residue-level mapping for the selected record: the UniProt Isoform ID
# plus its InDel-fixed UniProt/PDB ranges, restricted to the record's struct_asym_id.
# NOTE(review): presumably the result carries the unp_residue_number /
# residue_number / author_residue_number columns described in the notes -- confirm.
PDB(record['pdb_id']).get_expanded_map_res_df(
record['UniProt'],
record['new_unp_range'],
record['new_pdb_range'],
struct_asym_id=record['struct_asym_id']).result()
Example: 原突变(p.P1836R) $\rightarrow$ 于3p7z的B链中变为(P1815R) $\rightarrow$ 作为foldx的突变位置输入变为(PB1815R)
在这里关注的是1836 $\rightarrow$ 1815的转变
当然同时可以通过residue_name列或df1的conflict_pdb_range列确认142位是不是氨基酸PRO
WARNING: 如果你的位点的author_insertion_code非空或者author_residue_number小于等于0, 请联系学委, 须进行额外步骤
下面展示如果有多个UniProt Isoform的批量处理方法
# optional
# Batch form: run pipe_select_mo for several UniProt identifiers in one call.
res = SIFTSs(('Q00987-2', 'Q00987-10', 'O15350')).fetch('pipe_select_mo').run(tqdm).result()
# NOTE: passing tqdm or not does not affect the run
# NOTE: res is a list and the order of the result can be different from the original input order
res[0]
res[1]