# Load a whitespace-separated score table. The first row is taken as the
# alphabet (column labels); every following row has its first field (the row
# label) dropped, leaving only the scores.
# The context manager closes the file handle once the rows are read.
with open('input/blosum62_affine.txt', 'r') as f:
    score = [line.strip().split() for line in f]
alphabet = score[0]
# Entries stay as strings (they come straight from str.split()); convert with
# int() before doing arithmetic on them.
blosum62 = [i[1:] for i in score[1:]]
# Sample lookup: the score for the pair ('R', 'R').
blosum62[alphabet.index('R')][alphabet.index('R')]
'5'
#!/usr/bin/env python
'''A Bioinformatics Algorithms script containing scoring matrices.'''
import os
class BLOSUM62(object):
    """The BLOSUM62 scoring matrix class.

    Scores are read from 'input/BLOSUM62.txt'. Each non-blank line must hold
    at least three whitespace-separated fields: the two symbols of a pair
    followed by their integer score.
    """

    def __init__(self):
        """Initialize the scoring matrix from the data file.

        The file is looked up relative to this module's directory. When
        __file__ is undefined (e.g. the class was pasted into an interactive
        session), the current working directory is used instead.
        """
        try:
            base_dir = os.path.dirname(__file__)
        except NameError:
            # Interactive interpreters and notebooks do not define __file__.
            base_dir = os.getcwd()

        with open(os.path.join(base_dir, 'input/BLOSUM62.txt')) as input_data:
            # Skip blank lines so a trailing newline cannot raise IndexError.
            items = [line.split() for line in input_data if line.strip()]

        self.scoring_matrix = {
            (item[0], item[1]): int(item[2]) for item in items
        }

    def __getitem__(self, pair):
        """Return the score for the given pair of symbols.

        `pair` is any indexable with at least two elements, such as a tuple
        ('A', 'R') or the string 'AR'. Raises KeyError for an unknown pair.
        """
        return self.scoring_matrix[pair[0], pair[1]]
class PAM250(object):
    """The PAM250 scoring matrix class.

    Scores are read from 'data/PAM250.txt'. Each non-blank line must hold at
    least three whitespace-separated fields: the two symbols of a pair
    followed by their integer score.
    """

    def __init__(self):
        """Initialize the scoring matrix from the data file.

        The file is looked up relative to this module's directory. When
        __file__ is undefined (e.g. the class was pasted into an interactive
        session), the current working directory is used instead.
        """
        try:
            base_dir = os.path.dirname(__file__)
        except NameError:
            # Interactive interpreters and notebooks do not define __file__.
            base_dir = os.getcwd()

        # NOTE(review): the BLOSUM62 class in this file reads from 'input/';
        # confirm that 'data/' is the intended directory here.
        with open(os.path.join(base_dir, 'data/PAM250.txt')) as input_data:
            # Skip blank lines so a trailing newline cannot raise IndexError.
            items = [line.split() for line in input_data if line.strip()]

        self.scoring_matrix = {
            (item[0], item[1]): int(item[2]) for item in items
        }

    def __getitem__(self, pair):
        """Return the score for the given pair of symbols.

        `pair` is any indexable with at least two elements, such as a tuple
        ('A', 'R') or the string 'AR'. Raises KeyError for an unknown pair.
        """
        return self.scoring_matrix[pair[0], pair[1]]
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-13-63b2c8c90850> in <module>() ----> 1 BLOSUM62() <ipython-input-11-bc2495e10efd> in __init__(self) 10 def __init__(self): 11 """Initialize the scoring matrix.""" ---> 12 with open(os.path.join(os.path.dirname(__file__), 'data/BLOSUM62.txt')) as input_data: 13 items = [line.strip().split() for line in input_data.readlines()] 14 self.scoring_matrix = {(item[0], item[1]):int(item[2]) for item in items} NameError: global name '__file__' is not defined
def global_alignment_affine_gap_penalty(v, w, scoring_matrix, sigma, epsilon):
    '''Return the optimal global alignment of v and w with affine gap penalties.

    A gap of length L costs sigma + (L - 1) * epsilon.

    Parameters:
        v, w: the two sequences (strings) to align.
        scoring_matrix: object indexable as scoring_matrix[a, b] that yields
            the substitution score for symbols a and b.
        sigma: gap opening penalty (given as a positive number).
        epsilon: gap extension penalty (given as a positive number).

    Returns:
        A tuple (score, v_aligned, w_aligned) where score is the optimal
        alignment score converted to a string and the aligned sequences use
        '-' for indels.
    '''
    LOWER, MIDDLE, UPPER = 0, 1, 2
    rows, cols = len(v) + 1, len(w) + 1

    # Unreachable boundary states must never win a max(). A finite stand-in
    # such as -10*sigma can outscore genuine gap costs on long sequences, so
    # use a true negative infinity.
    unreachable = float('-inf')

    # S[k][i][j] is the best score of aligning v[:i] with w[:j] ending in
    # state k: LOWER = gap in w, MIDDLE = any/match, UPPER = gap in v.
    S = [[[0 for _ in range(cols)] for _ in range(rows)] for _ in range(3)]
    backtrack = [[[0 for _ in range(cols)] for _ in range(rows)] for _ in range(3)]

    # Initialize the edges with the given penalties.
    for i in range(1, rows):
        edge_gap = -sigma - (i - 1) * epsilon
        S[LOWER][i][0] = edge_gap
        S[MIDDLE][i][0] = edge_gap
        S[UPPER][i][0] = unreachable
    for j in range(1, cols):
        edge_gap = -sigma - (j - 1) * epsilon
        S[UPPER][0][j] = edge_gap
        S[MIDDLE][0][j] = edge_gap
        S[LOWER][0][j] = unreachable

    # Fill in the scores for the lower, middle, upper, and backtrack matrices.
    for i in range(1, rows):
        for j in range(1, cols):
            # Index 0 = extend the current gap, index 1 = open one from MIDDLE.
            lower_scores = [S[LOWER][i-1][j] - epsilon, S[MIDDLE][i-1][j] - sigma]
            S[LOWER][i][j] = max(lower_scores)
            backtrack[LOWER][i][j] = lower_scores.index(S[LOWER][i][j])

            upper_scores = [S[UPPER][i][j-1] - epsilon, S[MIDDLE][i][j-1] - sigma]
            S[UPPER][i][j] = max(upper_scores)
            backtrack[UPPER][i][j] = upper_scores.index(S[UPPER][i][j])

            # Index 0 = close a LOWER gap, 1 = diagonal step, 2 = close an
            # UPPER gap.
            middle_scores = [
                S[LOWER][i][j],
                S[MIDDLE][i-1][j-1] + scoring_matrix[v[i-1], w[j-1]],
                S[UPPER][i][j],
            ]
            S[MIDDLE][i][j] = max(middle_scores)
            backtrack[MIDDLE][i][j] = middle_scores.index(S[MIDDLE][i][j])

    # Initialize the values of i, j and the aligned sequences.
    i, j = len(v), len(w)
    v_aligned, w_aligned = v, w

    # Get the maximum score, and the corresponding backtrack starting matrix.
    matrix_scores = [S[LOWER][i][j], S[MIDDLE][i][j], S[UPPER][i][j]]
    max_score = max(matrix_scores)
    backtrack_matrix = matrix_scores.index(max_score)

    def insert_indel(word, position):
        '''Return word with a '-' inserted before index position.'''
        return word[:position] + '-' + word[position:]

    # Backtrack to the edge of the matrix starting bottom right.
    while i * j != 0:
        if backtrack_matrix == LOWER:
            if backtrack[LOWER][i][j] == 1:
                backtrack_matrix = MIDDLE
            i -= 1
            w_aligned = insert_indel(w_aligned, j)
        elif backtrack_matrix == MIDDLE:
            if backtrack[MIDDLE][i][j] == 0:
                backtrack_matrix = LOWER
            elif backtrack[MIDDLE][i][j] == 2:
                backtrack_matrix = UPPER
            else:
                i -= 1
                j -= 1
        else:
            if backtrack[UPPER][i][j] == 1:
                backtrack_matrix = MIDDLE
            j -= 1
            v_aligned = insert_indel(v_aligned, i)

    # Prepend the necessary preceding indels to get to (0, 0).
    for _ in range(i):
        w_aligned = insert_indel(w_aligned, 0)
    for _ in range(j):
        v_aligned = insert_indel(v_aligned, 0)

    return str(max_score), v_aligned, w_aligned
v = 'PRTEINS'
w = 'PRTWPSEIN'
# scoring_matrix is indexed as scoring_matrix[a, b], so pass a BLOSUM62
# instance; the bare class object does not support item lookup.
global_alignment_affine_gap_penalty(v, w, BLOSUM62(), 11, 1)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-23-1b8f717a7bbe> in <module>() 1 v = 'PRTEINS' 2 w = 'PRTWPSEIN' ----> 3 global_alignment_affine_gap_penalty(v, w, BLOSUM62, 11, 1) <ipython-input-22-7e38bfcb39f2> in global_alignment_affine_gap_penalty(v, w, scoring_matrix, sigma, epsilon) 26 backtrack[2][i][j] = upper_scores.index(S[2][i][j]) 27 ---> 28 middle_scores = [S[0][i][j], S[1][i-1][j-1] + scoring_matrix[v[i-1], w[j-1]], S[2][i][j]] 29 S[1][i][j] = max(middle_scores) 30 backtrack[1][i][j] = middle_scores.index(S[1][i][j]) TypeError: 'type' object has no attribute '__getitem__'