def exampleCost(xc, yc, zc):
""" Cost function assigning the score of a column of the alignment matrix is equal to 1
if all of the column's symbols are identical, and 0 if even one symbol disagrees """
if xc == yc == zc: return 1
#if xc == '-' or yc == '-' or zc == '-': return 0
else: return 0
import numpy as np
def multipleSequenceAlignment(v, w, u, s):
arr3d = np.zeros((len(v)+1, len(w)+1, len(u)+1), dtype = 'int')
for i in xrange(1, len(v) + 1):
for j in xrange(1, len(w) + 1):
for k in xrange(1, len(u) + 1):
arr3d[i,j,k] = max(arr3d[i-1, j, k] + s(v[i-1], '-', '-'),
arr3d[i, j-1, k] + s('-', w[j-1], '-'),
arr3d[i, j, k-1] + s('-', '-', u[k-1]),
arr3d[i-1, j-1, k] + s(v[i-1], w[j-1], '-'),
arr3d[i-1, j, k-1] + s(v[i-1], '-', u[k-1]),
arr3d[i, j-1, k-1] + s('-', w[j-1], u[k-1]),
arr3d[i-1, j-1, k-1] + s(v[i-1], w[j-1], u[k-1]))
return arr3d
v, w, u = 'ATATCCG', 'TCCGA', 'ATGTACTG'
arr3d = multipleSequenceAlignment(v, w, u, exampleCost)
print arr3d
[[[0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0]] [[0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0] [0 0 0 0 0 0 0 0 0] [0 1 1 1 1 1 1 1 1]] [[0 0 0 0 0 0 0 0 0] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 1 1 1 1 1 1 1 1]] [[0 0 0 0 0 0 0 0 0] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 1 1 1 1 2 2 2 2]] [[0 0 0 0 0 0 0 0 0] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 1 1 1] [0 1 1 1 1 2 2 2 2]] [[0 0 0 0 0 0 0 0 0] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 2 2 2] [0 0 1 1 1 1 2 2 2] [0 0 1 1 1 1 2 2 2] [0 1 1 1 1 2 2 2 2]] [[0 0 0 0 0 0 0 0 0] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 2 2 2] [0 0 1 1 1 1 2 2 2] [0 0 1 1 1 1 2 2 2] [0 1 1 1 1 2 2 2 2]] [[0 0 0 0 0 0 0 0 0] [0 0 1 1 1 1 1 1 1] [0 0 1 1 1 1 2 2 2] [0 0 1 1 1 1 2 2 2] [0 0 1 2 2 2 2 2 3] [0 1 1 2 2 2 2 2 3]]]
def traceback_MSA(arr3d, v, w, u, s):
i,j,k = len(v), len(w), len(u)
alv, alw, alu = [], [], []
while (i,j,k) != (0,0,0):
max_score = max(arr3d[i-1, j, k] + s(v[i-1], '-', '-'),
arr3d[i, j-1, k] + s('-', w[j-1], '-'),
arr3d[i, j, k-1] + s('-', '-', u[k-1]),
arr3d[i-1, j-1, k] + s(v[i-1], w[j-1], '-'),
arr3d[i-1, j, k-1] + s(v[i-1], '-', u[k-1]),
arr3d[i, j-1, k-1] + s('-', w[j-1], u[k-1]),
arr3d[i-1, j-1, k-1] + s(v[i-1], w[j-1], u[k-1]))
print max_score
if arr3d[i, j-1, k-1] + s('-', w[j-1], u[k-1]) == max_score:
alv.append('-'); alw.append(w[j-1]); alu.append(u[k-1])
j -= 1; k -=1
elif arr3d[i-1, j-1, k-1] + s(v[i-1], w[j-1], u[k-1]) == max_score:
alv.append(v[i-1]); alw.append(w[j-1]); alu.append(u[k-1])
i -= 1; j -= 1; k -= 1
elif arr3d[i, j-1, k] + s('-', w[j-1], '-') == max_score:
alv.append('-'); alw.append(w[j-1]); alu.append('-')
j -= 1
elif arr3d[i-1, j-1, k] + s(v[i-1], w[j-1], '-') == max_score:
alv.append(v[i-1]); alw.append(w[j-1]); alu.append('-')
i -= 1; j -= 1
elif arr3d[i-1, j, k] + s(v[i-1], '-', '-') == max_score:
alv.append(v[i-1]); alw.append('-'); alu.append('-')
i -= 1
elif arr3d[i, j, k-1] + s('-', '-', u[k-1]) == max_score:
alv.append('-'); alw.append('-'); alu.append(u[k-1])
k -= 1
elif arr3d[i-1, j, k-1] + s(v[i-1], '-', u[k-1]) == max_score:
alv.append(v[i-1]); alw.append('-'); alu.append(u[k-1])
i-=1; k-=1
print alv, alw, alu
alignment = map(lambda x: ''.join(x), [alv[::-1], alw[::-1], alu[::-1]])
return alignment
v, w, u = 'ATATCCG', 'TCCGA', 'ATGTACTG'
arr3d = multipleSequenceAlignment(v, w, u, exampleCost)
algn = traceback_MSA(arr3d, v, w, u, exampleCost)
print algn
3 ['-'] ['A'] ['-'] 3 ['-', 'G'] ['A', 'G'] ['-', 'G'] 2 ['-', 'G', '-'] ['A', 'G', 'C'] ['-', 'G', 'T'] 2 ['-', 'G', '-', 'C'] ['A', 'G', 'C', 'C'] ['-', 'G', 'T', 'C'] 1 ['-', 'G', '-', 'C', 'C'] ['A', 'G', 'C', 'C', '-'] ['-', 'G', 'T', 'C', '-'] 1 ['-', 'G', '-', 'C', 'C', 'T'] ['A', 'G', 'C', 'C', '-', '-'] ['-', 'G', 'T', 'C', '-', '-'] 1 ['-', 'G', '-', 'C', 'C', 'T', 'A'] ['A', 'G', 'C', 'C', '-', '-', '-'] ['-', 'G', 'T', 'C', '-', '-', '-'] 1 ['-', 'G', '-', 'C', 'C', 'T', 'A', '-'] ['A', 'G', 'C', 'C', '-', '-', '-', '-'] ['-', 'G', 'T', 'C', '-', '-', '-', 'A'] 1 ['-', 'G', '-', 'C', 'C', 'T', 'A', '-', 'T'] ['A', 'G', 'C', 'C', '-', '-', '-', '-', 'T'] ['-', 'G', 'T', 'C', '-', '-', '-', 'A', 'T'] 1 ['-', 'G', '-', 'C', 'C', 'T', 'A', '-', 'T', '-'] ['A', 'G', 'C', 'C', '-', '-', '-', '-', 'T', 'A'] ['-', 'G', 'T', 'C', '-', '-', '-', 'A', 'T', 'G'] 1 ['-', 'G', '-', 'C', 'C', 'T', 'A', '-', 'T', '-', '-'] ['A', 'G', 'C', 'C', '-', '-', '-', '-', 'T', 'A', '-'] ['-', 'G', 'T', 'C', '-', '-', '-', 'A', 'T', 'G', 'T'] 0 ['-', 'G', '-', 'C', 'C', 'T', 'A', '-', 'T', '-', '-', '-'] ['A', 'G', 'C', 'C', '-', '-', '-', '-', 'T', 'A', '-', 'G'] ['-', 'G', 'T', 'C', '-', '-', '-', 'A', 'T', 'G', 'T', 'A'] 0 ['-', 'G', '-', 'C', 'C', 'T', 'A', '-', 'T', '-', '-', '-', '-'] ['A', 'G', 'C', 'C', '-', '-', '-', '-', 'T', 'A', '-', 'G', 'C'] ['-', 'G', 'T', 'C', '-', '-', '-', 'A', 'T', 'G', 'T', 'A', 'G'] 0 ['-', 'G', '-', 'C', 'C', 'T', 'A', '-', 'T', '-', '-', '-', '-', '-'] ['A', 'G', 'C', 'C', '-', '-', '-', '-', 'T', 'A', '-', 'G', 'C', 'C'] ['-', 'G', 'T', 'C', '-', '-', '-', 'A', 'T', 'G', 'T', 'A', 'G', 'T'] 0 ['-', 'G', '-', 'C', 'C', 'T', 'A', '-', 'T', '-', '-', '-', '-', '-', '-'] ['A', 'G', 'C', 'C', '-', '-', '-', '-', 'T', 'A', '-', 'G', 'C', 'C', 'T'] ['-', 'G', 'T', 'C', '-', '-', '-', 'A', 'T', 'G', 'T', 'A', 'G', 'T', 'C']
--------------------------------------------------------------------------- IndexError Traceback (most recent call last) <ipython-input-131-d26f35c10e3d> in <module>() 1 v, w, u = 'ATATCCG', 'TCCGA', 'ATGTACTG' 2 arr3d = multipleSequenceAlignment(v, w, u, exampleCost) ----> 3 algn = traceback_MSA(arr3d, v, w, u, exampleCost) 4 print algn <ipython-input-130-e00898d95f52> in traceback_MSA(arr3d, v, w, u, s) 4 while (i,j,k) != (0,0,0): 5 max_score = max(arr3d[i-1, j, k] + s(v[i-1], '-', '-'), ----> 6 arr3d[i, j-1, k] + s('-', w[j-1], '-'), 7 arr3d[i, j, k-1] + s('-', '-', u[k-1]), 8 arr3d[i-1, j-1, k] + s(v[i-1], w[j-1], '-'), IndexError: string index out of range
len(u)
8
'a' == 'a' == 'a'
True