#!/usr/bin/env python # coding: utf-8 # In[43]: def exampleCost(xc, yc, zc): """ Cost function assigning the score of a column of the alignment matrix is equal to 1 if all of the column's symbols are identical, and 0 if even one symbol disagrees """ if xc == yc == zc: return 1 #if xc == '-' or yc == '-' or zc == '-': return 0 else: return 0 # In[46]: import numpy as np def multipleSequenceAlignment(v, w, u, s): arr3d = np.zeros((len(v)+1, len(w)+1, len(u)+1), dtype = 'int') for i in xrange(1, len(v) + 1): for j in xrange(1, len(w) + 1): for k in xrange(1, len(u) + 1): arr3d[i,j,k] = max(arr3d[i-1, j, k] + s(v[i-1], '-', '-'), arr3d[i, j-1, k] + s('-', w[j-1], '-'), arr3d[i, j, k-1] + s('-', '-', u[k-1]), arr3d[i-1, j-1, k] + s(v[i-1], w[j-1], '-'), arr3d[i-1, j, k-1] + s(v[i-1], '-', u[k-1]), arr3d[i, j-1, k-1] + s('-', w[j-1], u[k-1]), arr3d[i-1, j-1, k-1] + s(v[i-1], w[j-1], u[k-1])) return arr3d # In[48]: v, w, u = 'ATATCCG', 'TCCGA', 'ATGTACTG' arr3d = multipleSequenceAlignment(v, w, u, exampleCost) print arr3d # In[130]: def traceback_MSA(arr3d, v, w, u, s): i,j,k = len(v), len(w), len(u) alv, alw, alu = [], [], [] while (i,j,k) != (0,0,0): max_score = max(arr3d[i-1, j, k] + s(v[i-1], '-', '-'), arr3d[i, j-1, k] + s('-', w[j-1], '-'), arr3d[i, j, k-1] + s('-', '-', u[k-1]), arr3d[i-1, j-1, k] + s(v[i-1], w[j-1], '-'), arr3d[i-1, j, k-1] + s(v[i-1], '-', u[k-1]), arr3d[i, j-1, k-1] + s('-', w[j-1], u[k-1]), arr3d[i-1, j-1, k-1] + s(v[i-1], w[j-1], u[k-1])) print max_score if arr3d[i, j-1, k-1] + s('-', w[j-1], u[k-1]) == max_score: alv.append('-'); alw.append(w[j-1]); alu.append(u[k-1]) j -= 1; k -=1 elif arr3d[i-1, j-1, k-1] + s(v[i-1], w[j-1], u[k-1]) == max_score: alv.append(v[i-1]); alw.append(w[j-1]); alu.append(u[k-1]) i -= 1; j -= 1; k -= 1 elif arr3d[i, j-1, k] + s('-', w[j-1], '-') == max_score: alv.append('-'); alw.append(w[j-1]); alu.append('-') j -= 1 elif arr3d[i-1, j-1, k] + s(v[i-1], w[j-1], '-') == max_score: alv.append(v[i-1]); alw.append(w[j-1]); alu.append('-') i -= 1; j -= 1 elif arr3d[i-1, j, k] + s(v[i-1], '-', '-') == max_score: alv.append(v[i-1]); alw.append('-'); alu.append('-') i -= 1 elif arr3d[i, j, k-1] + s('-', '-', u[k-1]) == max_score: alv.append('-'); alw.append('-'); alu.append(u[k-1]) k -= 1 elif arr3d[i-1, j, k-1] + s(v[i-1], '-', u[k-1]) == max_score: alv.append(v[i-1]); alw.append('-'); alu.append(u[k-1]) i-=1; k-=1 print alv, alw, alu alignment = map(lambda x: ''.join(x), [alv[::-1], alw[::-1], alu[::-1]]) return alignment # In[131]: v, w, u = 'ATATCCG', 'TCCGA', 'ATGTACTG' arr3d = multipleSequenceAlignment(v, w, u, exampleCost) algn = traceback_MSA(arr3d, v, w, u, exampleCost) print algn # In[59]: len(u) # In[70]: 'a' == 'a' == 'a' # In[ ]: