# Suffix tree built with simple O(m^2)-time algorithm.
class SuffixTree(object):
class Node(object):
def __init__(self, depth, off, ln, lab=None):
self.depth = depth
self.off = off # offset into T of substring
self.ln = ln # length of substring
self.out = {} # outgoing edges; maps characters to nodes
def __init__(self, t):
""" Make suffix tree, without suffix links, from s in quadratic time
and linear space """
t += '$'
self.t = t
self.root = self.Node(0, 0, 0, None)
self.root.out[t[0]] = self.Node(len(t), 0, len(t), t)
self.nodes = []
for i in range(1, len(t)):
cur = self.root
j = i
while j < len(t):
if t[j] in cur.out:
child = cur.out[t[j]]
lab = t[child.off:child.off+child.ln]
k = j+1 # Walk along edge
while k-j < len(lab) and t[k] == lab[k-j]:
k += 1
if k-j == len(lab):
cur = child # exhausted the edge
j = k
else:
# fell off in middle of edge
cExist, cNew = lab[k-j], t[k]
mid = self.Node(cur.depth + k-j, child.off, k-j, lab[:k-j])
mid.out[cNew] = self.Node(mid.depth + len(t[k:]), k, len(t[k:]), t[k:])
self.nodes.append(mid)
self.nodes.append(mid.out[cNew])
mid.out[cExist] = child
child.off += (k-j)
child.ln -= (k-j)
cur.out[t[j]] = mid
else:
# Create a new edge hanging off of this node
cur.out[t[j]] = self.Node(cur.depth + len(t[j:]), j, len(t[j:]), t[j:])
self.nodes.append(cur.out[t[j]])
def saLcp(self):
# Return suffix array and an LCP1 array corresponding to this
# suffix tree. self.root is root, self.t is the text.
self.minSinceLeaf = 0
sa, lcp1 = [], []
def __visit(n):
if len(n.out) == 0:
# leaf node, record offset and LCP1 with previous leaf
sa.append(len(self.t) - n.depth)
lcp1.append(self.minSinceLeaf)
# reset LCP1 to depth of this leaf
self.minSinceLeaf = n.depth
# visit children in lexicographical order
for c, child in sorted(n.out.items()):
__visit(child)
# after each child visit, perhaps decrease
# minimum-depth-since-last-leaf value
self.minSinceLeaf = min(self.minSinceLeaf, n.depth)
__visit(self.root)
return sa, lcp1[1:]
# example from the lecture notes
st = SuffixTree('abaaba')
sa, lcp1 = st.saLcp()
sa, lcp1
([6, 5, 2, 3, 0, 4, 1], [0, 1, 1, 3, 0, 2])