# For space reasons,I'm not showing the implementations of these # imported functions from index_edit import kEditDp, Index, partition def queryIndexEdit(p, t, k, index): ''' Look for occurrences of p in t with up to k edits using an index combined with dynamic-programming alignment. ''' l = index.ln occurrences = [] seen = set() # for avoiding reporting same hit twice for part, poff in partition(p, k+1): for hit in index.occurrences(part): # query index w/ partition # left edge of T to include in DP matrix lf = max(0, hit - poff - k) # right edge of T to include in DP matrix rt = min(len(t), hit - poff + len(p) + k) mn, off, xcript = kEditDp(p, t[lf:rt]) off += lf if mn <= k and (mn, off) not in seen: occurrences.append((mn, off, xcript)) seen.add((mn, off)) return occurrences t = 'I had two banana splits' ind = Index(t, ln=4) queryIndexEdit('bananasplit', t, 1, ind) t = 'haystack needle haystack noodle haystack nedle haystack' # needle noodle ne-dle # |||||| | ||| || ||| # needle needle needle p = 'needle' # Find the two occurrences that are within edit distance 1 # The substring length for the index has to be <= 3 (why?) queryIndexEdit(p, t, 1, Index(t, ln=3)) # Find the three occurrences that are within edit distance 2 # The substring length for the index has to be <= 2 (why?) queryIndexEdit('needle', t, 2, Index(t, ln=2))