import collections
import json
from collatex import collate
from tf.app import use

# Repo location and data version of the Fusus corpus.
# NOTE(review): BASE is not used in this notebook — kept for reference.
BASE = "~/github/among/fusus"
VERSION = "0.7"

# Acronyms of the two editions that we align against each other.
LK = "LK"
AF = "AF"
EDITIONS = {
    LK: "Lakhnawi",
    AF: "Afifi",
}

# Per-edition Text-Fabric handles:
#   A[acro]       - the TF app object (used for logging: indent/info)
#   F[acro]       - the feature API of that edition
#   maxSlot[acro] - the number of word slots in that edition
A = {}
F = {}
maxSlot = {}
for (acro, name) in EDITIONS.items():
    A[acro] = use(f"among/fusus/tf/{name}:clone", writing="ara", version=VERSION)
    F[acro] = A[acro].api.F
    maxSlot[acro] = F[acro].otype.maxSlot
maxSlot  # notebook display: slot counts per edition
This is Text-Fabric 9.1.3 Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html 27 features found and 0 ignored
This is Text-Fabric 9.1.3 Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html 17 features found and 0 ignored
{'LK': 40379, 'AF': 40271}
# Shorthands: text of a slot (feature `lettersn`) and the highest slot
# number, per edition.
getTextLK = F[LK].lettersn.v
getTextAF = F[AF].lettersn.v
maxLK = maxSlot[LK]
maxAF = maxSlot[AF]
First a small example.
# Small smoke test: collate the first 9 words of both editions.
# A CollateX token is a dict with `t` (the text, here with a trailing
# space) plus arbitrary payload; we add `s` (the slot number) so we can
# map aligned tokens back to the corpus.
tokensLK = [dict(t=f"{getTextLK(slot)} ", s=slot) for slot in range(1, 10)]
tokensAF = [dict(t=f"{getTextAF(slot)} ", s=slot) for slot in range(1, 10)]
data = dict(
    witnesses=[
        dict(id=LK, tokens=tokensLK),
        dict(id=AF, tokens=tokensAF),
    ],
)
A[LK].indent(reset=True)
A[LK].info("Run collatex")
# JSON output for further processing, ASCII table for eyeballing.
result = collate(data, output="json", segmentation=False, near_match=True)
resultAscii = collate(data, output="table", segmentation=False, near_match=True)
A[LK].info("Done")
0.00s Run collatex 0.01s Done
# Show the human-readable alignment table.
print(resultAscii)
+----+-----------+-------+-------+-----+------+-------+-----+------+-------+--------+--------+ | LK | - | - | ālḥmd | llh | mnzl | ālḥkm | ʿlá | ḳlwb | ālklm | bāḥdyŧ | ālṭryḳ | | AF | bnzlylālʿ | ylrʿā | ālḥmd | lh | mnzl | ālḥk | ʿlá | ḳlwb | ālklm | - | - | +----+-----------+-------+-------+-----+------+-------+-----+------+-------+--------+--------+
# The JSON result contains a "table": one row of cells per witness,
# where each cell is either None (gap) or a list of token dicts.
output = json.loads(result)["table"]
outputLK = output[0]
outputAF = output[1]
print(output[0])
print("=========")
print(output[1])
[None, None, [{'_sigil': 'LK', '_token_array_position': 0, 's': 1, 't': 'ālḥmd '}], [{'_sigil': 'LK', '_token_array_position': 1, 's': 2, 't': 'llh '}], [{'_sigil': 'LK', '_token_array_position': 2, 's': 3, 't': 'mnzl '}], [{'_sigil': 'LK', '_token_array_position': 3, 's': 4, 't': 'ālḥkm '}], [{'_sigil': 'LK', '_token_array_position': 4, 's': 5, 't': 'ʿlá '}], [{'_sigil': 'LK', '_token_array_position': 5, 's': 6, 't': 'ḳlwb '}], [{'_sigil': 'LK', '_token_array_position': 6, 's': 7, 't': 'ālklm '}], [{'_sigil': 'LK', '_token_array_position': 7, 's': 8, 't': 'bāḥdyŧ '}], [{'_sigil': 'LK', '_token_array_position': 8, 's': 9, 't': 'ālṭryḳ '}]] ========= [[{'_sigil': 'AF', '_token_array_position': 10, 's': 1, 't': 'bnzlylālʿ '}], [{'_sigil': 'AF', '_token_array_position': 11, 's': 2, 't': 'ylrʿā '}], [{'_sigil': 'AF', '_token_array_position': 12, 's': 3, 't': 'ālḥmd '}], [{'_sigil': 'AF', '_token_array_position': 13, 's': 4, 't': 'lh '}], [{'_sigil': 'AF', '_token_array_position': 14, 's': 5, 't': 'mnzl '}], [{'_sigil': 'AF', '_token_array_position': 15, 's': 6, 't': 'ālḥk '}], [{'_sigil': 'AF', '_token_array_position': 16, 's': 7, 't': 'ʿlá '}], [{'_sigil': 'AF', '_token_array_position': 17, 's': 8, 't': 'ḳlwb '}], [{'_sigil': 'AF', '_token_array_position': 18, 's': 9, 't': 'ālklm '}], None, None]
We need to turn the output into a clean alignment list.
def makeAlignment(result):
    """Turn a CollateX JSON result into a flat alignment table.

    Returns a list of 4-tuples (iLK, textLK, textAF, iAF): the slot number
    and text of each witness at every aligned position. A gap in a witness
    yields empty strings for its slot number and text.
    """
    table = json.loads(result)["table"]

    def cell(chunk):
        # A None cell means this witness has no token at this position.
        # With segmentation=False every non-empty cell holds one token.
        if chunk is None:
            return ("", "")
        token = chunk[0]
        return (token["s"], token["t"])

    entries = []
    for (chunkLK, chunkAF) in zip(table[0], table[1]):
        (iLK, textLK) = cell(chunkLK)
        (iAF, textAF) = cell(chunkAF)
        entries.append((iLK, textLK, textAF, iAF))
    return entries
# Build and display the alignment table for the small example.
alignment = makeAlignment(result)
alignment
[('', '', 'bnzlylālʿ ', 1), ('', '', 'ylrʿā ', 2), (1, 'ālḥmd ', 'ālḥmd ', 3), (2, 'llh ', 'lh ', 4), (3, 'mnzl ', 'mnzl ', 5), (4, 'ālḥkm ', 'ālḥk ', 6), (5, 'ʿlá ', 'ʿlá ', 7), (6, 'ḳlwb ', 'ḳlwb ', 8), (7, 'ālklm ', 'ālklm ', 9), (8, 'bāḥdyŧ ', '', ''), (9, 'ālṭryḳ ', '', '')]
How is the performance?
def test(size=None):
    """Collate the first `size` slots of both editions and report timings.

    With size=None the edition-wide maximum slot number is used as bound.
    NOTE(review): range(1, bound) excludes the bound itself, so the very
    last slot is never included — confirm the exclusive bound is intended.
    Returns the alignment table produced by makeAlignment.
    """
    boundLK = maxLK if size is None else size
    boundAF = maxAF if size is None else size

    witnesses = []
    for (acro, getText, bound) in (
        (LK, getTextLK, boundLK),
        (AF, getTextAF, boundAF),
    ):
        tokens = [dict(t=f"{getText(slot)} ", s=slot) for slot in range(1, bound)]
        witnesses.append(dict(id=acro, tokens=tokens))
    data = dict(witnesses=witnesses)

    A[LK].indent(reset=True)
    A[LK].info("Run collatex")
    result = collate(data, output="json", segmentation=False, near_match=True)
    A[LK].info("collation done")
    alignment = makeAlignment(result)
    A[LK].info(f"postprocessing done. {len(alignment)} entries in alignment table")
    return alignment
# Timing runs at increasing sizes; the pasted log lines show that the
# collation time grows roughly quadratically with the number of tokens.
alignment = test(10)
0.00s Run collatex 0.00s collation done 0.00s postprocessing done. 11 entries in alignment table
alignment = test(100)
0.00s Run collatex 0.10s collation done 0.10s postprocessing done. 102 entries in alignment table
alignment = test(1000)
0.00s Run collatex 7.87s collation done 7.87s postprocessing done. 1039 entries in alignment table
alignment = test(2000)
0.00s Run collatex 34s collation done 34s postprocessing done. 2057 entries in alignment table
alignment = test(4000)
0.00s Run collatex 2m 44s collation done 2m 44s postprocessing done. 4095 entries in alignment table
The performance does not scale well. Our editions are 40,000 words each, so running Collatex on the full input will require 100 times as much time as this, probably over 5 hours.
In our case, we are sure that we do not have to compare every part of one edition with every part of the other edition — that would require quadratic effort, which appears to be what Collatex is spending. A solution would be to divide the input into 100-word chunks and run Collatex repeatedly on pairs of chunks. But that would require quite subtle coding to handle cases where variants straddle chunk boundaries.
We also do not get information about the closeness of the variants.
But how is the quality of the matching?
We apply the same quality checks that we used on the results of the algorithm in the
compareAFLk
notebook, with minor modifications.
We only can do it on the first 10% of the input, because we did not wait for those 5 hours.
def printLines(start=0, end=None):
    """Render alignment[start:end] as a fixed-width text table.

    Each row shows: LK slot, LK text, a @0/@1 dissimilarity flag
    (0 = texts equal, 1 = different), AF text, AF slot.
    Bounds are clamped to the table; end=None means "to the end".
    """
    lo = max(start, 0)
    hi = len(alignment) if end is None or end > len(alignment) else end
    return "\n".join(
        f"{iLK:>5} {left:>20} @{0 if left == right else 1} {right:<20} {iAF:>5}"
        for (iLK, left, right, iAF) in alignment[lo:hi]
    )
def printDiff(before, after):
    """Print the last `before` alignment rows plus `after` follow-up rows.

    The follow-up rows are taken directly from the corpora: starting right
    after the last slot numbers seen in the alignment, the next `after`
    words of each edition are shown side by side (empty past the end).
    """
    print(printLines(start=len(alignment) - before))

    # Scan backwards for the last slot number present for each witness.
    lastLK = None
    lastAF = None
    for entry in reversed(alignment):
        if lastLK is None and entry[0]:
            lastLK = entry[0]
        if lastAF is None and entry[3]:
            lastAF = entry[3]
        if lastLK is not None and lastAF is not None:
            break
    if lastLK is None or lastAF is None:
        return

    for i in range(1, after + 1):
        iLK = lastLK + i
        iAF = lastAF + i
        textLK = getTextLK(iLK) if iLK <= maxLK else ""
        textAF = getTextAF(iAF) if iAF <= maxAF else ""
        print(f"{iLK:>5} = {textLK:>20} @{0 if textLK == textAF else 1} {textAF:<20} = {iAF:>5}")
# Lookahead window used by checkAlignment: up to this many good lines
# sandwiched between bad lines are still counted as bad, so that short
# good runs do not interrupt a bad stretch.
LOOKAHEAD = 3
def analyseStretch(start, end, table=None):
    """Decide whether the bad stretch table[start:end + 1] is suspect.

    A stretch is "suspect" (possibly an inversion rather than a plain
    insertion/omission) when it contains more than one LK-only entry AND
    more than one AF-only entry AND more than 5 one-sided entries total.

    table defaults to the module-level `alignment`; passing it explicitly
    makes the function usable on any alignment-shaped list.
    (Improvements: removed the unused `total` counter and the needless
    `suspect` temporary of the original.)
    """
    if table is None:
        table = alignment
    onlyLK = 0
    onlyAF = 0
    for (iLK, left, right, iAF) in table[start:end + 1]:
        if not iLK:
            onlyAF += 1
        if not iAF:
            onlyLK += 1
    return onlyAF > 1 and onlyLK > 1 and onlyAF + onlyLK > 5
def checkAlignment(lastLK, lastAF):
    """Sanity-check and profile the global `alignment` table.

    lastLK, lastAF: the slot numbers expected as the final LK / AF entries.
    Prints three reports:
      SANITY        - slot numbers of each witness must increase by exactly 1
                      and end at lastLK / lastAF;
      AGREEMENT     - how many rows are LK-only / AF-only / in both, and how
                      many rows have identical vs different text;
      BAD STRETCHES - runs of rows where one witness is missing, grouped by
                      size and split into suspect (see analyseStretch) and
                      benign ones, with printed examples.
    """
    errors = {}
    prevILK = 0
    prevIAF = 0
    where = collections.Counter()
    agreement = collections.Counter()
    # stretch size -> list of start indices of bad stretches of that size
    badStretches = collections.defaultdict(lambda: [])
    # Start index of the bad stretch currently being scanned; 0 doubles as
    # the "no stretch in progress" sentinel.
    # NOTE(review): a bad stretch starting at row 0 is therefore never
    # registered — confirm this is acceptable.
    startBad = 0
    for (c, (iLK, left, right, iAF)) in enumerate(alignment):
        # a row is bad when either witness is absent from it
        thisBad = not iLK or not iAF
        # a good line between bad lines is counted as bad
        if not thisBad and startBad:
            nextGood = True
            # look ahead: if any of the next LOOKAHEAD rows is bad again,
            # this good row does not end the stretch
            for j in range(1, LOOKAHEAD + 1):
                if c + j < len(alignment):
                    compJ = alignment[c + j]
                    if not compJ[0] or not compJ[-1]:
                        nextGood = False
                        break
            if not nextGood:
                thisBad = True
        if startBad:
            if not thisBad:
                # stretch ended: record it under its size (c - startBad)
                badStretches[c - startBad].append(startBad)
                startBad = 0
        else:
            if thisBad:
                startBad = c
        # dissimilarity flag: 0 = texts equal, 1 = different
        agreement[0 if left == right else 1] += 1
        if iLK:
            if iLK != prevILK + 1:
                errors.setdefault("wrong iLK", []).append(f"{c:>5}: Expected {prevILK + 1}, found {iLK}")
            prevILK = iLK
            if iAF:
                where["both"] += 1
        else:
            # no LK slot on this row: AF-only material
            where[AF] += 1
        if iAF:
            if iAF != prevIAF + 1:
                errors.setdefault("wrong iAF", []).append(f"{c:>5}: Expected {prevIAF + 1}, found {iAF}")
            prevIAF = iAF
        else:
            # no AF slot on this row: LK-only material
            where[LK] += 1
    if startBad:
        # close a bad stretch that runs to the very end of the table
        badStretches[len(alignment) - startBad].append(startBad)
    if prevILK < lastLK:
        errors.setdefault("missing iLKs at the end", []).append(f"last is {prevILK}, expected {lastLK}")
    elif prevILK > lastLK:
        errors.setdefault("too many iLKs at the end", []).append(f"last is {prevILK}, expected {lastLK}")
    if prevIAF < lastAF:
        errors.setdefault("missing iAFs at the end", []).append(f"last is {prevIAF}, expected {lastAF}")
    elif prevIAF > lastAF:
        errors.setdefault("too many iAFs at the end", []).append(f"last is {prevIAF}, expected {lastAF}")
    print("\nSANITY\n")
    if not errors:
        print("All OK")
    else:
        # show at most 10 messages per error kind
        for (kind, msgs) in errors.items():
            print(f"ERROR {kind} ({len(msgs):>5}x):")
            for msg in msgs[0:10]:
                print(f"\t{msg}")
            if len(msgs) > 10:
                print(f"\t ... and {len(msgs) - 10} more ...")
    print(f"\nAGREEMENT\n")
    print("Where are the words?\n")
    print(f"\t{LK}-only: {where[LK]:>5} slots")
    print(f"\t{AF}-only: {where[AF]:>5} slots")
    print(f"\tboth: {where['both']:>5} slots")
    print("\nHow well is the agreement?\n")
    for (d, n) in agreement.items():
        print(f"dissimilarity? {d} : {n:>5} words")
    print(f"\nBAD STRETCHES\n")
    print("How many of which size?\n")
    allSuspects = []
    someBenigns = []
    # largest stretch sizes first
    for (size, starts) in sorted(badStretches.items(), key=lambda x: (-x[0], x[1])):
        suspects = {start: size for start in starts if analyseStretch(start, start + size)}
        benigns = {start: size for start in starts if start not in suspects}
        allSuspects.extend([(start, start + size) for (start, size) in suspects.items()])
        # keep at most 3 benign examples per size for display
        someBenigns.extend([(start, start + size) for (start, size) in list(benigns.items())[0:3]])
        examples = ", ".join(str(start) for start in list(suspects.keys())[0:3])
        if not suspects:
            examples = ", ".join(str(start) for start in list(benigns.keys())[0:3])
        print(f"bad stretches of size {size:>3} : {len(suspects):>4} suspect of total {len(starts):>4} x see e.g. {examples}")
    print(f"\nShowing all {len(allSuspects)} inversion suspects" if len(allSuspects) else "\nNo suspect bad stretches\n")
    for (i, (start, end)) in enumerate(reversed(allSuspects)):
        print(f"\nSUSPECT {i + 1:>2}")
        # show the stretch with 5 rows of context on either side
        print(printLines(max((1, start - 5)), min((len(alignment), end + 5))))
    print(f"\nShowing some ({len(someBenigns)}) benign examples" if len(someBenigns) else "\nNo bad stretches\n")
    for (i, (start, end)) in enumerate(someBenigns):
        print(f"\nBENIGN {i + 1:>2}")
        # show the stretch with 2 rows of context on either side
        print(printLines(max((1, start - 2)), min((len(alignment), end + 2))))
# test(4000) tokenized slots 1..3999 (exclusive upper bound in range),
# so the last expected slot number is 4000 - 1 in both witnesses.
checkAlignment(4000 - 1, 4000 - 1)
SANITY All OK AGREEMENT Where are the words? LK-only: 96 slots AF-only: 96 slots both: 3903 slots How well is the agreement? dissimilarity? 1 : 520 words dissimilarity? 0 : 3575 words BAD STRETCHES How many of which size? bad stretches of size 48 : 0 suspect of total 1 x see e.g. 4047 bad stretches of size 9 : 0 suspect of total 1 x see e.g. 458 bad stretches of size 8 : 0 suspect of total 1 x see e.g. 3103 bad stretches of size 5 : 0 suspect of total 5 x see e.g. 346, 431, 513 bad stretches of size 4 : 0 suspect of total 5 x see e.g. 636, 2897, 3146 bad stretches of size 3 : 0 suspect of total 10 x see e.g. 332, 356, 380 bad stretches of size 2 : 0 suspect of total 8 x see e.g. 501, 553, 799 bad stretches of size 1 : 0 suspect of total 72 x see e.g. 1, 14, 117 No suspect bad stretches Showing some (18) benign examples BENIGN 1 3998 mtnāh @0 mtnāh 3950 3999 wān @1 ān 3951 @1 kānt 3952 @1 trǧʿ 3953 @1 ālá 3954 @1 āṣwl 3955 @1 mtnāhyŧ 3956 @1 hy 3957 @1 āmhāt 3958 @1 ālāsmāʾ 3959 @1 āw 3960 @1 ḥḍrāt 3961 @1 ālāsmāʾ 3962 @1 wʿlá 3963 @1 ālḥḳyḳŧ 3964 @1 fmā 3965 @1 ṯm 3966 @1 ālā 3967 @1 ḥḳyḳŧ 3968 @1 wāḥdŧ 3969 @1 tḳbl 3970 @1 ǧmyʿ 3971 @1 hḏh 3972 @1 ālnsb 3973 @1 wālāḍāfāt 3974 @1 ālty 3975 @1 ykná 3976 @1 ʿnhā 3977 @1 bālāsmāʾ 3978 @1 ālālhyŧ 3979 @1 wālḥḳyḳŧ 3980 @1 tʿṭy 3981 @1 ān 3982 @1 ykwn 3983 @1 lkl 3984 @1 āsm 3985 @1 yẓhr 3986 @1 ālá 3987 @1 mā 3988 @1 lā 3989 @1 ytnāhá 3990 @1 ḥḳyḳŧ 3991 @1 ytmyz 3992 @1 bhā 3993 @1 ʿn 3994 @1 āsm 3995 @1 āḫr 3996 @1 tlk 3997 @1 ālḥḳyḳŧ 3998 @1 ālty 3999 BENIGN 2 448 ālʿālm @0 ālʿālm 441 449 ālmʿbr @1 ālmʿbrʿnh 442 450 ʿnh @1 451 fy @0 fy 443 452 āṣṭlāḥ @1 āṣṭlāḥālḳwm 444 453 ālḳwm @1 454 bālānsān @1 ālānsānālkbyr 445 455 ālkbyr @1 456 fkānt @0 fkānt 446 457 ālmlāʾkŧ @0 ālmlāʾkŧ 447 458 lh @1 459 kālḳwá @1 lhkālḳwá 448 460 ālrwḥānyŧ @0 ālrwḥānyŧ 449 BENIGN 3 3066 ykwn @0 ykwn 3027 3067 ābdā @1 ābdālā 3028 3068 ālā @1 3069 bṣwrŧ @0 bṣwrŧ 3029 3070 āstʿdād @0 āstʿdād 3030 3071 ālmtǧlá @1 ālmtǧl 3031 3072 lh @1 3073 ġyr @1 
3074 ḏlk @1 3075 lā @1 3076 ykwn @1 ālhwġyrḏlklāykwn 3032 3077 fāḏā @1 fāḏn 3033 BENIGN 4 339 lh @0 lh 337 340 mn @0 mn 338 341 ġyr @1 342 wǧwd @1 ġyrwǧwd 339 343 hḏā @0 hḏā 340 344 ālmḥl @0 ālmḥl 341 345 wlā @1 346 tǧlyh @1 wlātǧlyh 342 347 lh @0 lh 343 BENIGN 5 422 ābtdā @0 ābtdā 415 423 mnh @0 mnh 416 @1 ā 417 424 fāḳtḍá @0 fāḳtḍá 418 425 ālāmr @0 ālāmr 419 426 ǧlāʾ @0 ǧlāʾ 420 427 mrāŧ @1 428 ālʿālm @1 mrātālʿālm 421 429 fkān @0 fkān 422 BENIGN 6 503 ḥḳyḳŧ @0 ḥḳyḳŧ 488 504 ālḥḳāʾḳ @0 ālḥḳāʾḳ 489 @1 w 490 505 wfy @1 fy 491 506 ālnšāŧ @0 ālnšāŧ 492 507 ālḥāmlŧ @1 ālḥāmlŧlhḏh 493 508 lhḏh @1 509 ālāwṣāf @0 ālāwṣāf 494 510 ālá @1 lá 495 BENIGN 7 621 tʿālá @0 tʿālá 604 622 ālḥāfẓ @0 ālḥāfẓ 605 @1 bh 606 623 ḫlḳh @0 ḫlḳh 607 624 kmā @0 kmā 608 625 yḥfẓ @1 626 ālḫtm @1 yḥfẓālḫtm 609 627 ālḫzāʾn @1 ālḫzān 610 BENIGN 8 2861 bāʿlām @0 bāʿlām 2826 2862 āllh @0 āllh 2827 2863 āyāh @1 2864 bmā @1 2865 āʿṭāh @1 āyāmbmāʿṭāhʿynh 2828 2866 ʿynh @1 2867 mn @0 mn 2829 2868 ālʿlm @0 ālʿlm 2830 BENIGN 9 3109 lā @0 lā 3063 3110 trāhā @1 trāhāmʿ 3064 3111 mʿ @1 3112 ʿlmk @0 ʿlmk 3065 3113 ānk @1 ānkmā 3066 3114 mā @1 3115 rāyt @0 rāyt 3067 3116 ālṣwr @0 ālṣwr 3068 BENIGN 10 325 fānh @0 fānh 325 326 tẓhr @1 yẓhrlh 326 327 lh @1 328 nfsh @0 nfsh 327 329 fy @1 330 ṣwrŧ @1 fyṣwrŧ 328 331 yʿṭyhā @0 yʿṭyhā 329 BENIGN 11 349 kān @0 kān 345 350 ālḥḳ @0 ālḥḳ 346 @1 sbḥānh 347 351 āwǧd @1 āw 348 @1 ǧd 349 352 ālʿālm @0 ālʿālm 350 353 klh @0 klh 351 BENIGN 12 371 mḥlā @1 mḥl 369 372 ālā @0 ālā 370 373 wlā @1 374 bd @1 375 ān @1 376 yḳbl @1 wyḳbl 371 377 rwḥā @0 rwḥā 372 BENIGN 13 491 ālǧmʿyŧ @0 ālǧmʿyŧ 478 492 ālālhyŧ @1 ālālhyŧmā 479 493 byn @1 494 mā @1 495 yrǧʿ @0 yrǧʿ 480 496 mn @0 mn 481 BENIGN 14 542 mā @0 mā 524 543 āṣl @0 āṣl 525 544 ṣwr @1 545 ālʿālm @1 546 ālḳāblŧ @1 ṣwrālʿālmālḳāblŧ 526 547 lārwāḥh @0 lārwāḥh 527 BENIGN 15 782 wlā @0 wlā 764 783 ḳdsth @0 ḳdsth 765 @1 tḳdys 766 @1 ādm 767 784 fġlb @0 fġlb 768 785 ʿlyhā @0 ʿlyhā 769 BENIGN 16 @1 ylrʿā 2 1 ālḥmd @0 ālḥmd 3 2 llh @1 lh 4 
BENIGN 17 11 mn @0 mn 13 12 ālmḳām @0 ālmḳām 14 @1 ā 15 13 ālāḳdm @0 ālāḳdm 16 14 wān @0 wān 17 BENIGN 18 113 wsālt @0 wsālt 116 114 āllh @0 āllh 117 @1 tʿālá 118 115 ān @0 ān 119 116 yǧʿlny @0 yǧʿlny 120
with Collatex
# Zoom in on rows 291-308 of the Collatex alignment, to compare with the
# corresponding rows produced by my own algorithm (shown below).
print(printLines(start=291, end=309))
287 ālāḥṣāʾ @0 ālāḥṣāʾ 288 @1 s 289 288 ān @0 ān 290 289 yrá @0 yrá 291 290 āʿyānhā @0 āʿyānhā 292 291 wān @1 ān 293 292 šʾt @1 šʾtḳlt 294 293 ḳlt @1 294 ān @0 ān 295 295 yrá @0 yrá 296 296 ʿynh @0 ʿynh 297 297 fy @1 y 298 298 kwn @1 kwnǧāmʿ 299 299 ǧāmʿ @1 yḥṣrālāmr 300 300 yḥṣr @1 kh 301 301 ālāmr @1 302 lkwnh @0 lkwnh 302 303 mtṣfā @0 mtṣfā 303
with my algorithm
287 = ālāḥṣāʾ @0 ālāḥṣāʾ = 288
288 +1 ān @1 s 2+ 289
^1 @1 ān 2+ 290
289 = yrá @0 yrá = 291
290 = āʿyānhā @0 āʿyānhā = 292
291 = wān @1 ān = 293
292 +2 šʾt @0 šʾtḳlt 1+ 294
293 +2 ḳlt @0 1^
294 = ān @0 ān = 295
295 = yrá @0 yrá = 296
296 = ʿynh @0 ʿynh = 297
297 = fy @1 y = 298
298 +2 kwn @0 kwnǧāmʿ 1+ 299
299 +2 ǧāmʿ @0 1^
300 +2 yḥṣr @0 yḥṣrālāmr 1+ 300
301 +2 ālāmr @0 1^
302 +1 lkwnh @2 kh 2+ 301
^1 @2 lkwnh 2+ 302
303 = mtṣfā @0 mtṣfā = 303
(A) Lines 288 are better handled by Collatex than by my algorithm.
(B) The lines 298-301 are better handled by my algorithm than by Collatex.
with Collatex
# Zoom in on rows 457-469 of the Collatex alignment, to compare with the
# corresponding rows produced by my own algorithm (shown below).
print(printLines(start=457, end=470))
448 = ālʿālm @0 ālʿālm = 441 449 +2 ālmʿbr @0 ālmʿbrʿnh 1+ 442 450 +2 ʿnh @0 1^ 451 = fy @0 fy = 443 452 +2 āṣṭlāḥ @0 āṣṭlāḥālḳwm 1+ 444 453 +2 ālḳwm @0 1^ 454 +2 bālānsān @1 ālānsānālkbyr 1+ 445 455 +2 ālkbyr @1 1^ 456 = fkānt @0 fkānt = 446 457 = ālmlāʾkŧ @0 ālmlāʾkŧ = 447 458 +2 lh @0 lhkālḳwá 1+ 448 459 +2 kālḳwá @0 1^ 460 = ālrwḥānyŧ @0 ālrwḥānyŧ = 449
with my algorithm
448 ālʿālm @0 ālʿālm 441
449 ālmʿbr @1 ālmʿbrʿnh 442
450 ʿnh @1
451 fy @0 fy 443
452 āṣṭlāḥ @1 āṣṭlāḥālḳwm 444
453 ālḳwm @1
454 bālānsān @1 ālānsānālkbyr 445
455 ālkbyr @1
456 fkānt @0 fkānt 446
457 ālmlāʾkŧ @0 ālmlāʾkŧ 447
458 lh @1
459 kālḳwá @1 lhkālḳwá 448
460 ālrwḥānyŧ @0 ālrwḥānyŧ 449
(C) Lines 458-459 are better handled by my algorithm than by Collatex.
Note that A and C are similar cases. Sometimes my algorithm chooses the best fit, sometimes Collatex does. Anyway, this kind of decision is not very important for the dataset we want to build from this table.
Case B is a bit more involved, and there Collatex fails to see a more obvious alignment.
The performance is the biggest obstacle to using Collatex here. A rather superficial comparison between the resulting alignments does not show marked differences in quality, although there is an indication that Collatex deals a bit less gracefully with convoluted situations.
But closer inspection might reveal that Collatex has it right more often than my algorithm.
However, because neither is perfect, it is important to be able to tweak the process when there are glaring mistakes. Collatex does not offer obvious means to steer its algorithm further.
With my algorithm we have the options to define special cases, to tweak a number of parameters, and to change the orchestration of the comparisons.
That's why we stick to my algorithm.