# Notebook setup: enable IPython autoreload so edited modules are re-imported
# on every cell execution (mode 2 = reload all modules except those excluded).
%load_ext autoreload
%autoreload 2
# Load the BHSA (Hebrew Bible) corpus via Text-Fabric.
# hoist=globals() injects the TF API objects (F, L, N, T, ...) into the
# notebook's global namespace for convenient interactive use.
from tf.app import use
A = use("ETCBC/bhsa", hoist=globals())
Locating corpus resources ...
Name | # of nodes | # slots/node | % coverage |
---|---|---|---|
book | 39 | 10938.21 | 100 |
chapter | 929 | 459.19 | 100 |
lex | 9230 | 46.22 | 100 |
verse | 23213 | 18.38 | 100 |
half_verse | 45179 | 9.44 | 100 |
sentence | 63717 | 6.70 | 100 |
sentence_atom | 64514 | 6.61 | 100 |
clause | 88131 | 4.84 | 100 |
clause_atom | 90704 | 4.70 | 100 |
phrase | 253203 | 1.68 | 100 |
phrase_atom | 267532 | 1.59 | 100 |
subphrase | 113850 | 1.42 | 38 |
word | 426590 | 1.00 | 100 |
from functools import cmp_to_key
bhsa = A.api
def convert_to_chunk(node):
    """Pair a Text-Fabric node with the set of word slots it occupies.

    Returns a ``(node, slot_set)`` tuple; the slot set is what the
    canonical-order comparator operates on.
    """
    return (node, set(bhsa.L.d(node, 'word')))
def _canonical_order(chunk_a, chunk_b):
(n1, slotsA) = chunk_a
(n2, slotsB) = chunk_b
if slotsA == slotsB:
return 0
aWithoutB = slotsA - slotsB
if not aWithoutB:
return 1
bWithoutA = slotsB - slotsA
if not bWithoutA:
return -1
aMin = min(aWithoutB)
bMin = min(bWithoutA)
return -1 if aMin < bMin else 1
# Adapt the 3-way comparator into a sort key for use with sorted().
canonical_order = cmp_to_key(_canonical_order)

otype = 'subphrase'

# Sort every subphrase chunk by the canonical comparator ...
canon_order = sorted(
    (convert_to_chunk(sp) for sp in bhsa.F.otype.s(otype)),
    key=canonical_order,
)
# ... and also take the chunks in Text-Fabric's native iteration order.
iter_order = [convert_to_chunk(sp) for sp in bhsa.F.otype.s(otype)]

# TF's node iteration order is already canonical, so both must agree.
assert canon_order == iter_order
# Interactive inspection: peek at a window of chunks where an embedder
# (5-slot subphrase) is immediately followed by the two subphrases it embeds.
canon_order[32:35]
# (cell output)
[(1300573, {256, 257, 258, 259, 260}), (1300571, {256, 257}), (1300572, {259, 260})]
# Same window in iteration order — identical, confirming the assert above.
iter_order[32:35]
# (cell output)
[(1300573, {256, 257, 258, 259, 260}), (1300571, {256, 257}), (1300572, {259, 260})]
# The raw node numbers in TF's native order match the chunked views.
F.otype.s("subphrase")[32:35]
# (cell output)
(1300573, 1300571, 1300572)
c1 = F.otype.s("chapter")[0]
for (node, boundary) in N.walk(nodes=L.d(c1), events=True):
boundaryRep = "slot" if boundary is None else "end" if boundary else "start"
nodeRep = f"{F.otype.v(node):<20} {node}"
print(f"{boundaryRep:<5} {nodeRep}")