#!/usr/bin/env python
# coding: utf-8
# # MQL versus TF-Query
# See [TF versus MQL](tfVersusMql.ipynb) for an introduction.
# # Loading
# We load the Text-Fabric program and the BHSA data.
# In[1]:
# Switch on IPython's autoreload extension in mode 2, so that edits to imported
# helper modules are picked up without restarting the kernel.
shell = get_ipython()
shell.run_line_magic("load_ext", "autoreload")
shell.run_line_magic("autoreload", "2")
# In[2]:
from tf.app import use
from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS
# In[3]:
# Data version to load; the SHEBANQ query link in this notebook also uses 2017.
VERSION = "2017"
# Alternative call that omits the ":clone" / checkout="clone" settings:
# A = use('ETCBC/bhsa', hoist=globals(), version=VERSION)
# hoist=globals() is what makes the bare names F, L and T used further down
# available in this notebook.
A = use(
    "ETCBC/bhsa:clone",
    checkout="clone",
    version=VERSION,
    hoist=globals(),
)
# # Example 9
# [Oliver Glanz: DHQ article: discourse pattern deviation](https://shebanq.ancient-data.org/hebrew/query?version=2017&id=491)
# ```
# [[clause domain = "N"
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ..
# [phrase FOCUS function = Subj
# [word AS samesubject]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word AS samecomplement]
# ]
# ]
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# [clause domain = "N"
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ..
# [phrase FOCUS function = Subj
# [word lex = samesubject.lex]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word lex = samecomplement.lex]
# ]
# ]]
# OR
# [[clause domain = "N"
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word AS samecomplement2]
# ]
# ..
# [phrase FOCUS function = Subj
# [word AS samesubject2]
# ]
# ]
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# [clause domain = "N"
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word lex = samecomplement2.lex]
# ]
# ..
# [phrase FOCUS function = Subj
# [word lex = samesubject2.lex]
# ]
# ]]
# OR
# [[clause domain = "N"
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ..
# [phrase FOCUS function = Subj
# [word AS samesubject3]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word AS samecomplement3]
# ]
# ]
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# [clause domain = "N"
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word lex = samecomplement3.lex]
# ]
# ..
# [phrase FOCUS function = Subj
# [word lex = samesubject3.lex]
# ]
# ]]
# OR
# [[clause domain = "N"
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word AS samecomplement4]
# ]
# ..
# [phrase FOCUS function = Subj
# [word AS samesubject4]
# ]
# ]
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# [clause domain = "N"
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ..
# [phrase FOCUS function = Subj
# [word lex = samesubject4.lex]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word lex = samecomplement4.lex]
# ]
# ]]
# ```
# In[4]:
# Fetch the stored MQL results for example 9; they are the reference that the
# TF results are compared against further down.
verses, words = getShebanqData(A, MQL_RESULTS, 9)
# This is a complex query. Let's make it simpler first.
# We see some recurring objects:
# **`speakPhrase`**
# ```
# [phrase function = Pred
# [word
# [word lex = "DBR["]
# OR
# [word lex = ">MR["]
# OR
# [word lex = "QR>["]
# ]
# ]
# ```
# **`subjPhrase`**
# ```
# [phrase FOCUS function = Subj
# [word AS samesubject]
# ]
# ```
# **`cmplPhrase`**
# ```
# [phrase FOCUS function = Cmpl
# [word AS samecomplement]
# ]
# ```
# **`clauses`**
# ```
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# ```
# The structure of the whole query is then, in pseudo TF-query terms:
# ```
# clause domain=N
# speakPhrase
# < s:subjPhrase
# < c:cmplPhrase
# clauses
# clause domain=N
# speakPhrase
# < subjPhrase (.lex. s)
# < cmplPhrase (.lex. c)
# ```
# OR
# ```
# clause domain=N
# speakPhrase
# < c:cmplPhrase
# < s:subjPhrase
# clauses
# clause domain=N
# speakPhrase
# < cmplPhrase (.lex. c)
# < subjPhrase (.lex. s)
# ```
# OR
# ```
# clause domain=N
# speakPhrase
# < s:subjPhrase
# < c:cmplPhrase
# clauses
# clause domain=N
# speakPhrase
# < cmplPhrase (.lex. c)
# < subjPhrase (.lex. s)
# ```
# OR
# ```
# clause domain=N
# speakPhrase
# < c:cmplPhrase
# < s:subjPhrase
# clauses
# clause domain=N
# speakPhrase
# < subjPhrase (.lex. s)
# < cmplPhrase (.lex. c)
# ```
# The `OR` is only used to enumerate the four different orders between **`subjPhrase`** and **`cmplPhrase`**.
# So we can simplify greatly!
# ```
# clause domain=N
# sp1:speakPhrase
# < s1:subjPhrase
# c1:cmplPhrase
# < clauses
# < clause domain=N
# sp2:speakPhrase
# < s2:subjPhrase (.lex. s1)
# c2:cmplPhrase (.lex. c1)
# sp1 < c1
# sp2 < c2
# s1 .lex. s2
# c1 .lex. c2
# ```
# There is a problem with translating the **`clauses`** bit:
# ```
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# ```
# The operator `* {n-m}` means: repeat the previous block `n` to `m` times.
# In TF-Query there is no such operator.
# We will mimic this query by means of a mixture of TF-Query and hand-coding.
# We examine the results of searching for
# ```
# clause domain=N
# sp: speakPhrase
# < subjPhrase
# c:cmplPhrase
# sp < c
# ```
# By means of hand coding we walk through the results of this query:
# 1. suppose we are at a query result
# 1. walk through the following clauses as long as they match **`clauses`**
# 1. see if the next clause is a result of the query
# 1. check whether this clause and the one we started at in 1.
# agree lexically in their **`subjPhrase`** and **`cmplPhrase`**
# 1. for each such pair of results we add the combination to the result set
# We take care to deliver the results in the same way as a TF-query would do.
# **N.B.** What does *agree* mean in 4? According to the MQL query:
# ```
# [phrase FOCUS function = Subj
# [word AS samesubject]
# ]
# ..
# [phrase FOCUS function = Cmpl
# [word AS samecomplement]
# ]
# ```
# That means that two phrases *agree* if each of them contains a word that is an occurrence of the same lexeme.
# In short: they share a lexeme.
# That is a fairly relaxed condition: if both phrases have the article, or the same preposition, the condition
# is met. But that is what the query says, and we stick to that.
# In[5]:
# TF-Query template for one "speech clause": a narrative clause (domain=N) with
# a predicate phrase containing one of the three speech verbs, followed by a
# subject phrase, and with a complement phrase that comes after the predicate.
# Indentation expresses embedding, so the three phrases are inside the clause
# and the word is inside the predicate phrase. Each result is a 5-tuple:
# (clause, predicate phrase, word, subject phrase, complement phrase).
query = """
clause domain=N
  sp:phrase function=Pred
    word lex=DBR[|>MR[|QR>[
  < phrase function=Subj
  c:phrase function=Cmpl
sp < c
"""
# In[6]:
# Run the TF-Query. weave() unpacks each result as
# (clause, speakPhrase, speakWord, subjPhrase, cmplPhrase).
speakResults = A.search(query)
# This is our starting point.
# We are going to weave these results together.
# The following function does that.
# It has to find up to 50 intervening clauses with `domain=Q` between
# two clauses with a speech verb.
# Let's parametrize this number 50, so that we can play with it later on.
# In[12]:
# Look-up table from clause node to its speech-clause result, so that weave()
# can test quickly whether a clause is itself a result. If a clause occurs in
# several results, the last one wins.
speakResultsIndex = {sr[0]: sr for sr in speakResults}


def weave(qLimit):
    """Pair up speech clauses that are separated by a run of quoted clauses.

    This hand-codes the MQL fragment

        [clause domain = "N"]* {0-1}
        [clause domain = "Q"]* {1-qLimit}

    between two results of the speech-clause query. Starting at each result
    in ``speakResults`` it

    1. skips at most one following clause with ``domain == "N"``;
    2. walks over a run of 1 to ``qLimit`` clauses with ``domain == "Q"``;
    3. requires the clause right after that run to have ``domain == "N"`` and
       to be a speech-clause result itself;
    4. requires the subject phrases of both clauses to share at least one
       lexeme, and likewise the complement phrases.

    Parameters
    ----------
    qLimit: int
        Maximum number of consecutive Q-clauses allowed between the two
        speech clauses. A longer run rejects the candidate.

    Returns
    -------
    list of tuple
        One tuple per accepted pair:
        ``(clause, nextClause, subjPhrase, cmplPhrase, nextSubjPhrase,
        nextCmplPhrase, qs)``.
        Positions 2-5 (the four phrases) and the last position (``qs``, the
        number of Q-clauses walked over) are what the rest of the notebook
        reads. NOTE(review): positions 0-1 holding the two clause nodes is a
        reconstruction -- confirm against the original notebook.

    The number of results is also printed.
    """
    results = []
    for speakResult in speakResults:
        (clause, _speakPhrase, _speakWord, subjPhrase, cmplPhrase) = speakResult

        # Step to the clause after the starting clause, if there is one.
        nextClause = L.n(clause, otype="clause")
        if not nextClause:
            continue
        nextClause = nextClause[0]
        domain = F.domain.v(nextClause)
        qSeen = domain == "Q"
        if not qSeen and domain != "N":
            continue
        if not qSeen:
            # We are on the single optional N-clause: the next one must be Q.
            nextClause = L.n(nextClause, otype="clause")
            if not nextClause:
                continue
            nextClause = nextClause[0]
            domain = F.domain.v(nextClause)
            qSeen = domain == "Q"
        if not qSeen:
            continue

        # One Q-clause has been seen; walk to the end of the run. When the run
        # is longer than qLimit the loop stops while domain is still "Q", and
        # the domain check below rejects the candidate.
        qs = 1
        while qs <= qLimit:
            nextClause = L.n(nextClause, otype="clause")
            if not nextClause:
                break
            nextClause = nextClause[0]
            domain = F.domain.v(nextClause)
            if domain != "Q":
                break
            qs += 1
        if not nextClause:
            continue
        if domain != "N":
            continue
        if nextClause not in speakResultsIndex:
            continue
        nextSpeakResult = speakResultsIndex[nextClause]
        (
            nextClause,
            _nextSpeakPhrase,
            _nextSpeakWord,
            nextSubjPhrase,
            nextCmplPhrase,
        ) = nextSpeakResult

        # here we implement the "agree" bit. Note that & means: set intersection.
        if (
            {F.lex.v(w) for w in L.d(subjPhrase, otype="word")}
            & {F.lex.v(w) for w in L.d(nextSubjPhrase, otype="word")}
        ) and (
            {F.lex.v(w) for w in L.d(cmplPhrase, otype="word")}
            & {F.lex.v(w) for w in L.d(nextCmplPhrase, otype="word")}
        ):
            # note that we add the number of Q-clauses at the end of each result tuple
            results.append(
                (
                    clause,
                    nextClause,
                    subjPhrase,
                    cmplPhrase,
                    nextSubjPhrase,
                    nextCmplPhrase,
                    qs,
                )
            )
    print(f"qLimit={qLimit}: {len(results)} results")
    return results
# In[9]:
# Weave with the same upper bound as the MQL query (`{1-50}`).
results = weave(50)
# In[10]:
# (2, 3, 4, 5) are the positions of the four phrases in each result tuple.
tfVerses, tfWords = getTfVerses(A, results, (2, 3, 4, 5))
# In[11]:
# Compare the MQL results with the ones produced by TF-Query plus hand-coding.
compareResults(A, verses, words, tfVerses, tfWords)
# What if we allowed only strings of at most 49 Q-clauses?
# Would that matter?
# It would be nice if we could see the number of Q-clauses in each result.
# Well, we have sneaked that in already!
# It is added at the end of each result tuple.
# Here are the minimum and the maximum that we encountered:
# In[13]:
# The last member of every result tuple is its number of Q-clauses.
qCounts = [r[-1] for r in results]
print(f"minimum number of Q-clauses: {min(qCounts):>2}")
print(f"maximum number of Q-clauses: {max(qCounts):>2}")
# So we expect that it does matter if we go from 50 to 49.
# Before we test that, let us show all Q-lengths:
# In[14]:
# Print one line per result: the verse where the first speech clause starts,
# the number of Q-clauses in between, and the verse of the second speech clause.
for r in results:
    # r[2] and r[3] are the two phrases of the first clause; min() takes the
    # one with the lower node number -- presumably the one that comes first.
    startPhrase = min(r[2], r[3])
    startVerse = T.sectionFromNode(L.u(startPhrase, otype="verse")[0])
    startString = "{} {}:{}".format(*startVerse)
    # r[4] and r[5] are the two phrases of the second clause.
    endPhrase = min(r[4], r[5])
    endVerse = T.sectionFromNode(L.u(endPhrase, otype="verse")[0])
    endString = "{} {}:{}".format(*endVerse)
    qs = r[-1]
    print(f"{startString:<20} == {qs:>2} Q-clauses ==> {endString}")
# Let's double-check: if we allow only strings of Q-clauses up to length 49, the result in Leviticus 21:1
# should be gone.
# In[16]:
# Weave again, now allowing at most 49 Q-clauses between the speech clauses.
results = weave(49)
# In[17]:
# (2, 3, 4, 5) are the positions of the four phrases in each result tuple.
tfVerses, tfWords = getTfVerses(A, results, (2, 3, 4, 5))
# In[18]:
# Compare the MQL results with the stricter TF results.
compareResults(A, verses, words, tfVerses, tfWords)
# And so it is!
# **Conclusion**
# Instead of running a query and obtaining a list of results,
# we did a bit of programming and we can get much more than just the results.
# That is the power of programming.
# But programming is difficult, and mistakes will be made.
# TF-Query helps you to find a sweet spot between crafting queries
# and writing code.