#!/usr/bin/env python
# coding: utf-8

# # MQL versus TF-Query
#
# See [TF versus MQL](tfVersusMql.ipynb) for an introduction.
#
# # Loading
#
# We load the Text-Fabric program and the BHSA data.

# In[1]:


# Notebook-only: enable the IPython autoreload extension in mode 2.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# In[2]:


from tf.app import use
from util import getTfVerses, getShebanqData, compareResults, MQL_RESULTS


# In[3]:


VERSION = "2017"
# A = use('ETCBC/bhsa', hoist=globals(), version=VERSION)
# NOTE(review): hoist=globals() presumably injects the Text-Fabric API names
# (F, L, T, ...) that the rest of this file uses without defining them.
A = use("ETCBC/bhsa:clone", checkout="clone", hoist=globals(), version=VERSION)


# # Example 9
#
# [Oliver Glanz: DHQ article: discourse pattern deviation](https://shebanq.ancient-data.org/hebrew/query?version=2017&id=491)
#
# ```
# [[clause domain = "N"
#     [phrase function = Pred
#         [word
#             [word lex = "DBR["]
#             OR
#             [word lex = ">MR["]
#             OR
#             [word lex = "QR>["]
#         ]
#     ]
#     ..
#     [phrase FOCUS function = Subj
#         [word AS samesubject]
#     ]
#     ..
#     [phrase FOCUS function = Cmpl
#         [word AS samecomplement]
#     ]
# ]
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# [clause domain = "N"
#     [phrase function = Pred
#         [word
#             [word lex = "DBR["]
#             OR
#             [word lex = ">MR["]
#             OR
#             [word lex = "QR>["]
#         ]
#     ]
#     ..
#     [phrase FOCUS function = Subj
#         [word lex = samesubject.lex]
#     ]
#     ..
#     [phrase FOCUS function = Cmpl
#         [word lex = samecomplement.lex]
#     ]
# ]]
# OR
# [[clause domain = "N"
#     [phrase function = Pred
#         [word
#             [word lex = "DBR["]
#             OR
#             [word lex = ">MR["]
#             OR
#             [word lex = "QR>["]
#         ]
#     ]
#     ..
#     [phrase FOCUS function = Cmpl
#         [word AS samecomplement2]
#     ]
#     ..
#     [phrase FOCUS function = Subj
#         [word AS samesubject2]
#     ]
# ]
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# [clause domain = "N"
#     [phrase function = Pred
#         [word
#             [word lex = "DBR["]
#             OR
#             [word lex = ">MR["]
#             OR
#             [word lex = "QR>["]
#         ]
#     ]
#     ..
#     [phrase FOCUS function = Cmpl
#         [word lex = samecomplement2.lex]
#     ]
#     ..
#     [phrase FOCUS function = Subj
#         [word lex = samesubject2.lex]
#     ]
# ]]
# OR
# [[clause domain = "N"
#     [phrase function = Pred
#         [word
#             [word lex = "DBR["]
#             OR
#             [word lex = ">MR["]
#             OR
#             [word lex = "QR>["]
#         ]
#     ]
#     ..
#     [phrase FOCUS function = Subj
#         [word AS samesubject3]
#     ]
#     ..
#     [phrase FOCUS function = Cmpl
#         [word AS samecomplement3]
#     ]
# ]
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# [clause domain = "N"
#     [phrase function = Pred
#         [word
#             [word lex = "DBR["]
#             OR
#             [word lex = ">MR["]
#             OR
#             [word lex = "QR>["]
#         ]
#     ]
#     ..
#     [phrase FOCUS function = Cmpl
#         [word lex = samecomplement3.lex]
#     ]
#     ..
#     [phrase FOCUS function = Subj
#         [word lex = samesubject3.lex]
#     ]
# ]]
# OR
# [[clause domain = "N"
#     [phrase function = Pred
#         [word
#             [word lex = "DBR["]
#             OR
#             [word lex = ">MR["]
#             OR
#             [word lex = "QR>["]
#         ]
#     ]
#     ..
#     [phrase FOCUS function = Cmpl
#         [word AS samecomplement4]
#     ]
#     ..
#     [phrase FOCUS function = Subj
#         [word AS samesubject4]
#     ]
# ]
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# [clause domain = "N"
#     [phrase function = Pred
#         [word
#             [word lex = "DBR["]
#             OR
#             [word lex = ">MR["]
#             OR
#             [word lex = "QR>["]
#         ]
#     ]
#     ..
#     [phrase FOCUS function = Subj
#         [word lex = samesubject4.lex]
#     ]
#     ..
#     [phrase FOCUS function = Cmpl
#         [word lex = samecomplement4.lex]
#     ]
# ]]
# ```

# In[4]:


# Fetch the stored MQL outcome to compare against later on.
# NOTE(review): the 9 presumably selects "Example 9" -- confirm in util.
(verses, words) = getShebanqData(A, MQL_RESULTS, 9)


# This is a complex query. Let's make it simpler first.
#
# We see some recurring objects:
#
#
# **`speakPhrase`**
#
# ```
# [phrase function = Pred
#     [word
#         [word lex = "DBR["]
#         OR
#         [word lex = ">MR["]
#         OR
#         [word lex = "QR>["]
#     ]
# ]
# ```
#
# **`subjPhrase`**
#
# ```
# [phrase FOCUS function = Subj
#     [word AS samesubject]
# ]
# ```
#
# **`cmplPhrase`**
#
# ```
# [phrase FOCUS function = Cmpl
#     [word AS samecomplement]
# ]
# ```
#
# **`clauses`**
#
# ```
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# ```

# The structure of the whole query is then, in pseudo TF-query terms:
#
# ```
# clause domain=N
#   speakPhrase
#   < s:subjPhrase
#   < c:cmplPhrase
# clauses
# clause domain=N
#   speakPhrase
#   < subjPhrase (.lex. s)
#   < cmplPhrase (.lex. c)
# ```
#
# OR
#
# ```
# clause domain=N
#   speakPhrase
#   < c:cmplPhrase
#   < s:subjPhrase
# clauses
# clause domain=N
#   speakPhrase
#   < cmplPhrase (.lex. c)
#   < subjPhrase (.lex. s)
# ```
#
# OR
#
# ```
# clause domain=N
#   speakPhrase
#   < s:subjPhrase
#   < c:cmplPhrase
# clauses
# clause domain=N
#   speakPhrase
#   < cmplPhrase (.lex. c)
#   < subjPhrase (.lex. s)
# ```
#
# OR
#
# ```
# clause domain=N
#   speakPhrase
#   < c:cmplPhrase
#   < s:subjPhrase
# clauses
# clause domain=N
#   speakPhrase
#   < subjPhrase (.lex. s)
#   < cmplPhrase (.lex. c)
# ```

# The `OR` is only used to enumerate the four different orders between **`subjPhrase`** and **`cmplPhrase`**.
# So we can simplify greatly!
#
# ```
# clause domain=N
#   sp1:speakPhrase
#   < s1:subjPhrase
#   c1:cmplPhrase
# < clauses
# < clause domain=N
#   sp2:speakPhrase
#   < s2:subjPhrase (.lex. s1)
#   c2:cmplPhrase (.lex. c1)
#
# sp1 < c1
# sp2 < c2
# s1 .lex. s2
# c1 .lex. c2
# ```

# There is a problem with translating the **`clauses`** bit:
#
# ```
# [clause domain = "N"]* {0-1}
# [clause domain = "Q"]* {1-50}
# ```
#
# The operator `* {n-m}` means: repeat the previous block `n` to `m` times.
# In TF-Query there is no such operator.
#
# We will mimic this query by means of a mixture of TF-Query and hand-coding.
#
# We examine the results of searching for
#
# ```
# clause domain=N
#   sp: speakPhrase
#   < subjPhrase
#   c:cmplPhrase
#
# sp < c
# ```
#
# By means of hand coding we walk through the results of this query:
#
# 1. suppose we are at a query result
# 1. walk through the following clauses as long as they match **`clauses`**
# 1. see if the next clause is a result of the query
# 1. check whether this clause and the one we started at in 1.
#    agree lexically in their **`subjPhrase`** and **`cmplPhrase`**
# 1. for each such pair of results we add the combination to the result set
#
# We take care to deliver the results in the same way as a TF-query would do.
#
# **N.B.** What does *agree* mean in 4? According to the MQL query:
#
# ```
# [phrase FOCUS function = Subj
#     [word AS samesubject]
# ]
# ..
# [phrase FOCUS function = Cmpl
#     [word AS samecomplement]
# ]
# ```
#
# That means that two phrases *agree* if each of them has a word that is an
# occurrence of the same lexeme.
# In short: they share a lexeme.
#
# That is a fairly relaxed condition: if both phrases have the article, or the same preposition, the condition
# is met. But that is what the query says, and we stick to that.

# In[5]:


# TF-Query templates are indentation-sensitive: the word line is nested in the
# Pred phrase, and the three phrase lines are nested in the clause.
# Each result is a 5-tuple: (clause, Pred phrase, word, Subj phrase, Cmpl phrase).
query = """
clause domain=N
  sp:phrase function=Pred
    word lex=DBR[|>MR[|QR>[
  < phrase function=Subj
  c:phrase function=Cmpl

sp < c
"""


# In[6]:


speakResults = A.search(query)


# This is our starting point.
#
# We are going to weave these results together.
#
# The following function does that.
# It has to find up to 50 intervening clauses with `domain=Q` between
# two clauses with a speech verb.
#
# Let's parametrize this number 50, so that we can play with it later on.
# In[12]:


# Index the search results by their clause node (first member of each result).
# One clause can occur in several result tuples (one per combination of
# matching phrases/words), so each clause maps to the list of ALL its results;
# keeping only one of them would hide the other Subj/Cmpl combinations.
speakResultsIndex = {}
for sr in speakResults:
    speakResultsIndex.setdefault(sr[0], []).append(sr)


def _followingClause(clause):
    """Return the first clause that `L.n` reports after *clause*, or None."""
    adjacent = L.n(clause, otype="clause")
    return adjacent[0] if adjacent else None


def _lexemes(phrase):
    """Return the set of `lex` values of the words that `L.d` finds in *phrase*."""
    return {F.lex.v(w) for w in L.d(phrase, otype="word")}


def weave(qLimit, nLimit=1):
    """Combine pairs of speech clauses separated by a run of quoted clauses.

    Hand-coded counterpart of the MQL fragment

        [clause domain = "N"]* {0-nLimit}
        [clause domain = "Q"]* {1-qLimit}

    placed between two results of the speech-clause query. Starting from
    every member of the global `speakResults`, the clauses that follow are
    walked: at most *nLimit* clauses with domain "N" are skipped, then between
    1 and *qLimit* clauses with domain "Q" must follow, and the clause right
    after them must have domain "N" and be a speech-clause result itself.
    Both clauses must *agree*: their Subj phrases share at least one lexeme
    and so do their Cmpl phrases.

    Parameters
    ----------
    qLimit: int
        Maximum length of the run of "Q" clauses.
    nLimit: int, optional
        Maximum number of "N" clauses skipped before the "Q" run starts.
        The default 1 mirrors the `{0-1}` of the MQL query.

    Returns
    -------
    list of tuple
        One tuple per agreeing pair:
        `(clause, speakPhrase, subjPhrase, cmplPhrase,
        nextSubjPhrase, nextCmplPhrase, qs)`
        where `qs` is the number of "Q" clauses in between.

    Side effects: prints the number of results found for this `qLimit`.
    """
    results = []

    for (clause, speakPhrase, _speakWord, subjPhrase, cmplPhrase) in speakResults:
        nextClause = _followingClause(clause)

        # Optionally skip a few narrative clauses before the quotation starts.
        ns = 0
        while (
            ns < nLimit
            and nextClause is not None
            and F.domain.v(nextClause) == "N"
        ):
            nextClause = _followingClause(nextClause)
            ns += 1

        # The quotation must start here, with at least one Q clause.
        if nextClause is None or F.domain.v(nextClause) != "Q":
            continue

        # Count the Q clauses. When the run is longer than qLimit the loop
        # ends while still standing on a Q clause, which is rejected below.
        qs = 1
        while qs <= qLimit:
            nextClause = _followingClause(nextClause)
            if nextClause is None or F.domain.v(nextClause) != "Q":
                break
            qs += 1

        # The clause that ends the run must be narrative again ...
        if nextClause is None or F.domain.v(nextClause) != "N":
            continue

        # ... and it must be a speech clause: compare with all its results.
        subjLexemes = _lexemes(subjPhrase)
        cmplLexemes = _lexemes(cmplPhrase)
        seenPairs = set()

        for followUp in speakResultsIndex.get(nextClause, ()):
            (nextSubjPhrase, nextCmplPhrase) = followUp[3:5]
            pair = (nextSubjPhrase, nextCmplPhrase)
            if pair in seenPairs:
                # same phrases reached through another speech word
                continue
            seenPairs.add(pair)

            # here we implement the "agree" bit. Note that & means: set intersection.
            if (subjLexemes & _lexemes(nextSubjPhrase)) and (
                cmplLexemes & _lexemes(nextCmplPhrase)
            ):
                # note that we add the number of Q-clauses at the end of each result tuple
                results.append(
                    (
                        clause,
                        speakPhrase,
                        subjPhrase,
                        cmplPhrase,
                        nextSubjPhrase,
                        nextCmplPhrase,
                        qs,
                    )
                )

    print(f"qLimit={qLimit}: {len(results)} results")
    return results


# In[9]:


results = weave(50)


# In[10]:


(tfVerses, tfWords) = getTfVerses(A, results, (2, 3, 4, 5))


# In[11]:


compareResults(A, verses, words, tfVerses, tfWords)


# What if we allowed only strings of 49 Q-clauses?
# Would that matter?
#
# It would be nice if we could see the number of Q-clauses in each result.
# Well, we have sneaked that in already!
# It is added at the end of each result tuple.
#
# Here are the minimum and the maximum that we encountered:

# In[13]:


# The number of Q-clauses is the last member of every result tuple.
print(f"minimum number of Q-clauses: {min(r[-1] for r in results):>2}")
print(f"maximum number of Q-clauses: {max(r[-1] for r in results):>2}")


# So we expect that it does matter if we go from 50 to 49.
# Before we test that, let us show all Q-lengths:

# In[14]:


def verseLabel(phraseA, phraseB):
    """Format the verse of the lower-numbered of two phrase nodes.

    Takes the smaller of the two node numbers, looks up the first verse that
    `L.u` reports for it, and formats the result of `T.sectionFromNode` for
    that verse with the template "{} {}:{}".
    """
    firstPhrase = min(phraseA, phraseB)
    section = T.sectionFromNode(L.u(firstPhrase, otype="verse")[0])
    return "{} {}:{}".format(*section)


for r in results:
    # members 2, 3: Subj/Cmpl phrase of the first clause,
    # members 4, 5: Subj/Cmpl phrase of the second clause
    startString = verseLabel(r[2], r[3])
    endString = verseLabel(r[4], r[5])
    qs = r[-1]
    print(f"{startString:<20} == {qs:>2} Q-clauses ==> {endString}")


# Let's double-check: if we allow only strings of Q-clauses up to length 49, the result in Leviticus 21:1
# should be gone.

# In[16]:


results = weave(49)


# In[17]:


(tfVerses, tfWords) = getTfVerses(A, results, (2, 3, 4, 5))


# In[18]:


compareResults(A, verses, words, tfVerses, tfWords)


# And so it is!

# **Conclusion**
#
# Instead of running a query and obtaining a list of results,
# we did a bit of programming and we can get much more than just the results.
#
# That is the power of programming.
# But programming is difficult, and mistakes will be made.
#
# TF-Query helps you to find a sweet spot between crafting queries
# and writing code.