#!/usr/bin/env python
# coding: utf-8

# Table of Contents

#
# In[2]:
# IPython-only magics: auto-reload edited modules. This script must run
# inside Jupyter/IPython, otherwise get_ipython() is undefined.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

# In[3]:
# First, I have to load different modules that I use for analyzing the data
# and for plotting:
import sys, os, collections
import pandas as pd
import numpy as np
import re
import csv
import seaborn as sns
import matplotlib.pyplot as plt; plt.rcdefaults()
from matplotlib.pyplot import figure
from collections import Counter

# Second, I have to load the Text Fabric app
from tf.fabric import Fabric
from tf.app import use

# In[35]:
SBLGNTv1 = use('SBLGNT/tf/6.12_v1', hoist=globals())

# In[72]:
SBLGNTv2 = use('SBLGNT/tf/6.12_v2', hoist=globals())

# In[57]:
# NOTE(review): TF search templates use indentation for nesting; the original
# layout was lost in the export — verify the intended nesting.
Searchv1_0 = '''
book book=Matthew book_code*
  chapter chapter=1
    sentence sentence=1
      verse
'''
Searchv1_0 = SBLGNTv1.search(Searchv1_0)
SBLGNTv1.show(Searchv1_0, start=1, end=1, condensed=True,
              colorMap={1: 'pink'},
              extraFeatures={'verse', 'vrsnum', 'orig_order'})

# In[76]:
Searchv2_0 = '''
book book=Matthew
  chapter chapter=1
    verse verse=21
'''
Searchv2_0 = SBLGNTv2.search(Searchv2_0)
SBLGNTv2.show(Searchv2_0, start=1, end=5, condensed=True,
              colorMap={3: 'pink'},
              extraFeatures={'verse', 'vrsnum', 'orig_order'})

# In[17]:
# NOTE(review): `SBLGNT` is not defined until cell In[444] much further down;
# running this file top-to-bottom raises NameError here. Either use SBLGNTv1/
# SBLGNTv2 or move the In[444] load before this cell.
Search1 = '''
verse book=Matthew chapter=1 verse=1
  word
'''
Search1 = SBLGNT.search(Search1)
SBLGNT.show(Search1, start=1, end=1, condensed=True,
            colorMap={1: 'pink'}, extraFeatures={'verse', 'vrsnum'})

# In[15]:
Search2 = '''
book book=Revelation book_code*
  chapter chapter=1
    sentence
      clause
        word lemma* morph* normalized_word* case* degree* gn* mood* nu* orig_order* ps* voice* tense* lemma_translit* dict_abc_order* lemma_freq* lemma_dictform* lemma_freq* lemma_gloss* lemma_strongs*
'''
Search2 = SBLGNT.search(Search2)
SBLGNT.show(Search2, start=1, end=1, condensed=True,
            colorMap={1: 'pink'}, extraFeatures={'verse', 'vrsnum'})

# # New Feature Development

# # Morphology

# ## MorphGNT:sblgnt Data Description
# Here: https://github.com/morphgnt/sblgnt
#
# ![image.png](attachment:image.png)
# ## Extracting morphological features

# In[381]:
# Raw MorphGNT morphology codes, one per word occurrence.
featureprep = pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/morph_copy.tf', delimiter='\t', encoding='utf-8')
pd.set_option('display.max_columns', 50)
featureprep.head(5)

# In[382]:
# Text-Fabric word nodes are 1-based, hence the +1.
featureprep['orig_order'] = featureprep.index + 1
featureprep.head(5)

# ### Adding Part of Speech `sp`

# In[383]:
# Ordered (prefix, label) pairs; equivalent to the original anchored
# regex checks ('^A.*' -> 'adj', '^N-.*' -> 'noun', ...), first match wins.
_SP_PREFIXES = (
    ('A', 'adj'),
    ('C', 'conj'),
    ('D', 'adv'),
    ('I', 'interj'),
    ('N-', 'noun'),
    ('P', 'prep'),
    ('RA', 'art-def'),
    ('RD', 'pron-dem'),
    ('RI', 'pron-inter'),
    ('RP', 'pron-prs'),
    ('RR', 'pron-rela'),
    ('V-', 'verb'),
    ('X', 'partcl'),
)


def spconditions(row):
    """Map a MorphGNT morphology code to a part-of-speech label.

    Returns '' when the code matches none of the known prefixes.
    """
    code = str(row)
    for prefix, label in _SP_PREFIXES:
        if code.startswith(prefix):
            return label
    return ''


# In[384]:
featureprep['sp'] = featureprep['morphology'].apply(spconditions)
featureprep.head(20)

# ### Adding Gender `gn`

# In[385]:
def gender(row):
    """Gender from the second-to-last slot of the morph code: f/m/n, else ''."""
    code = str(row)
    if re.search('.*F.$', code):
        return 'f'
    if re.search('.*M.$', code):
        return 'm'
    if re.search('.*N.$', code):
        return 'n'
    return ''


# In[386]:
featureprep['gn'] = featureprep['morphology'].apply(gender)
featureprep.head(20)

# ### Adding Number `nu`

# In[387]:
def number(row):
    """Number from the third-to-last slot: 'sg' / 'pl', else ''.

    The original also tested '.*S[MFN].$' / '.*P[MFN].$', but those are
    strict subsets of '.*S..$' / '.*P..$', so the extra checks were dropped
    (behavior unchanged).
    """
    code = str(row)
    if re.search('.*S..$', code):
        return 'sg'
    if re.search('.*P..$', code):
        return 'pl'
    return ''


# In[388]:
featureprep['nu'] = featureprep['morphology'].apply(number)
featureprep.head(50)

# ### Adding Person `ps`

# In[389]:
def person(row):
    """Person: 'p1'/'p2'/'p3' if the code contains the digit, else ''.

    NOTE(review): matches the digit anywhere in the code; assumes 1/2/3
    only ever appear in the person slot — verify against the MorphGNT
    parsing-code spec.
    """
    code = str(row)
    if re.search('.*1.*', code):
        return 'p1'
    if re.search('.*2.*', code):
        return 'p2'
    if re.search('.*3.*', code):
        return 'p3'
    return ''


# In[390]:
featureprep['ps'] = featureprep['morphology'].apply(person)
featureprep.head(20)

# ### Adding Case `case`

# In[391]:
def case(row):
    """Grammatical case from the fourth-to-last slot (N/G/D/A), else ''."""
    code = str(row)
    if re.search('......N...$', code):
        return 'nominative'
    if re.search('......G...$', code):
        return 'genitive'
    if re.search('......D...$', code):
        return 'dative'
    if re.search('......A...$', code):
        return 'accusative'
    return ''


# In[392]:
featureprep['case'] = featureprep['morphology'].apply(case)
featureprep.head(20)

# ### Adding Tense `vt`

# In[393]:
def tense(row):
    """Verb tense from the fourth slot of the morph code, else ''."""
    code = str(row)
    if re.search('...A.*', code):
        return 'aorist'
    if re.search('...P.*', code):
        return 'present'
    if re.search('...F.*', code):
        return 'future'
    if re.search('...I.*', code):
        return 'imperfect'
    if re.search('...Y.*', code):
        return 'plusquamperfect'
    if re.search('...X.*', code):
        return 'perfect'
    return ''


# In[394]:
featureprep['vt'] = featureprep['morphology'].apply(tense)
featureprep.head(20)

# ### Adding Voice `voice`

# In[395]:
def voice(row):
    """Verb voice from the fifth slot (A/M/P anchored 6 from the end), else ''."""
    code = str(row)
    if re.search('....A.....$', code):
        return 'active'
    if re.search('....M.....$', code):
        return 'middle'
    if re.search('....P.....$', code):
        return 'passive'
    return ''


# In[396]:
featureprep['voice'] = featureprep['morphology'].apply(voice)
featureprep.head(20)

# ### Adding Mood `mood`

# In[397]:
def mood(row):
    """Verb mood from the sixth slot (anchored 5 from the end), else ''."""
    code = str(row)
    if re.search('.....I....$', code):
        return 'indicative'
    if re.search('.....D....$', code):
        return 'imperative'
    if re.search('.....N....$', code):
        return 'infinitive'
    if re.search('.....O....$', code):
        return 'optative'
    if re.search('.....P....$', code):
        return 'participle'
    if re.search('.....S....$', code):
        return 'subjunctive'
    return ''


# In[398]:
featureprep['mood'] = featureprep['morphology'].apply(mood)
featureprep.head(50)

# ### Adding degree `degree`

# In[399]:
def degree(row):
    """Adjective degree from the final character: comparative/superlative, else ''."""
    code = str(row)
    if re.search('C$', code):
        return 'comparative'
    if re.search('S$', code):
        return 'superlative'
    return ''


# In[400]:
# BUGFIX: the original applied `propernoun`, a function that is never
# defined anywhere in this file (NameError). The cell directly above
# (In[399]) defines `degree`, which is clearly what was intended.
featureprep['degree'] = featureprep['morphology'].apply(degree)
featureprep.head(50)

# ### Exporting process

# In[401]:
featureprep.head()

# #### reorder
# sorting first...

# In[402]:
featureprep.sort_values(['orig_order'], ascending=True).head(10)

# #### to excel spreadsheet...

# In[403]:
featureprep.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/morphology.xlsx', index=None)

# In[404]:
# export single features into tf files
featureprep['sp'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/sp2.tf', index=None)
featureprep['gn'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/gn2.tf', index=None)
featureprep['nu'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/nu.tf', index=None)
featureprep['ps'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/ps2.tf', index=None)
featureprep['case'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/case.tf', index=None)
featureprep['vt'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/vt.tf', index=None)
featureprep['voice'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/voice.tf', index=None)
featureprep['mood'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/mood.tf', index=None)
featureprep['degree'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/degree.tf', index=None)

# # Transliteration

# In[4]:
translitadd = pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf', header=None, delimiter='\t', encoding='utf-8')
pd.set_option('display.max_columns', 50)
translitadd.head(10)

# In[5]:
# The lemma file has no header; column 0 holds the lemma.
translitadd['lemma'] = translitadd[0]
translitadd.head(5)

# In[6]:
translitadd = translitadd[['lemma']]
translitadd.head(5)

# In[7]:
translitadd['orig_order'] = translitadd.index + 1
translitadd.head(5)

# In[9]:
from unidecode import unidecode

# In[10]:
# Quick sanity check of Greek -> ASCII transliteration.
s = "βίβλος"
s = unidecode(s)
print(s)

# In[11]:
translitadd['translit'] = translitadd['lemma'].apply(unidecode)

# In[12]:
translitadd.head(5)

# In[413]:
translitadd['translit'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_translit.tf', index=None)

# # ABC dictionary order

# In[414]:
ABC1 = pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf', header=None, delimiter='\t', encoding='utf-8')
pd.set_option('display.max_columns', 50)
ABC1.head(10)

# In[415]:
# BUGFIX: the original read `ABC1['lemma'] = lemma[0]`, but `lemma` is never
# defined in this file (NameError). As in the transliteration section above,
# the lemma sits in column 0 of the just-loaded frame.
ABC1['lemma'] = ABC1[0]
ABC1.head(5)

# In[416]:
ABC1['orig_order'] = ABC1.index + 1
ABC1.head(5)

# In[417]:
ABC1 = ABC1[['orig_order', 'lemma']]
ABC1.head(5)

# In[418]:
ABC1.describe()

# In[419]:
ABCdict = ABC1.drop_duplicates(['lemma']).sort_values(by='lemma', ascending=[True])
ABCdict.head(10)

# In[420]:
ABCdict.describe()

# In[421]:
ABC1.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/ABC1order.xlsx', encoding='utf-8')

# Now I am ordering the words alphabetically with LibreOffice since I cannot
# do that in pandas (yet?).

# In[422]:
ABC2 = pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/ABC2order.xlsx')
pd.set_option('display.max_columns', 50)
ABC2.head(10)

# Now we merge the ABCorder dataframe with the original lemma DF.
# In[423]:
lemma_ABC = pd.merge(ABC1, ABC2, on='lemma', how='outer')
lemma_ABC.head(5)

# In[424]:
lemma_ABC.describe()

# In[425]:
# merge suffixes the duplicated column as orig_order_x / orig_order_y.
lemma_ABC.sort_values(['orig_order_x'], ascending=True).head(10)

# In[426]:
lemma_ABC.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_abc.xlsx')

# # Word Frequency

# In[427]:
frequencyadd = pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf', header=None, delimiter='\t', encoding='utf-8')
pd.set_option('display.max_columns', 50)
frequencyadd.head(20)

# In[428]:
frequencyadd['orig_order'] = frequencyadd.index + 1
frequencyadd['lemma'] = frequencyadd[0]
frequencyadd = frequencyadd[['orig_order', 'lemma']]
frequencyadd.head(5)

# In[429]:
frequencyadd["freq_lemma"] = frequencyadd.groupby(["lemma"])["lemma"].transform("count")  # ("count") is actually utilizing the 'count' function!
frequencyadd.head(20)

# In[430]:
frequencyadd.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_freq.xlsx')

# # English Dictionary
# Let's first load the NA1904 BibleOL dictionary:

# In[431]:
BOLgreekDICT = pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/NA1904_dictionary_v1.0.xlsx')
pd.set_option('display.max_columns', 50)
BOLgreekDICT.head(20)

# In[432]:
BOLgreekDICT = BOLgreekDICT[['Lexeme', 'Lexeme_dict', 'Strong\'s number', 'gloss']]
BOLgreekDICT.head(10)

# In[433]:
BOLgreekDICT.describe()

# Let's load the SBLGNT lemmas

# In[434]:
SBLGNTlemmas = pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf', header=None, delimiter='\t', encoding='utf-8')
pd.set_option('display.max_columns', 50)
SBLGNTlemmas.head(2)

# In[436]:
SBLGNTlemmas['orig_order'] = SBLGNTlemmas.index + 1
SBLGNTlemmas['Lexeme'] = SBLGNTlemmas[0]
SBLGNTlemmas = SBLGNTlemmas[['orig_order', 'Lexeme']]
SBLGNTlemmas.head(5)

# In[437]:
SBLGNTlemmas.describe()

# Now let's try a merge of the two files

# In[438]:
# Outer merge keeps lemmas that have no dictionary entry (and vice versa).
SBLGNTglosses = pd.merge(SBLGNTlemmas, BOLgreekDICT, on='Lexeme', how='outer')
SBLGNTglosses.head(5)

# In[439]:
SBLGNTglosses.describe()

# In[440]:
SBLGNTglosses.head(20)

# In[441]:
SBLGNTglosses.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/SBLGNTglosses.xlsx')

# In[ ]:

# # Comparing with Tischendorf

# In[442]:
TISCH = use('tisch', hoist=globals())

# In[443]:
# NOTE(review): TF templates nest by indentation; layout reconstructed.
Tisch1 = '''
book book=Matthew book_code*
  chapter chapter=1
    word lex_og*
'''
Tisch1 = TISCH.search(Tisch1)
TISCH.show(Tisch1, start=1, end=1, condensed=True,
           colorMap={1: 'pink'}, extraFeatures={'verse', 'vrsnum'})

# # Testing SBLGNT

# In[444]:
SBLGNT = use('SBLGNT/tf/6.12', hoist=globals())

# In[445]:
Search1 = '''
book book=Revelation book_code*
  chapter chapter=1
    sentence
      clause
        word lemma* morph* normalized_word* case* degree* gn* mood* nu* orig_order* ps* voice* vt* lemma_translit* dict_abc_order* lemma_freq* lemma_dictform* lemma_freq* lemma_gloss* lemma_strongs*
'''
Search1 = SBLGNT.search(Search1)
SBLGNT.show(Search1, start=1, end=1, condensed=True,
            colorMap={1: 'pink'}, extraFeatures={'verse', 'vrsnum'})

# ### Searching for word combinations

# #### Searching for "the testimony of Jesus"
# As we search for the phrase we want to make sure that "martyria" can have
# any case and "Jaesu" needs to be in the genitive case.
# The operator `<:` determines that `w2` needs to follow `<` `w1` without any
# word in between `:`.

# In[446]:
TestimonyOfJesus = '''
clause
  w1:word lemma=μαρτυρία
  w2:word lemma=Ἰησοῦς case=genitive
  w1 <: w2
'''
TestimonyOfJesus = SBLGNT.search(TestimonyOfJesus)
SBLGNT.show(TestimonyOfJesus, start=1, end=10, condensed=True,
            colorMap={2: 'red', 3: 'orange'})

# ## Complex searches

# ### Let's search for correlated conjunctions
# In Col 2:16-17 we find a syntactical construction that involves a series of
# conjunctions (καί and ἤ).
# The critical edition of the
# [NA28](https://ref.ly/logosres/na28?ref=BibleNA27.Col2.18) presents the text
# in the following way:
# ![image.png](attachment:image.png)
#
# The syntactical structure is differently analyzed by scholars. Below you see
# a comparison between the
# [cascadia](https://ref.ly/logosres/csgntsbl?ref=BibleSBLGNT.Col2.16) and
# [opentext](https://ref.ly/logosres/opentextgraph?ref=BibleNA27.Col2.16)
# analysis. One needs to keep in mind that the NA edition renders the text
# slightly differently (see ἤ => καί) than the Mehrheitstext that is followed
# by the SBL edition (see καί => ἤ):
# ![image-2.png](attachment:image-2.png)
#
# Let's look first at the conjunction sequence that we have in the NA28.
# Does this appear elsewhere in the SBLGNT text?

# In[447]:
# Verse containing some word, then καί followed (anywhere later) by two ἤ.
# NOTE(review): template nesting reconstructed — verify against TF docs.
KaiHeHe = '''
v1:verse
  w1:word
  w2:word lemma=καί
  w3:word lemma=ἤ
  w4:word lemma=ἤ
  w1 < w2
  w2 < w3
  w3 < w4
'''
KaiHeHe = SBLGNT.search(KaiHeHe)
SBLGNT.show(KaiHeHe, start=1, end=20, condensed=True,
            colorMap={2: 'grey', 3: 'red', 4: 'magenta', 5: 'magenta'})

# In[448]:
# Same pattern, but with proximity constraints (=20:, <10:, <6:) to keep the
# conjunctions close together inside the verse.
KaiHeHe2 = '''
v1:verse
  w1:word
  w2:word lemma=καί
  w3:word lemma=ἤ
  w4:word lemma=ἤ
  v1 =20: w1
  w1 < w2
  w1 <10: w2
  w2 < w3
  w2 <6: w3
  w3 < w4
  w3 <6: w4
'''
KaiHeHe2 = SBLGNT.search(KaiHeHe2)
SBLGNT.show(KaiHeHe2, start=1, end=200, condensed=True,
            colorMap={2: 'grey', 3: 'red', 4: 'magenta', 5: 'magenta'})

# No "either ... or" construction in:
# - Mat 6:31 (NA28)
# - Mark 10:29
# - Luk 18:29
# - Rom 8:35
# - 1 Cor 5:11 (it's not the first ἢ but the previous ἐάν that initiates the "either")
# - 1 Cor 14:6 (it's not the first ἢ but the previous ἐάν that initiates the "either")
# - 1 Thes 2:19
# - 1 Peter 4:15
#
# The only exception is Mk 13:35. But the construction is not triggering the
# "either ... or" function (see also modern translations). It rather hints at
# a text-critical issue which is also well documented in the text critical
# apparatus:
#
# ![image.png](attachment:image.png)
#
# Consequently, the proper translation is "X or Y or Z...".
# **Conclusion: it is highly unlikely that Col 2:16 resembles an
# "either ... or" construction. By default if there are three ἢ conjunctions
# appearing in a sequence they trigger the meaning "or ... or".**
#
# The following query does not exclude a preceding καὶ:

# In[449]:
# NOTE(review): template nesting/operator layout reconstructed — verify.
HeHeHe = '''
verse
  word lemma=ἤ
  < word lemma=ἤ
  < word lemma=ἤ
'''
HeHeHe = SBLGNT.search(HeHeHe)
SBLGNT.show(HeHeHe, start=1, end=10, condensed=True,
            colorMap={2: 'magenta', 3: 'magenta', 4: 'magenta'})

# # Producing some graphical representations
# Since we will be using the python based visualization tools we need to
# first export our TF search results as a table and then load that table as
# a pandas dataframe.

# ## The distribution of Jesus as subject (nominative case) in the NT

# In[450]:
JesusInNT = '''
book
  clause
    word lemma=Ἰησοῦς case=nominative
'''
JesusInNT = SBLGNT.search(JesusInNT)
SBLGNT.show(JesusInNT, start=1, end=3, condensed=True, colorMap={2: 'gold'})

# In[451]:
SBLGNT.export(JesusInNT, toDir='D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/', toFile='JesusInNT.tsv')

# In[452]:
# TF export writes UTF-16 TSV, hence the encoding below.
JesusInNT = pd.read_csv('D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/JesusInNT.tsv', delimiter='\t', encoding='utf-16')
JesusInNT.head()

# In[453]:
# S1 is the first section column of the export (the book name).
figure(num=None, figsize=(5, 5), dpi=80, facecolor='w', edgecolor='k')
JesusInNT.groupby("S1").size().sort_values(ascending=True).plot.barh()
plt.xlabel('occurence of Jesus as subject')
plt.ylabel('NT books')
plt.title('Jesus as subject (case=nominative)')
plt.show()

# In[454]:
JesusInNT.S1.value_counts(sort=False).plot.pie(autopct='%1.0f%%', shadow=True, startangle=90)
plt.show()

# In[ ]:

# In[ ]:

# In[ ]: