#!/usr/bin/env python
# coding: utf-8

# # Vocalisation of the Tetragrammaton (BHSA)

# ## Table of content (TOC)
#
# * 1 - Introduction
# * 2 - Load Text-Fabric app and data
# * 3 - Performing the queries
# * 3.1 - Get overview of all pointed versions
# * 3.2 - Plotting the punctuations of the Tetragrammaton
# * 3.3 - Some other playing around
# * 4 - Required libraries
# * 5 - Notebook details
#
# # 1 - Introduction
# ##### [Back to TOC](#TOC)
#
# In the Old Testament the Tetragrammaton יהוה is written with different vowels, for example with the vowels of אֲדֹנַי (Adonai, ETCBC transliteration: >:ADON@J).

# # 2 - Load Text-Fabric app and data
# ##### [Back to TOC](#TOC)

# In[1]:

# Reload changed modules automatically while working in the notebook
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# In[2]:

# Loading the Text-Fabric code
# Note: it is assumed Text-Fabric is installed in your environment.
from tf.fabric import Fabric
from tf.app import use


# In[3]:

# Load the BHS app and data; hoist=globals() places the Text-Fabric API
# names (F, T, Fs, Fall, ...) used by the later cells into the global namespace
BHS = use("etcbc/BHSA", hoist=globals())


# Note: The feature documentation can be found at [ETCBC GitHub](https://github.com/ETCBC/bhsa/blob/master/docs/features/0_home.md)

# In[4]:

# The following will push the Text-Fabric stylesheet to this notebook (to facilitate proper display with notebook viewer)
BHS.dh(BHS.getCss())


# # 3 - Performing the queries
# ##### [Back to TOC](#TOC)

# ## 3.1 - Get overview of all pointed versions

# First get all occurrences of the Tetragrammaton יהוה (so without vowel pointing and other diacritical marks). See also notes on [feature g_word](https://github.com/ETCBC/bhsa/blob/master/docs/features/g_word.md).

# In[5]:

# The indentation in a search template expresses embedding, so every result
# is a (book, chapter, verse, word) tuple of nodes.
JHWHQuery = '''
book
  chapter
    verse
      word g_cons=JHWH
'''
JHWHResults = BHS.search(JHWHQuery)


# Now post-process the results to create a nice table.
# In[6]:

# Libraries for table formatting and regular expressions
import re
import pandas as pd
from IPython.display import display

# Per cantillation-free pointing: [frequency, first occurrence, Hebrew form]
resultDict = {}

# Walk over every hit; the word node is the fourth member of a result tuple
for item in JHWHResults:
    node = item[3]

    # Transliterated (g_word) and Hebrew script (g_word_utf8) form of this occurrence
    pointedWord = F.g_word.v(node)
    hebrewWord = F.g_word_utf8.v(node)

    # Strip the cantillation marks, which the BHSA transliteration writes as digits
    vocalizedWord = re.sub(r'\d', '', pointedWord)

    if vocalizedWord not in resultDict:
        # New pointing: start counting and remember where it was first seen
        firstOccurrence = T.sectionFromNode(node)
        resultDict[vocalizedWord] = [1, firstOccurrence, hebrewWord]
        continue

    # Known pointing: one more occurrence
    resultDict[vocalizedWord][0] += 1

# One row per pointing, most frequent first
tableData = pd.DataFrame(
    [
        [pointing, count, first, hebrew]
        for pointing, (count, first, hebrew) in resultDict.items()
    ],
    columns=["Pointed Word", "Frequency", "First Occurrence", "Hebrew Word"]
).sort_values(by="Frequency", ascending=False)

# Display the table
display(tableData)


# ## 3.2 Plotting the punctuations of the Tetragrammaton

# In[7]:

import pandas as pd
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.layouts import column

# Render Bokeh plots inline in the notebook
output_notebook()

# Make sure tableData carries exactly the column names the plot refers to
tableData.columns = ["Pointed Word", "Frequency", "First Occurrence", "Hebrew Word"]

# Data source feeding the bar glyphs and the hover tool
source = ColumnDataSource(tableData)

# Bar chart canvas with one category per Hebrew word form
p = figure(
    title="Frequency of Tetragrammaton vocalisation in biblical text",
    x_range=tableData['Hebrew Word'].tolist(),  # the categories are passed as a plain list
    width=1000,
    height=800,
    toolbar_location="right"
)

# Draw the bars
p.vbar(x='Hebrew Word', top='Frequency', width=0.5, source=source)

# Add labels and
# customizations
p.xaxis.axis_label = "Hebrew Word"
p.yaxis.axis_label = "Frequency"
p.xaxis.major_label_orientation = "horizontal"
p.xaxis.major_label_text_font_size = "26pt"  # Increase font size of x-axis labels

# Add hover tool
hover = HoverTool()
hover.tooltips = [
    ("Pointed Word", "@{Pointed Word}"),
    ("Frequency", "@Frequency"),
    ("First Occurrence", "@{First Occurrence}"),
    ("Hebrew Word", "@{Hebrew Word}"),
]
p.add_tools(hover)

# Show the interactive plot
show(p)


# ## 3.3 Some other playing around

# Add another condition to the query. This selects the vowels of adOnAi, transliterated as O and @, which should be around the Wav. The regexp includes '.*' to allow for in-between cantillation marks.

# In[8]:

adonaiQuery = '''
word g_cons=JHWH g_word~O.*W.*@
'''
adonaiResults = BHS.search(adonaiQuery)


# In[9]:

BHS.table(adonaiResults, condensed=False, extraFeatures={'voc_lex'})


# In[10]:

adonaiQuery2 = '''
word lex=JHWH/ g_word~O.*W.*@
'''
adonaiResults2 = BHS.search(adonaiQuery2)


# Print the features of a word node that contain data

# In[11]:

featureList = Fall()
for item in adonaiResults2:
    Node = item[0]
    for feature in featureList:
        featureValue = Fs(feature).v(Node)
        # Only features that actually hold a value for this node are printed
        if featureValue is not None:
            print(feature, '=', featureValue)
    # NOTE(review): only the first result is inspected; the break is assumed
    # to belong to the outer loop (as in cell In[14]) - confirm.
    break


# In[12]:

import re
import pandas as pd
from IPython.display import display

# Per cantillation-free pointing: [frequency, first occurrence, Hebrew form]
resultDict = {}

# Process each item in the JHWHResults (the word node is the fourth member)
for item in JHWHResults:
    node = item[3]

    # Transliterated (g_word) and Hebrew script (g_word_utf8) form of this occurrence
    pointedWord = F.g_word.v(node)
    hebrewWord = F.g_word_utf8.v(node)

    # Remove cantillations in the BHSA (presented by digits)
    vocalizedWord = re.sub(r'\d', '', pointedWord)

    if vocalizedWord in resultDict:
        # Pointing seen before: increase its frequency count
        resultDict[vocalizedWord][0] += 1
    else:
        # First time this pointing is seen: initialize the count and store firstOccurrence
        firstOccurrence = T.sectionFromNode(node)
        resultDict[vocalizedWord] = [1, firstOccurrence, hebrewWord]

# Convert the dictionary into a DataFrame and sort by frequency
tableData = pd.DataFrame(
    [[key, value[0], value[1], value[2]] for key, value in resultDict.items()],
    columns=["Pointing", "Frequency", "First Occurrence", "Hebrew Word"]
)
tableData = tableData.sort_values(by="Frequency", ascending=False)

# Display the table
display(tableData)


# In[13]:

qereQuery = '''
word qere_utf8 g_cons=JHWH
'''
qereResults = BHS.search(qereQuery)


# In[14]:

for item in qereResults:
    node = item[0]
    pointedWord = F.g_word.v(node)
    qereWord = F.qere.v(node)
    # The query filters on `qere_utf8` while `qere` is read here; presumably
    # both are set on the same nodes, but guard against a missing value so
    # re.sub is never called on None.
    uncantQereWord = re.sub(r'\d', '', qereWord) if qereWord is not None else None
    print(pointedWord, qereWord, uncantQereWord)
    # Only the first result is shown
    break


# # 4 - Required libraries
# ##### [Back to TOC](#TOC)
#
# The scripts in this notebook require (beside `text-fabric`) the following Python libraries to be installed in the environment:
#
#     bokeh
#     IPython
#     pandas
#     re
#
# You can install any missing library from within Jupyter Notebook using either `pip` or `pip3`.

# # 5 - Notebook details
# ##### [Back to TOC](#TOC)
#
#
# Author | Tony Jurg
# Version | 1.0
# Date | 4 November 2024