#!/usr/bin/env python
# coding: utf-8
# # Identifying use of critical signs in the text (N1904GBI)
# ## Table of contents
# * 1 - Introduction
# * 2 - Load Text-Fabric app and data
# * 3 - Performing the queries
# * 3.1 - Getting an overview of leading critical signs
# * 3.2 - Query for all words that contain some critical marks
# # 1 - Introduction
# ##### [Back to TOC](#TOC)
#
# This Jupyter Notebook investigates the presence of 'odd' values for the feature 'after'.
# # 2 - Load Text-Fabric app and data
# ##### [Back to TOC](#TOC)
# In[1]:
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
# In[1]:
# Loading the New Testament TextFabric code
# Note: it is assumed Text-Fabric is installed in your environment.
from tf.fabric import Fabric
from tf.app import use
# In[2]:
# load the app and data
N1904 = use("tonyjurg/Nestle1904GBI", version="0.4", hoist=globals())
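# Since the introduction refers to 'odd' values for the feature 'after', a quick first look at that feature can be taken right after loading. This is a minimal sketch; it only uses the standard Text-Fabric freqList() method and the F object hoisted by use() above.
# In[ ]:
# List how often each distinct value of the 'after' feature occurs (sketch, not part of the original queries)
print(F.after.freqList())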
# # 3 - Performing the queries
# ##### [Back to TOC](#TOC)
# ## 3.1 - Getting an overview of leading critical signs
# ##### [Back to TOC](#TOC)
# First get a list of all unique words in Unicode (including punctuation, critical signs, and trailing spaces):
# In[4]:
unicodeList = F.word.freqList()
print('Number of results:', len(unicodeList))
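# The list returned by freqList() is ordered by descending frequency and consists of (word, frequency) tuples. A quick peek at the first few entries illustrates this structure (a minimal sketch):
# In[ ]:
# Show the first few (word, frequency) tuples of the ordered list
print(unicodeList[:5])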
# Now look only at the first character of each word:
# In[8]:
# Initialize empty dictionaries to store the frequencies
frequencyList = {}
criticalSignsList = {}
criticalSigns = {"—", "[", "("}
# Iterate through the list (an ordered list of (word, frequency) tuples)
for item in unicodeList:
    # Get the first character of the word form
    firstChar = item[0][0]
    # Update the frequency in the dictionary for the full list
    # (note: each unique word form is counted once, since unicodeList holds unique forms)
    frequencyList[firstChar] = frequencyList.get(firstChar, 0) + 1
    # Also count it separately if it is one of the critical signs
    if firstChar in criticalSigns:
        criticalSignsList[firstChar] = criticalSignsList.get(firstChar, 0) + 1
print("Frequency list of all first characters:")
print(frequencyList)
print("\nFrequency list of critical signs:")
print(criticalSignsList)
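# To see which word forms actually start with one of these critical signs, a small follow-up can list a few examples per sign. This is a minimal sketch that reuses the criticalSigns set and unicodeList from the cells above:
# In[ ]:
# For each critical sign, count and show a few word forms that start with it
for sign in criticalSigns:
    examples = [word for word, freq in unicodeList if word.startswith(sign)]
    print(f"{sign!r}: {len(examples)} word forms, e.g. {examples[:3]}")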
# ## 3.2 - Query for all words that contain some critical marks
# ##### [Back to TOC](#TOC)
# In[10]:
# Library to format table
from tabulate import tabulate
# The actual query
SearchLeadingCriticalMarks = '''
word word~[][()—]
'''
MarksList = N1904.search(SearchLeadingCriticalMarks)
# Postprocess the query results
Results = []
for foundTuple in MarksList:
    node = foundTuple[0]
    location = "{} {}:{}".format(F.book.v(node), F.chapter.v(node), F.verse.v(node))
    result = (location, F.word.v(node), F.after.v(node))
    Results.append(result)
# Produce the table (three columns, matching the items in each result tuple)
headers = ["location", "word", "after"]
print(tabulate(Results, headers=headers, tablefmt='fancy_grid'))
# Note: The following site can be used to build and verify a regular expression: [regex101.com](https://regex101.com/) (choose the 'Python' flavor).
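# The same character class can also be checked locally with Python's re module. The sample strings below are purely illustrative and not taken from the corpus:
# In[ ]:
# Verify that the character class matches strings containing any of the critical signs
import re
pattern = re.compile('[][()—]')
for sample in ('λόγος', '[λόγος]', '(λόγος', 'λόγος —'):
    print(sample, '->', bool(pattern.search(sample)))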
# In[ ]: