#!/usr/bin/env python
# coding: utf-8
# # Position of generally postpositive conjunctions in a clause (Nestle1904LFT)
# ## Table of content
# * 1 - Introduction
# * 2 - Load Text-Fabric app and data
# * 3 - Performing the queries
# * 3.1 - Identifying the occurrences of the lemmata
# * 3.2 - Position of conjunction γάρ within a clause
# * 3.3 - Position of conjunction δέ within a clause
# * 3.4 - Position of conjunction μέν within a clause
# * 3.5 - Position of conjunction οὖν within a clause
# * 4 - Attribution and footnotes
# # 1 - Introduction
# ##### [Back to TOC](#TOC)
#
# In ancient Greek, postpositive conjunctions like δέ and γάρ often occupy the second position in a (sub)clause, following the first significant word. This placement not only structures the syntax but also subtly nuances the meaning and flow of the text. This notebook determines the positional frequency of these conjunctions within a (sub)clause within the corpus of the Greek New Testament (based upon the LowFat treebank).
#
# According to Stanley E. Porter *et al.* the following conjunctions can be regarded to be postpositive: γάρ, δέ, μέν, and οὖν.1
#
# # 2 - Load Text-Fabric app and data
# ##### [Back to TOC](#TOC)
# In[1]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up automatically, without restarting the notebook kernel.
# Mode '2' reloads all modules (except those explicitly excluded) before
# every executed cell.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
# In[2]:
# Loading the Text-Fabric code
# Note: it is assumed Text-Fabric is installed in your environment
from tf.fabric import Fabric
from tf.app import use
# In[3]:
# load the N1904 app and data
# 'hoist=globals()' injects the Text-Fabric API handles (e.g. F, L, T, N)
# into the notebook's global namespace, so they can be used directly below.
N1904 = use ("tonyjurg/Nestle1904LFT", version="0.6", hoist=globals())
# In[19]:
# The following will push the Text-Fabric stylesheet to this notebook (to facilitate proper display of tables with notebook viewer)
N1904.dh(N1904.getCss())
# # 3 - Performing the queries
# ##### [Back to TOC](#TOC)
# ## 3.1 - Identifying the occurrences of the lemmata
# ##### [Back to TOC](#TOC)
#
# Identifying the occurrences of the conjunction under investigation can be done using a straightforward query. This will provide us with the node numbers of the word nodes containing the various lemmata, which will allow for further processing.
# In[20]:
# Define the query template
# Each template selects every word node whose lemma equals one of the four
# generally postpositive conjunctions under investigation.
GarQuery= '''
word lemma=γάρ
'''
DeQuery= '''
word lemma=δέ
'''
MenQuery= '''
word lemma=μέν
'''
OunQuery='''
word lemma=οὖν
'''
# The following will create a list containing ordered tuples consisting of node numbers of the items as they appear in the query
# Each print() emits a label without a trailing newline; N1904.search() then
# prints its own result count on the same line.
print('γάρ:',end='')
GarResult = N1904.search(GarQuery)
print('δέ: ',end='')
DeResult = N1904.search(DeQuery)
print('μέν:',end='')
MenResult = N1904.search(MenQuery)
print('οὖν:',end='')
OunResult = N1904.search(OunQuery)
# ## 3.2 - Position of γάρ within a clause
# ##### [Back to TOC](#TOC)
# The conjunction γάρ is generally postpositive, appearing as the second word in a (sub)clause of the surface text. Its primary function is to provide explanation or justification for a statement. This script will determine the frequency of the positions of the conjunction γάρ within a clause (wordgroup).
# In[21]:
import unicodedata
import string
from unidecode import unidecode
def remove_punctuation(input_string):
    """Return input_string with the punctuation marks '.', ',', '*' and ';' removed."""
    # Build a translation table that maps each punctuation character to None,
    # then delete them all in a single C-level pass.
    table = str.maketrans({mark: None for mark in ".,*;"})
    return input_string.translate(table)
# small function to find position of a word
def find_word_position(sentence, target_word):
    """Return the 1-based position of target_word among the whitespace-separated
    tokens of sentence, or -1 when the word is absent.

    A miss is also reported on stdout ('NOT: <sentence>') so that any
    occurrence that was not accounted for can be inspected manually.
    """
    tokens = sentence.split()
    if target_word in tokens:
        # index() is 0-based; shift to a 1-based, more 'natural' position
        return tokens.index(target_word) + 1
    print ('NOT:',sentence)
    return -1
# Transliterate the target lemma to plain ASCII so it can be matched against
# the unidecode'd surface text of the word group below.
target_word = unidecode('γάρ')
position_frequency = {}  # maps 1-based word position -> number of occurrences
number_results=0
# GarResult is a list of result tuples; item [0] of each tuple is the word node
for word in GarResult:
    # Take the first ancestor returned by L.u() as the parent wordgroup
    # (assumed to be the smallest enclosing wordgroup -- TODO confirm ordering)
    parent_wg=L.u(word[0])[0]
    number_results+=1
    # decoded text of the parent wordgroup with punctuation removed
    parent_wg_text=remove_punctuation(unidecode(T.text(parent_wg)))
    position = find_word_position(parent_wg_text, target_word)
    # Check if the position is found (-1 signals 'not found')
    if position != -1:
        # Update the frequency dictionary
        position_frequency[position] = position_frequency.get(position, 0) + 1
print('Total number of occurances of γάρ:',number_results)
# Calculate percentages over the positions that were actually found
total_positions = sum(position_frequency.values())
position_percentage = {pos: count / total_positions * 100 for pos, count in position_frequency.items()}
# Print the table as markdown, rendered by Text-Fabric's dm() helper
table_output="Position | Frequency | Percentage \n --- | --- | ---\n "
for pos in sorted(position_percentage.keys()):
    table_output +=f"{pos} | {position_frequency.get(pos, 0)} | {position_percentage.get(pos, 0):.2f}%\n"
N1904.dm(table_output)
# ## 3.3 - Position of δέ within a clause
# ##### [Back to TOC](#TOC)
# The conjunction δέ is generally postpositive, appearing as the second word in a (sub)clause of the surface text. Although its functions are diverse, it plays a crucial role in the structure and flow of Greek sentences. This script will determine the frequency of the positions of the conjunction δέ within a clause (wordgroup).
# In[22]:
import unicodedata
import string
from unidecode import unidecode
def remove_punctuation(input_string):
    """Return input_string with the punctuation marks '.', ',', '*' and ';' removed."""
    # Map every punctuation character to None so translate() deletes them
    # in one pass over the string.
    deletion_table = str.maketrans({mark: None for mark in ".,*;"})
    return input_string.translate(deletion_table)
def fix_abbreviated(input_string):
    """Expand the transliterated elided form "d'" back to the full form "de".

    After unidecode, an elided δ' in the Greek surface text appears as "d'",
    which would otherwise never match the transliterated target word "de".
    """
    return input_string.replace("d'", "de")
# small function to find position of a word
def find_word_position(sentence, target_word):
    """Return the 1-based position of target_word among the whitespace-separated
    tokens of sentence, or -1 when the word is absent.

    Misses are echoed on stdout ('NOT: <sentence>') so that any occurrence of
    'de' that was not accounted for can be inspected.
    """
    tokens = sentence.split()
    if target_word in tokens:
        # shift the 0-based index to a 1-based, more 'natural' position
        return tokens.index(target_word) + 1
    print ('NOT:',sentence)
    return -1
# Transliterate the target lemma to plain ASCII so it can be matched against
# the unidecode'd surface text of the word group below.
target_word = unidecode('δέ')
position_frequency = {}  # maps 1-based word position -> number of occurrences
number_results=0
# DeResult is a list of result tuples; item [0] of each tuple is the word node
for word in DeResult:
    # Take the first ancestor returned by L.u() as the parent wordgroup
    # (assumed to be the smallest enclosing wordgroup -- TODO confirm ordering)
    parent_wg=L.u(word[0])[0]
    number_results+=1
    # decoded text of the parent wordgroup with punctuation removed and the
    # elided form "d'" repaired to "de" so it matches the target word
    parent_wg_text=fix_abbreviated(remove_punctuation(unidecode(T.text(parent_wg))))
    position = find_word_position(parent_wg_text, target_word)
    # Check if the position is found (-1 signals 'not found')
    if position != -1:
        # Update the frequency dictionary
        position_frequency[position] = position_frequency.get(position, 0) + 1
print('Total number of occurances of δέ:',number_results)
# Calculate percentages over the positions that were actually found
total_positions = sum(position_frequency.values())
position_percentage = {pos: count / total_positions * 100 for pos, count in position_frequency.items()}
# Print the table as markdown, rendered by Text-Fabric's dm() helper
table_output="Position | Frequency | Percentage \n --- | --- | ---\n "
for pos in sorted(position_percentage.keys()):
    table_output +=f"{pos} | {position_frequency.get(pos, 0)} | {position_percentage.get(pos, 0):.2f}%\n"
N1904.dm(table_output)
# ## 3.4 - Position of μέν within a clause
# ##### [Back to TOC](#TOC)
# The conjunction μέν is generally postpositive, appearing as the second word in a (sub)clause of the surface text. Often used in contrast with δέ, μέν does not have a direct English equivalent but is used to set up a contrast or comparison, functioning similarly to "on the one hand." This script will determine the frequency of the positions of the conjunction μέν within a clause (wordgroup).
# In[23]:
import unicodedata
import string
from unidecode import unidecode
def remove_punctuation(input_string):
    """Return input_string with the punctuation marks '.', ',', '*' and ';' removed."""
    # One translation table, one C-level pass: each punctuation character
    # is mapped to None, i.e. deleted.
    strip_table = str.maketrans({mark: None for mark in ".,*;"})
    return input_string.translate(strip_table)
# small function to find position of a word
def find_word_position(sentence, target_word):
    """Return the 1-based position of target_word among the whitespace-separated
    tokens of sentence, or -1 when the word is absent.

    A miss is also reported on stdout ('NOT: <sentence>') so unexpected
    cases can be inspected manually.
    """
    tokens = sentence.split()
    if target_word in tokens:
        # convert the 0-based index to a 1-based, more 'natural' position
        return tokens.index(target_word) + 1
    print ('NOT:',sentence)
    return -1
# Transliterate the target lemma to plain ASCII so it can be matched against
# the unidecode'd surface text of the word group below.
target_word = unidecode('μέν')
position_frequency = {}  # maps 1-based word position -> number of occurrences
number_results=0
# MenResult is a list of result tuples; item [0] of each tuple is the word node
for word in MenResult:
    # Take the first ancestor returned by L.u() as the parent wordgroup
    # (assumed to be the smallest enclosing wordgroup -- TODO confirm ordering)
    parent_wg=L.u(word[0])[0]
    number_results+=1
    # decoded text of the parent wordgroup with punctuation removed
    parent_wg_text=remove_punctuation(unidecode(T.text(parent_wg)))
    position = find_word_position(parent_wg_text, target_word)
    # Check if the position is found (-1 signals 'not found')
    if position != -1:
        # Update the frequency dictionary
        position_frequency[position] = position_frequency.get(position, 0) + 1
print('Total number of occurances of μέν:',number_results)
# Calculate percentages over the positions that were actually found
total_positions = sum(position_frequency.values())
position_percentage = {pos: count / total_positions * 100 for pos, count in position_frequency.items()}
# Print the table as markdown, rendered by Text-Fabric's dm() helper
table_output="Position | Frequency | Percentage \n --- | --- | ---\n "
for pos in sorted(position_percentage.keys()):
    table_output +=f"{pos} | {position_frequency.get(pos, 0)} | {position_percentage.get(pos, 0):.2f}%\n"
N1904.dm(table_output)
# # 4 - Attribution and footnotes
# ##### [Back to TOC](#TOC)
#
# #### Footnotes:
#
# 1 Porter, Stanley E., Jeffrey T. Reed, and Matthew Brook O’Donnell. *Fundamentals of New Testament Greek* (Grand Rapids, MI; Cambridge: William B. Eerdmans Publishing Company, 2010), 181.