The following script reads the XML attributes class and morph of every w tag and compares them. It analyses the words whose morph attribute is 'adv' while the class attribute is not 'adv'. It first prints a number of examples (with verse/word location) and finishes with a table showing the frequency of the cases where morph=adv does not match class=adv.
import os
import xml.etree.ElementTree as ET
from tabulate import tabulate
# Frequency table filled by compare_class_and_morph(): maps a
# "lemma=..., morph=..., class=..." description to its number of occurrences.
ResultDict = {}


def compare_class_and_morph(file_path, max_examples=None):
    """Report <w> elements whose morph is 'adv' while their class is not.

    Parses the XML file at *file_path* and inspects the ``class``, ``morph``,
    ``lemma`` and ``ref`` attributes of every ``w`` element. For each element
    whose ``morph`` equals 'adv' (compared case-insensitively) and whose
    ``class`` is not exactly 'adv', the module-level ``ResultDict`` counter
    for that lemma/morph/class combination is incremented, and the first
    *max_examples* such elements are printed together with their ``ref``.

    Args:
        file_path: Path of the XML file to parse.
        max_examples: Maximum number of individual mismatches to print.
            When omitted, the module-level ``NumberExamples`` setting is used.

    Returns:
        The number of mismatching ``w`` elements found in this file.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be opened.
    """
    if max_examples is None:
        # Fall back to the script-level setting; it is assigned at module
        # level before this function is called.
        max_examples = NumberExamples

    mismatch_count = 0
    root = ET.parse(file_path).getroot()
    for w_tag in root.iter('w'):
        morph_attr = w_tag.get('morph')
        # Element.get() yields None for a missing attribute; a word without
        # morph cannot be an adverb by morph, so skip it rather than crash
        # on None.lower().
        if morph_attr is None or morph_attr.lower() != 'adv':
            continue

        class_attr = w_tag.get('class')
        if class_attr == 'adv':
            continue

        lemma_attr = w_tag.get('lemma')
        ref_attr = w_tag.get('ref')
        mismatch_count += 1

        # Count this lemma/morph/class combination (starting from 0 when it
        # has not been seen before).
        mapping = f"lemma={lemma_attr}, morph={morph_attr}, class={class_attr}"
        ResultDict[mapping] = ResultDict.get(mapping, 0) + 1

        # Only the first few mismatches are listed individually.
        if mismatch_count <= max_examples:
            print(f"At ref={ref_attr} found class={class_attr} and morph={morph_attr} for lemma={lemma_attr}")
    return mismatch_count
# Settings: the XML file to inspect (relative path including the file name)
# and how many individual mismatches to list ahead of the frequency table.
InputFile = "xml/20230628/01-matthew.xml"
NumberExamples = 10

if not os.path.exists(InputFile):
    # Nothing to analyse without the input file
    print(f"Could not find file {InputFile}.")
else:
    print(f"Comparing atributes class morph for file {InputFile}\n\nResult:\n\n", end="")
    differences = compare_class_and_morph(InputFile)
    # One [description, count] row per ResultDict entry, most frequent first
    TableData = [[description, count] for description, count in ResultDict.items()]
    TableData.sort(key=lambda row: row[1], reverse=True)
    # Render the frequency table
    headers = ["lemma, morph, class", "frequency"]
    print(tabulate(TableData, headers=headers, tablefmt='fancy_grid'))
Comparing atributes class morph for file xml/20230628/01-matthew.xml Result: At ref=MAT 1:17!7 found class=prep and morph=ADV for lemma=ἕως At ref=MAT 1:17!14 found class=prep and morph=ADV for lemma=ἕως At ref=MAT 1:17!25 found class=prep and morph=ADV for lemma=ἕως At ref=MAT 1:24!9 found class=conj and morph=ADV for lemma=ὡς At ref=MAT 1:25!5 found class=prep and morph=ADV for lemma=ἕως At ref=MAT 2:8!18 found class=conj and morph=ADV for lemma=ὅπως At ref=MAT 2:9!18 found class=conj and morph=ADV for lemma=ἕως At ref=MAT 2:9!21 found class=prep and morph=ADV for lemma=ἐπάνω At ref=MAT 2:13!28 found class=conj and morph=ADV for lemma=ἕως At ref=MAT 2:15!4 found class=prep and morph=ADV for lemma=ἕως ╒═══════════════════════════════════════╤═════════════╕ │ lemma, morph, class │ frequency │ ╞═══════════════════════════════════════╪═════════════╡ │ lemma=ὡς, morph=ADV, class=conj │ 40 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ἕως, morph=ADV, class=prep │ 35 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὅπως, morph=ADV, class=conj │ 17 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ἕως, morph=ADV, class=conj │ 14 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὅτε, morph=ADV, class=conj │ 12 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὥσπερ, morph=ADV, class=conj │ 10 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ἐπάνω, morph=ADV, class=prep │ 8 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὕστερος, morph=ADV, class=adj │ 7 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=μόνος, morph=ADV, class=adj │ 7 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὅπου, morph=ADV, class=conj │ 6 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὀπίσω, morph=ADV, class=prep │ 5 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=εὐθύς, morph=ADV, class=adj │ 5 │ 
├───────────────────────────────────────┼─────────────┤ │ lemma=πλήν, morph=ADV, class=conj │ 5 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=πέραν, morph=ADV, class=prep │ 3 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ταχύς, morph=ADV, class=adj │ 3 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ἔξω, morph=ADV, class=prep │ 3 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=χωρίς, morph=ADV, class=prep │ 3 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὅθεν, morph=ADV, class=conj │ 3 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=καθώς, morph=ADV, class=conj │ 3 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὡσεί, morph=ADV, class=conj │ 2 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=μέχρι, morph=ADV, class=prep │ 2 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ἅμα, morph=ADV, class=prep │ 2 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=μεταξύ, morph=ADV, class=prep │ 2 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=παρεκτός, morph=ADV, class=prep │ 1 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=μακράν, morph=ADV, class=adj │ 1 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=πῶς, morph=ADV, class=conj │ 1 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὄπισθεν, morph=ADV, class=prep │ 1 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=οὗ, morph=ADV, class=conj │ 1 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὑποκάτω, morph=ADV, class=prep │ 1 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ἄχρι, morph=ADV, class=prep │ 1 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=καθά, morph=ADV, class=conj │ 1 │ ├───────────────────────────────────────┼─────────────┤ │ lemma=ὀψέ, morph=ADV, class=prep │ 1 │ 
╘═══════════════════════════════════════╧═════════════╛