#!/usr/bin/env python
# coding: utf-8

# # Compare two XML files (Nestle1904GBI)
# Jupyter Notebook intended to quickly compare the content and structure of
# two versions of the input XML files.

# In[3]:

import xml.etree.ElementTree as ET

# Following variables should contain the relative path and name of the two
# files to compare
book = "01-matthew.xml"
version1 = "oct_27_2022"
version2 = "apr_6_2023"
file1_path = "sourcedata/" + version1 + "/" + book
file2_path = "sourcedata/" + version2 + "/" + book

# Set to True if detailed reporting is required
details = True


# In[4]:

def normalize_text(text):
    """Return *text* with surrounding whitespace removed and inner runs collapsed.

    ElementTree sets ``Element.text`` to ``None`` for elements that have no
    text content; ``None`` is treated as the empty string so that such
    elements can be compared without raising ``AttributeError``.
    """
    if text is None:
        return ''
    return ' '.join(text.split())


def compare_xml_files(file1, file2, details):
    """Parse two XML files, compare them and print the outcome.

    Args:
        file1: Path (or file object) of the first XML file.
        file2: Path (or file object) of the second XML file.
        details: When true, every recorded difference is printed after the
            overall verdict.

    Any exception raised by ``ET.parse`` (missing file, malformed XML) is
    propagated to the caller.
    """
    # Parse both files and compare from the root elements downwards.
    root1 = ET.parse(file1).getroot()
    root2 = ET.parse(file2).getroot()
    differences = compare_elements(root1, root2, details)

    if not differences:
        print("The XML files are identical.")
        return

    print("The XML files are different.")
    if details:
        print("\nDetails:")
        for diff in differences:
            print(diff)


def _attribute_differences(attrib1, attrib2):
    """Return messages describing how two attribute mappings differ."""
    messages = []
    for attr in attrib1:
        if attr not in attrib2:
            messages.append(f"Attribute: {attr} is present in the first file but not in the second file")
    for attr in attrib2:
        if attr not in attrib1:
            messages.append(f"Attribute: {attr} is present in the second file but not in the first file")
    for attr in attrib1:
        if attr in attrib2 and attrib1[attr] != attrib2[attr]:
            messages.append(f"Attribute: {attr} - Value in first file: {attrib1[attr]}, Value in second file: {attrib2[attr]}")
    return messages


def compare_elements(elem1, elem2, details):
    """Recursively compare two elements and return a list of difference messages.

    Tag, attributes, whitespace-normalized text and the number of children
    are compared.  Children are compared pairwise, in document order, only
    when both elements have the same number of children.

    Args:
        elem1: Element taken from the first document.
        elem2: Element taken from the second document.
        details: When true, verbose messages including the differing values
            are produced; otherwise a short summary message is appended for
            each differing element.

    Returns:
        A list of strings; it is empty when no difference was found.
    """
    differences = []

    # Compare element tags and attributes
    if elem1.tag != elem2.tag or elem1.attrib != elem2.attrib:
        if details:
            differences.append("\n")
            differences.extend(_attribute_differences(elem1.attrib, elem2.attrib))
            differences.append(f"Tag/Attributes details: {elem1.tag} != {elem2.tag} or {elem1.attrib} != {elem2.attrib}")
        else:
            differences.append("Differences in Tag/Attributes found")

    # Normalize and compare element text (a missing text node counts as empty)
    if normalize_text(elem1.text) != normalize_text(elem2.text):
        differences.append("Differences in element Text found")
        if details:
            differences.append(f"Text: {elem1.text} != {elem2.text}\n")

    # Compare element children recursively
    children1 = list(elem1)
    children2 = list(elem2)
    if len(children1) != len(children2):
        # With differing child counts there is no reliable pairing, so the
        # subtrees below this element are not compared any further.
        differences.append("Differences in number of children")
        if details:
            differences.append(f"Number of Children: {len(children1)} != {len(children2)}\n")
    else:
        for child1, child2 in zip(children1, children2):
            differences.extend(compare_elements(child1, child2, details))

    return differences


# this is the main part
# Guarded so that importing this module does not touch the file system.
if __name__ == "__main__":
    print("Comparing file ", file1_path, " with ", file2_path, "\n\nResult:", end="")
    compare_xml_files(file1_path, file2_path, details)


# In[ ]:


# In[ ]: