The following small script shows the differences between the features 'word' and 'unicode'.
# Relative paths (including file names) of the two files to compare.
WordFile = "../tf/0.4/word.tf"
UnicodeFile = "../tf/0.4/unicode.tf"

# Maximum number of differences to show before the comparison stops.
NumberExamples = 10
import os
def compare_files(file1_path, file2_path, max_examples=None):
    """Print the line-by-line differences between two UTF-8 text files.

    Both files are read completely. If their line counts differ, a message
    is printed and nothing is compared. Otherwise the lines are compared
    pairwise after stripping surrounding whitespace, and every differing
    pair is printed together with its 1-based line number. A differing pair
    whose line in the first file starts with "@" is skipped and is not
    counted as a difference.

    Args:
        file1_path: Path of the first file.
        file2_path: Path of the second file.
        max_examples: Stop after reporting this many differences. When None
            (the default) the module-level ``NumberExamples`` is used.

    Returns:
        None. All results are written to standard output.
    """
    if max_examples is None:
        # Fall back to the script-wide setting so existing two-argument
        # calls behave exactly as before.
        max_examples = NumberExamples

    with open(file1_path, 'r', encoding='utf-8') as file1, \
            open(file2_path, 'r', encoding='utf-8') as file2:
        lines_file1 = file1.readlines()
        lines_file2 = file2.readlines()

    # A pairwise comparison is only meaningful when both files have the
    # same number of lines.
    if len(lines_file1) != len(lines_file2):
        print("Files have different numbers of lines.")
        return

    found_differences = 0
    for line_num, (line1, line2) in enumerate(zip(lines_file1, lines_file2), start=1):
        line1 = line1.strip()
        line2 = line2.strip()
        if line1 == line2:
            continue
        if line1.startswith("@"):
            continue  # Skip lines that start with "@"
        # The former `line1.startswith("\n")` test was dropped: after
        # strip() a line can never start with a newline, so it never fired.
        print(f"Line {line_num} differs:")
        print(f"File {file1_path}: {line1}")
        print(f"File {file2_path}: {line2}")
        print()
        found_differences += 1
        if found_differences == max_examples:
            print('Stoped comparing after ', found_differences, 'differences')
            break

    # Printed whenever the comparison ran, whether or not differences
    # were found.
    print("Finished.")
# Main part: make sure both input files are present before comparing them.
# The first file is checked first, so only one missing file is reported.
if not os.path.exists(WordFile):
    print(f"Could not find file {WordFile}.")
elif not os.path.exists(UnicodeFile):
    print (f"Could not find file {UnicodeFile}.")
else:
    print ("Comparing file ",WordFile," with ",UnicodeFile,"\n\nResult:\n\n",end="")
    compare_files(WordFile, UnicodeFile)
Comparing file ../tf/0.4/word.tf with ../tf/0.4/unicode.tf Result: Line 28 differs: File ../tf/0.4/word.tf: Ἀβραάμ File ../tf/0.4/unicode.tf: Ἀβραάμ. Line 32 differs: File ../tf/0.4/word.tf: Ἰσαάκ File ../tf/0.4/unicode.tf: Ἰσαάκ, Line 37 differs: File ../tf/0.4/word.tf: Ἰακώβ File ../tf/0.4/unicode.tf: Ἰακώβ, Line 46 differs: File ../tf/0.4/word.tf: αὐτοῦ File ../tf/0.4/unicode.tf: αὐτοῦ, Line 57 differs: File ../tf/0.4/word.tf: Θάμαρ File ../tf/0.4/unicode.tf: Θάμαρ, Line 62 differs: File ../tf/0.4/word.tf: Ἐσρώμ File ../tf/0.4/unicode.tf: Ἐσρώμ, Line 67 differs: File ../tf/0.4/word.tf: Ἀράμ File ../tf/0.4/unicode.tf: Ἀράμ, Line 72 differs: File ../tf/0.4/word.tf: Ἀμιναδάβ File ../tf/0.4/unicode.tf: Ἀμιναδάβ, Line 77 differs: File ../tf/0.4/word.tf: Ναασσών File ../tf/0.4/unicode.tf: Ναασσών, Line 82 differs: File ../tf/0.4/word.tf: Σαλμών File ../tf/0.4/unicode.tf: Σαλμών, Stoped comparing after 10 differences Finished.