#!/usr/bin/env python
# coding: utf-8

# # Difference between feature 'word' and 'unicode'

# The following small script shows the differences between feature 'word' and 'unicode'.

# In[1]:


# The following variables should contain the relative path and name of the two files to compare
WordFile = "../tf/0.4/word.tf"
UnicodeFile = "../tf/0.4/unicode.tf"

# How many differences to show
NumberExamples = 10


# In[2]:


import os


def compare_files(file1_path, file2_path, max_differences=None):
    """Print the lines on which two UTF-8 text files differ.

    Both files are read completely and compared line by line after
    stripping leading and trailing whitespace from every line. A differing
    pair is not reported when the line from the first file starts with "@".

    Args:
        file1_path: Path of the first file.
        file2_path: Path of the second file.
        max_differences: Stop comparing once this many differences have
            been reported. When None, the module-level ``NumberExamples``
            is used (looked up at call time).

    Returns:
        The number of differences that were reported, or None when the
        files have a different number of lines (nothing is compared then).
    """
    if max_differences is None:
        max_differences = NumberExamples

    with open(file1_path, 'r', encoding='utf-8') as file1, \
            open(file2_path, 'r', encoding='utf-8') as file2:
        lines_file1 = file1.readlines()
        lines_file2 = file2.readlines()

    # A line-by-line comparison is only meaningful for files of equal length.
    if len(lines_file1) != len(lines_file2):
        print("Files have different numbers of lines.")
        return None

    found_differences = 0
    for line_num, (raw1, raw2) in enumerate(zip(lines_file1, lines_file2), start=1):
        line1 = raw1.strip()
        line2 = raw2.strip()

        if line1 == line2:
            continue
        # Skip differing lines whose first-file version starts with "@".
        if line1.startswith("@"):
            continue

        print(f"Line {line_num} differs:")
        print(f"File {file1_path}: {line1}")
        print(f"File {file2_path}: {line2}")
        print()

        found_differences += 1
        if found_differences == max_differences:
            print ('Stoped comparing after ', found_differences,'differences')
            break

    # Always printed once the comparison loop has ended.
    print("Finished.")
    return found_differences


# main part

# First check that both files exist, then compare their content
if not os.path.exists(WordFile):
    print(f"Could not find file {WordFile}.")
elif not os.path.exists(UnicodeFile):
    print (f"Could not find file {UnicodeFile}.")
else:
    print ("Comparing file ",WordFile," with ",UnicodeFile,"\n\nResult:\n\n",end="")
    compare_files(WordFile, UnicodeFile)


# In[ ]: