#!/usr/bin/env python
# coding: utf-8

# # Test/create feature 'unaccented' and 'transliteration'

# ## Table of contents
# * 1 - test

# In[8]:

import os
import unicodedata

from unidecode import unidecode

# The following variable should contain the relative path to the tf file to read
# FileToRead = "..//tf//0.4//word.tf"


def make_transliteration(text: str) -> str:
    """Return the transliteration of *text* produced by ``unidecode``.

    Args:
        text: The string to transliterate (e.g. polytonic Greek).

    Returns:
        Whatever ``unidecode(text)`` returns for the given input.
    """
    return unidecode(text)


def remove_accents(text: str) -> str:
    """Return *text* with all nonspacing combining marks removed.

    The text is first decomposed (NFD) so that every accent, breathing or
    iota subscript becomes a separate combining character, then every
    character of Unicode category ``Mn`` is dropped.  The remainder is
    recomposed (NFC) so that characters which merely *decompose* without
    carrying an accent (for instance Hangul syllables) are returned in their
    usual precomposed form instead of being left split apart.

    Args:
        text: The string to strip of accents.

    Returns:
        The unaccented string, in NFC form.
    """
    decomposed = unicodedata.normalize('NFD', text)
    stripped = ''.join(
        char for char in decomposed if unicodedata.category(char) != 'Mn'
    )
    # Recompose: NFD may have split characters that had no accent at all.
    return unicodedata.normalize('NFC', stripped)


greek_text_with_accents = "ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."

transliterated_text = make_transliteration(greek_text_with_accents)
unaccented_text = remove_accents(greek_text_with_accents)

print(transliterated_text)
print(unaccented_text)

# In[ ]: