#!/usr/bin/env python
# coding: utf-8
# # Test/create feature 'unaccented' and 'transliteration'
# ## Table of contents
# * 1 - test
#
# In[8]:
import os
from unidecode import unidecode
import unicodedata
# The following variable should contain the relative path to the TF file to read:
# FileToRead = "..//tf//0.4//word.tf"
def make_transliteration(text):
    """Return the transliteration of *text* produced by ``unidecode``.

    Args:
        text: The string to transliterate.

    Returns:
        Whatever ``unidecode`` returns for *text*.
    """
    transliterated = unidecode(text)
    return transliterated
def remove_accents(text):
    """Return *text* with all non-spacing combining marks removed.

    The text is first decomposed (NFD) so that each accented character
    becomes a base character followed by its combining marks. Every
    character whose Unicode category is ``Mn`` (Mark, nonspacing) is then
    dropped, and the remainder is recomposed (NFC).

    Args:
        text: The string to strip. An empty string yields an empty string.

    Returns:
        The stripped string, in NFC form.
    """
    decomposed = unicodedata.normalize('NFD', text)
    stripped = ''.join(
        char for char in decomposed if unicodedata.category(char) != 'Mn'
    )
    # NFD also splits characters that carry no accent at all (for example
    # precomposed Hangul syllables into separate jamo). Recompose so such
    # characters are returned unchanged instead of leaking the decomposed form.
    return unicodedata.normalize('NFC', stripped)
# Sample accented Greek sentence used to exercise both helpers.
greek_text_with_accents = "ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."

# Derive both values from the same input; the two calls are independent.
unaccented_text = remove_accents(greek_text_with_accents)
transliterated_text = make_transliteration(greek_text_with_accents)

# Print the transliteration first, then the unaccented form, one per line.
print(transliterated_text, unaccented_text, sep="\n")
# In[ ]: