#!/usr/bin/env python
# coding: utf-8

# # Test/create feature 'unaccented' and 'transliteration'
# This is a playground for the development of two new features.
#
# It depends on Python library:
#
# * unidecode = ASCII transliteration of unicode text

# In[5]:


import unicodedata

from unidecode import unidecode


def make_transliteration(text: str) -> str:
    """Return the ASCII transliteration of *text*.

    Thin wrapper that delegates to ``unidecode.unidecode``.

    Args:
        text: The Unicode string to transliterate.

    Returns:
        Whatever ``unidecode`` returns for *text*.
    """
    return unidecode(text)


def remove_accents(text: str) -> str:
    """Return *text* with every nonspacing combining mark removed.

    The string is first decomposed with NFD so that precomposed characters
    are split into a base character followed by combining marks; all
    characters whose Unicode general category is ``Mn`` are then dropped.

    Note that the result is not recomposed afterwards, so any decomposition
    that does not consist of ``Mn`` marks is left in its NFD form.

    Args:
        text: The Unicode string to strip.

    Returns:
        The decomposed string without its ``Mn`` characters.
    """
    decomposed = unicodedata.normalize('NFD', text)
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')


# test with John 1:1
greek_text_with_accents = "ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
transliterated_text = make_transliteration(greek_text_with_accents)
unaccented_text = remove_accents(greek_text_with_accents)
print(transliterated_text)  # in TF>0.5 use with: fmt='text-transliterated'
print(unaccented_text)  # in TF>0.5 use with: fmt='text-unaccented'
print(greek_text_with_accents)


# In[ ]: