class Word(object):
    """
    A word in conll-u format
    See http://universaldependencies.org/format.html

    The ten tab-separated columns of the line are stored, in order and as
    strings, in the attributes ``nid``, ``form``, ``lemma``, ``upostag``,
    ``xpostag``, ``feats``, ``head``, ``deprel``, ``deps`` and ``misc``.
    """

    def __init__(self, line):
        """
        Parse one word line.

        :param line: a single tab-separated word line; a trailing line
            terminator ("\\n" or "\\r\\n") is ignored.
        :raises WordFormatException: if the line does not have exactly
            10 columns.
        """
        # Lines read straight from a file still end with their line
        # terminator; drop it so it does not leak into the last column.
        # Only "\r" / "\n" are removed: stripping all whitespace would also
        # eat trailing tabs and change the column count.
        columns = line.rstrip('\r\n').split('\t')
        if len(columns) != 10:
            raise WordFormatException("A word line must have 10 columns")
        (self.nid, self.form, self.lemma, self.upostag, self.xpostag,
         self.feats, self.head, self.deprel, self.deps, self.misc) = columns
class WordFormatException(Exception):
    """Unexpected word format.

    The text passed to the constructor is kept in the ``message`` attribute.
    """

    def __init__(self, message):
        """
        :param message: human-readable description of the format problem.
        """
        # Chain to the base class explicitly so ``args`` / ``str()`` are set
        # by the normal exception initialisation path.
        super(WordFormatException, self).__init__(message)
        self.message = message
# Demo: parse a single word line and look at its lemma.
# The columns are listed one by one and joined with tabs so that the line
# visibly has the 10 tab-separated columns Word requires (the last two,
# deps and misc, are both left unspecified as "_").
w = Word("\t".join([
    "1", "Je", "il", "PRON", "_",
    "Number=Sing|Person=1|PronType=Prs",
    "2", "nsubj", "_", "_",
]))
w.lemma