#!/usr/bin/env python
# coding: utf-8

# # OOP for CoNLL data
#
# 1. Design a set of classes to parse data in CoNLL-U format.
# 2. Parse the data.

# In[ ]:


class WordFormatException(Exception):
    """
    Raised when a word line does not have the expected CoNLL-U layout.

    The text passed to the constructor is available both as ``message``
    and through the standard ``args`` / ``str()`` machinery.
    """

    def __init__(self, message):
        # Initialise the base class explicitly so ``args`` and ``str()``
        # are populated the conventional way.
        super(WordFormatException, self).__init__(message)
        self.message = message


# In[ ]:


class Word(object):
    """
    A word in conll-u format
    See http://universaldependencies.org/format.html

    The line is split on tab characters and must yield exactly ten
    columns. Each column is stored, unconverted, as a string attribute,
    in this order: ``nid``, ``form``, ``lemma``, ``upostag``, ``xpostag``,
    ``feats``, ``head``, ``deprel``, ``deps``, ``misc``.
    """

    def __init__(self, line):
        """
        Build a word from one tab-separated line.

        :param line: a single word line; a trailing line terminator
            (``\\n`` or ``\\r\\n``) is ignored.
        :raises WordFormatException: if the line does not contain exactly
            ten tab-separated columns.
        """
        # Only strip line terminators: stripping all whitespace would also
        # eat trailing tabs and silently change the column count.
        feats = line.rstrip('\r\n').split('\t')
        if len(feats) != 10:
            raise WordFormatException("A word line must have 10 columns")
        self.nid = feats[0]
        self.form = feats[1]
        self.lemma = feats[2]
        self.upostag = feats[3]
        self.xpostag = feats[4]
        self.feats = feats[5]
        self.head = feats[6]
        self.deprel = feats[7]
        self.deps = feats[8]
        self.misc = feats[9]


# In[ ]:


# Sample line with explicit tab escapes and all ten columns, so the demo
# does not depend on literal tab characters surviving in the source file.
w = Word("1\tJe\til\tPRON\t_\tNumber=Sing|Person=1|PronType=Prs\t2\tnsubj\t_\t_")
w.lemma  # notebook cell output; has no effect when run as a script


# In[ ]: