# NLP walkthrough: PCFG grammar rules, WordNet sense lookup, Lesk word-sense
# disambiguation (pywsd) and word segmentation (symspellpy).
import subprocess
import sys

# Install the third-party dependencies with the interpreter that is running
# this file. This replaces the IPython-only `! pip3 install ...` shell escape,
# which is a syntax error in plain Python.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "nltk", "pywsd", "symspellpy"],
    check=False,  # pip reports its own errors; the imports below surface a failure
)

import nltk

# Corpora and models used by the examples below.
NLTK_RESOURCES = (
    'treebank',
    'wordnet',
    'stopwords',
    'averaged_perceptron_tagger',
    'punkt',
)
for nltk_resource in NLTK_RESOURCES:
    nltk.download(nltk_resource)

# Chomsky CFG grammar rules (NLTK's bundled toy probabilistic grammar).
from nltk.grammar import toy_pcfg2

grammar = toy_pcfg2
print(grammar)

# Semantic-relation analysis using WordNet data.
from nltk.corpus import wordnet as wn

word = 'bank'
print("Synset list : \n{}\n\nbank.n.02 뜻과 예제 : \n{}\n{}\n\ntrust.v.01 뜻과 예제 :\n{}\n{}".format(
    wn.synsets(word),
    wn.synset('bank.n.02').definition(),
    wn.synset('bank.n.02').examples(),
    wn.synset('trust.v.01').definition(),
    wn.synset('trust.v.01').examples()))

# Word-sense disambiguation inside a sentence using pywsd.
# NOTE(review): this import is intentionally kept after the nltk.download()
# calls, as in the original ordering; pywsd presumably needs the corpora to be
# present when it is imported -- confirm before hoisting it to the top.
from pywsd.lesk import simple_lesk

sent = 'I went to the bank to deposit my money'
# Alternative example sentence: 'I bank my money'
ambiguous = 'bank'

answer = simple_lesk(sent, ambiguous)
# Show the definition of the chosen sense; the bare expression in the original
# discarded the value. Guard against no sense being selected.
if answer is not None:
    print(answer.definition())

from symspellpy.symspellpy import SymSpell


def spellCheck(dict_file, sentence):
    """Segment ``sentence`` into words with SymSpell and print the result.

    Args:
        dict_file: Path to a frequency dictionary file whose first column is
            the term and whose second column is its count.
        sentence: Text to segment (for example a string with its spaces
            removed).

    Returns:
        None. The segmented string and the summed edit distance are printed.
        If the dictionary cannot be loaded, a message is printed instead.
    """
    max_edit_dist, prefix_length = 0, 7
    # Column positions of the term and of its frequency in the dictionary file.
    term_index, count_index = 0, 1

    sym_spell = SymSpell(max_edit_dist, prefix_length)
    # load_dictionary() returns a falsy value when the file could not be loaded.
    if not sym_spell.load_dictionary(dict_file, term_index, count_index):
        print("사전파일을 정의하지 않았습니다")
        return

    result = sym_spell.word_segmentation(sentence)
    print("오타 수정결과: {}\n편집거리 총합: {}".format(
        result.corrected_string, result.distance_sum))


text = "thequickbrownfoxjumpsoverthelazydog"
spellCheck("frequency_dictionary_en_82_765.txt", sentence=text)