# Add stemming and part-of-speech tagging with KoNLPy's Twitter() tagger.
# NOTE(review): newer KoNLPy releases expose this tagger as `Okt` and mark
# `Twitter` as deprecated — confirm the installed version before renaming.
from konlpy.tag import Twitter

twitter = Twitter()
sample_sentence = '김정은과 문재인의 평양만남'
# The cells below read each item of `tokens` as a (word, tag) pair.
tokens = twitter.pos(sample_sentence, stem=True)
tokens
# Extract noun tokens, approach 1: an explicit loop.
# Each item of `tokens` is a (word, tag) pair; the tagger labels nouns "Noun".
result = []
for word, tag in tokens:
    if tag == "Noun":
        result.append(word)
result
# Extract noun tokens, approach 2: the same filter as a list comprehension.
# Each item of `tokens` is a (word, tag) pair; the tagger labels nouns "Noun".
result_list = [word for word, tag in tokens if tag == "Noun"]
result_list
# Join the extracted nouns into a single space-separated string.
" ".join(result_list)
# ! pip install wordcloud
# Load the DDP Pyongyang inter-Korean summit "report to the nation" text.
with open('./data/pyongyang_fin.txt', mode='r', encoding='utf-8') as report_file:
    texts = report_file.read()
# Preview the first 100 characters of the document.
texts[0:100]
# ! pip install matplotlib
%matplotlib inline
# Feed the raw document straight into WordCloud, with no preprocessing.
from wordcloud import WordCloud
wcloud = WordCloud(font_path='./data/D2Coding.ttf',  # font file used to draw the words
                   relative_scaling=0.1,
                   background_color='white').generate(texts)
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 12))
plt.imshow(wcloud, interpolation='bilinear')
plt.axis("off")  # hide the axes around the rendered image
# Token frequency table for the raw document.
from nltk import FreqDist
from nltk.tokenize import word_tokenize
import pandas as pd

# Note: this rebinds `tokens`, replacing the (word, tag) pairs produced earlier.
tokens = word_tokenize(texts)
token_counts = dict(FreqDist(tokens))
# Sort so the most frequent tokens come first.
freqtxt = pd.Series(token_counts).sort_values(ascending=False)
# Show the first ten entries of the sorted table.
freqtxt[:10]
# Inspect the beginning of the original text.
texts[:300]
# Keep only the nouns, using the Twitter tagger.
# Each item returned by pos() is read as a (word, tag) pair; nouns carry the tag "Noun".
tokens = twitter.pos(texts, stem=True)
tokens_noun = [word for word, tag in tokens if tag == "Noun"]
# Re-join the nouns into one space-separated string for the word cloud.
texts_noun = " ".join(tokens_noun)
texts_noun[:300]
%matplotlib inline
# Word cloud built from the noun-only text (`texts_noun`) instead of the raw document.
wcloud = WordCloud(font_path='./data/D2Coding.ttf',  # font file used to draw the words
                   relative_scaling=0.1,
                   background_color="white").generate(texts_noun)
plt.figure(figsize=(12, 12))
plt.imshow(wcloud, interpolation='bilinear')
plt.axis("off")  # hide the axes around the rendered image
# Token frequency table for the noun-only text.
# Note: this rebinds `tokens` to the word_tokenize() output of `texts_noun`.
tokens = word_tokenize(texts_noun)
noun_counts = dict(FreqDist(tokens))
# Sort so the most frequent nouns come first.
freqtxt = pd.Series(noun_counts).sort_values(ascending=False)
# Show the first twenty entries of the sorted table.
freqtxt[:20]