#!/usr/bin/env python
# coding: utf-8
"""Draw word clouds from the text in ``data/war_of_flower.txt``.

Exported from a Jupyter notebook (practice exercise).  Two clouds are drawn:
one from the lines that mention the character 곽철용, and one from the whole
text.  In both cases only nouns longer than two characters, as returned by
KoNLPy's ``Mecab().nouns``, are kept.

Note from the original notebook: the work is done using "\\t \\n" as the
delimiters.
"""

import re

SCRIPT_PATH = 'data/war_of_flower.txt'
FONT_PATH = "data/D2Coding.ttf"
CHARACTER = '곽철용'

# Matches a run of digits at the very start of a line.  Such lines are skipped
# when collecting a character's lines (presumably numbered scene headings --
# confirm against the data file).
LEADING_DIGITS = re.compile(r"^\d+")


def extract_character_lines(lines, name=CHARACTER):
    """Return the text of every line in *lines* that mentions *name*.

    A line "mentions" *name* when the name occurs in the line after all
    spaces have been removed, so a name typed with spaces between its
    characters still matches.  For each matching line:

    * lines that start with digits are dropped;
    * lines containing ``:`` contribute everything after the first colon,
      with any further colons replaced by single spaces;
    * all other lines are kept with their tab characters removed.

    Line endings are left untouched.
    """
    picked = []
    for line in lines:
        if name not in line.replace(" ", ""):
            continue
        if LEADING_DIGITS.match(line):
            continue
        if ":" in line:
            picked.append(" ".join(line.split(":")[1:]))
        else:
            picked.append(line.replace("\t", ""))
    return picked


def keep_long_tokens(tokens, min_length=3):
    """Return the tokens whose length is at least *min_length* characters."""
    return [token for token in tokens if len(token) >= min_length]


def enable_inline_plots():
    """Turn on ``%matplotlib inline`` when running under IPython.

    Returns ``True`` if the magic was applied and ``False`` when no IPython
    shell is available (e.g. the file is run with plain ``python``).
    """
    try:
        shell = get_ipython()  # noqa: F821 -- only defined by IPython
    except NameError:
        return False
    if shell is None:
        return False
    shell.run_line_magic('matplotlib', 'inline')
    return True


def draw_word_cloud(words, font_path):
    """Draw *words* as a word cloud on a new 12x12 figure and return the cloud.

    *font_path* is the font file handed to ``WordCloud``.
    """
    # Imported here so the text helpers above stay importable without the
    # plotting stack installed.
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    cloud = WordCloud(
        font_path=font_path, relative_scaling=0, collocations=False
    ).generate(" ".join(words))
    plt.figure(figsize=(12, 12))
    plt.imshow(cloud)
    plt.axis("off")
    return cloud


if __name__ == "__main__":
    from konlpy.tag import Mecab

    # Explicit encoding: the file holds Korean text, so do not depend on the
    # platform's default codec.  (Assumes the file is UTF-8 -- confirm.)
    with open(SCRIPT_PATH, 'r', encoding='utf-8') as f:
        text = f.readlines()

    inline = enable_inline_plots()
    mecab = Mecab()  # one tagger instance shared by both passes
    font_file = FONT_PATH

    # Pass 1: only the lines that mention the character.
    token_kcw = keep_long_tokens(
        mecab.nouns(" ".join(extract_character_lines(text)))
    )
    token_kcw_unique = list(set(token_kcw))
    # The notebook displayed these values as cell outputs; print them so they
    # are not silently discarded when run as a script.
    print(len(token_kcw_unique))
    print("// ".join(token_kcw_unique))
    wcloud = draw_word_cloud(token_kcw, font_file)

    # Pass 2: the whole text.
    tokens = keep_long_tokens(mecab.nouns(" ".join(text)))
    print(len(tokens))
    wcloud = draw_word_cloud(tokens, font_file)

    if not inline:
        # Without the inline backend nothing is shown until show() is called.
        import matplotlib.pyplot as plt

        plt.show()