from konlpy.tag import Okt
from collections import Counter
from wordcloud import WordCloud
import os
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
from PIL import Image # Pillow 패키지의 영상 핸들링 클래스.
import matplotlib
from IPython.display import set_matplotlib_formats
import nltk
from nltk.corpus import stopwords
# ---------------------------------------------------------------------------
# Word-frequency / word-cloud analysis of Naver Jisik-iN card complaint posts.
#
# For each input spreadsheet: tokenize the 'content' column with KoNLPy's Okt
# tagger, keep only nouns, drop generic Korean stop words plus the card/brand
# terms that dominate every post, then print the top-10 noun frequencies and
# render a word cloud.
#
# NOTE(review): this file was originally a flattened notebook (`%matplotlib qt`
# magic, lost indentation, pasted REPL output). Rewritten as a plain script.
# ---------------------------------------------------------------------------
matplotlib.use('QtAgg')  # script equivalent of the notebook magic `%matplotlib qt`

os.chdir(r'C:\Users\Gram\Desktop\네이버지식인카드불편사항\2. 네이버지식인 본문\6-2. 빈 본문에 제목을 삽입해 본문만 저장')

# Korean-capable font; without it Hangul labels render as empty boxes.
matplotlib.rc('font', family='Malgun Gothic')
set_matplotlib_formats('retina')             # sharper figure rendering
matplotlib.rc('axes', unicode_minus=False)   # avoid broken minus glyphs

# The tagger and the stop-word list are identical for every dataset —
# build/load them once instead of once per spreadsheet.
okt = Okt()
stop_words = set(pd.read_excel("korean_stop_words.xlsx")['불용어'])


def extract_noun_counts(filename, exclude_substrings):
    """Return a Counter of noun frequencies for one spreadsheet.

    Reads *filename* (must have a 'content' column), POS-tags each post,
    and counts a word only when:
      * Okt tags it as a Noun,
      * it is not in the shared stop-word list, and
      * it contains none of the substrings in *exclude_substrings*
        (substring match, mirroring the original `"x" not in word` tests,
        so e.g. "카드" also drops "체크카드").
    """
    data = pd.read_excel(filename)
    counts = Counter()
    for line in data['content']:
        for word, tag in okt.pos(line, join=False):
            if (tag == 'Noun'
                    and word not in stop_words
                    and not any(sub in word for sub in exclude_substrings)):
                counts[word] += 1
    return counts


def show_wordcloud(counts):
    """Render a word cloud (blocking window) from a word->frequency mapping."""
    wordcloud = WordCloud(
        background_color="black",
        font_path='C:/Windows/Fonts/malgun.ttf',
        colormap="Set1",            # "Accent_r" for fluorescent colours
        width=1500,
        height=1000,
    ).generate_from_frequencies(dict(counts))
    plt.imshow(wordcloud)
    plt.axis('off')
    plt.show()


# (spreadsheet, substrings to exclude) — one entry per crawled dataset.
# The excluded terms are the card/brand names of that dataset plus a few
# filler words the original author filtered ad hoc.
DATASETS = [
    ("국민카드 설계사(완).xlsx", ["국민", "카드", "설계사", "설계"]),
    ("국민카드 체크카드(완).xlsx", ["국민", "카드", "체크", "체크카드", "건가", "지금", "하나요", "거", "다시"]),
    ("국민카드 탄탄대로(완).xlsx", ["국민", "카드", "탄탄대로", "탄탄", "대로"]),
    ("삼성카드_마일리지(완).xlsx", ["삼성", "카드", "마일리지"]),
    ("삼성카드_발급문의(완).xlsx", ["삼성", "카드", "발급", "문의", "발급문의", "시"]),
    ("삼성카드_설계사(완).xlsx", ["삼성", "설계", "카드", "설계사"]),
    ("삼성카드_탭탭오(완).xlsx", ["삼성", "카드", "탭탭오"]),
    ("신한카드_딥드림(완).xlsx", ["신한", "카드", "뭐", "딥드림", "딥", "드림"]),
    ("신한카드_리볼빙(완).xlsx", ["신한", "카드", "리볼빙"]),
    # NOTE(review): the original script configured fonts for this dataset but
    # never generated its word cloud — presumably an accidental omission; the
    # uniform loop below now produces one for it as well.
    ("신한카드_설계사(완).xlsx", ["설계", "신한", "카드", "설계사"]),
    ("카카오체크카드(완).xlsx", ["카카오", "카드", "체크", "체크카드", "페이", "카카오페이", "뱅크", "건가", "거", "하나요"]),
    ("카카오페이카드 새마을금고(완).xlsx", ["새마을금고", "금고", "카카오", "카드", "페이", "새마을"]),
    ("카카오페이카드 신한카드(완).xlsx", ["페이", "카카오", "카드", "거", "신한은행", "신한"]),
    ("카카오페이카드 어피치(완).xlsx", ["카카오", "카드", "페이", "어피치", "어피", "거", "시", "안"]),
    ("현대카드_발급(완).xlsx", ["현대", "카드", "발급"]),
    ("현대카드_설계사(완).xlsx", ["현대", "카드", "설계사", "설계"]),
    ("현대카드_현금지원(완).xlsx", ["현대", "카드", "현금지원", "현금", "지원", "해주시", "영사", "시"]),
]

for filename, excludes in DATASETS:
    counts = extract_noun_counts(filename, excludes)
    # Counter.most_common(10) replaces the original
    # sorted(words.items(), key=operator.itemgetter(1), reverse=True)[:10].
    print(filename, counts.most_common(10))
    show_wordcloud(counts)