# Normalize the English sample sentence to lowercase.
texts = 'I like such a Wonderful Snow Ice Cream'.lower()
texts
# Split the text into word-level tokens.
from nltk import word_tokenize

# "english" is word_tokenize's default language; it is spelled out here for clarity.
tokens = word_tokenize(texts, language="english")
tokens
# One-time setup if the stopword corpus is not installed locally:
#   import nltk
#   nltk.download('stopwords')
from nltk.corpus import stopwords

# List the languages for which a stopword list is available.
# ensure_loaded is a method, so it must be called to have a defined effect;
# fileids() is the public accessor, so there is no need to read the private
# `_fileids` attribute through __dict__.
stopwords.ensure_loaded()
stopwords.fileids()

# Peek at every 18th English stopword as a small sample.
stopwords.words("english")[::18]
# Drop English stopwords from the token list.
# The stopword list is fetched once and held in a set, so each membership
# test is O(1) instead of calling stopwords.words() again for every token.
english_stopwords = set(stopwords.words("english"))
tokens = [word for word in tokens if word not in english_stopwords]
print(tokens)
# Load the Korean stopword file.
# The context manager guarantees the handle is closed even if read() raises.
with open('../data/stopword_kr.txt', 'r', encoding='utf-8') as f:
    s = f.read()

# One row per line, keeping at most the first three tab-separated fields.
# NOTE(review): a trailing newline in the file yields a final [''] row here;
# confirm whether downstream code expects or should filter that entry.
stop_words = [txt.split('\t')[:3] for txt in s.split('\n')]
stop_words[:10]