from sklearn.feature_extraction.text import CountVectorizer
import nltk
import numpy as np
import pandas as pd
import re
nltk.download('stopwords')
corpus = ['The sky is blue and beautiful.',
'Love this blue and beautiful sky!',
'The quick brown fox jumps over the lazy dog.',
"A king's breakfast has sausages, ham, bacon, eggs, toast and beans",
'I love green eggs, ham, sausages and bacon!',
'The brown fox is quick and the blue dog is lazy!',
'The sky is very blue and the sky is very beautiful today',
'The dog is lazy but the brown fox is quick!'
]
labels = ['weather', 'weather', 'animals', 'food', 'food', 'animals', 'weather', 'animals']
corpus = np.array(corpus)
df = pd.DataFrame({"text": corpus, "label": labels})
df.head()
| | text | label |
---|---|---|
0 | The sky is blue and beautiful. | weather |
1 | Love this blue and beautiful sky! | weather |
2 | The quick brown fox jumps over the lazy dog. | animals |
3 | A king's breakfast has sausages, ham, bacon, e... | food |
4 | I love green eggs, ham, sausages and bacon! | food |
wpt = nltk.WordPunctTokenizer()
stop_words = nltk.corpus.stopwords.words('english')
wpt.tokenize(corpus[0])
['The', 'sky', 'is', 'blue', 'and', 'beautiful', '.']
def preprocess(doc):
    # lowercase and trim the document
    doc = doc.lower().strip()
    # tokenize, then drop English stop words
    tokens = wpt.tokenize(doc)
    tokens = [token for token in tokens if token not in stop_words]
    # rejoin into a single space-separated string
    return ' '.join(tokens)
normalize_corpus = np.vectorize(preprocess)
norm_corp = normalize_corpus(corpus)
norm_corp
array(['sky blue beautiful .', 'love blue beautiful sky !', 'quick brown fox jumps lazy dog .', "king ' breakfast sausages , ham , bacon , eggs , toast beans", 'love green eggs , ham , sausages bacon !', 'brown fox quick blue dog lazy !', 'sky blue sky beautiful today', 'dog lazy brown fox quick !'], dtype='<U60')
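Note that punctuation tokens ('.', '!', ',' and the stray "'") survive the stop-word filter, because WordPunctTokenizer emits them as separate tokens and they are not stop words. They never reach the vocabulary below only because CountVectorizer's default token_pattern keeps tokens of two or more word characters. If you prefer to remove them during normalization instead, a small variant of preprocess works — a sketch (the name preprocess_alpha is ours, not part of the original run):
def preprocess_alpha(doc):
    doc = doc.lower().strip()
    # keep only purely alphabetic tokens, which drops '.', '!', ',' and "'"
    tokens = [t for t in wpt.tokenize(doc) if t.isalpha() and t not in stop_words]
    return ' '.join(tokens)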
cv = CountVectorizer(min_df=0., max_df=1.)  # no document-frequency cutoffs: keep every term
cv_matrix = cv.fit_transform(norm_corp).toarray()  # fit_transform returns a sparse matrix; toarray() densifies it, fine for this tiny corpus
cv_matrix
array([[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0],
       [1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0],
       [1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
       [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1],
       [0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0]])
vocab = cv.get_feature_names()  # on scikit-learn >= 1.2 use get_feature_names_out() instead
vocab
['bacon', 'beans', 'beautiful', 'blue', 'breakfast', 'brown', 'dog', 'eggs', 'fox', 'green', 'ham', 'jumps', 'king', 'lazy', 'love', 'quick', 'sausages', 'sky', 'toast', 'today']
pd.DataFrame(cv_matrix, columns=vocab)
| | bacon | beans | beautiful | blue | breakfast | brown | dog | eggs | fox | green | ham | jumps | king | lazy | love | quick | sausages | sky | toast | today |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
2 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
3 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
5 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
6 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 |
7 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
cv.transform(['sky is good and beautiful beautiful today']).toarray()
array([[0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]])
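Only terms already in the fitted vocabulary are counted: 'good', 'is' and 'and' are simply ignored, while 'beautiful' is counted twice. A small check that maps the counts back to terms (illustrative, not part of the original run):
new_vec = cv.transform(['sky is good and beautiful beautiful today']).toarray()[0]
# pair each vocabulary term with its count and keep the non-zero entries
{term: count for term, count in zip(vocab, new_vec) if count > 0}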
ngram_range --> controls which n-gram sizes are extracted:
--> (1, 2) creates unigrams and bigrams
--> (2, 2) creates only bigrams
--> (1, 3) creates unigrams, bigrams and trigrams
A quick (1, 2) sketch follows; the cells after it use (2, 2) and then (1, 3).
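With ngram_range=(1, 2) the vocabulary mixes single words and word pairs — a quick sketch (cv12 is ours and was not run in the original notebook):
cv12 = CountVectorizer(ngram_range=(1, 2))
cv12_matrix = cv12.fit_transform(norm_corp)
# vocabulary now holds unigrams such as 'sky' alongside bigrams such as 'sky blue';
# it should contain 49 terms here: the 20 unigrams above plus the 29 bigrams shown below
len(cv12.vocabulary_)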
cv = CountVectorizer(ngram_range=(2, 2))
cv_matrix = cv.fit_transform(norm_corp).toarray()
pd.DataFrame(cv_matrix, columns=cv.get_feature_names())
| | bacon eggs | beautiful sky | beautiful today | blue beautiful | blue dog | blue sky | breakfast sausages | brown fox | dog lazy | eggs ham | eggs toast | fox jumps | fox quick | green eggs | ham bacon | ham sausages | jumps lazy | king breakfast | lazy brown | lazy dog | love blue | love green | quick blue | quick brown | sausages bacon | sausages ham | sky beautiful | sky blue | toast beans |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
5 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
6 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
cv1 = CountVectorizer(ngram_range=(1, 3))
cv1_matrix = cv1.fit_transform(norm_corp).toarray()
pd.DataFrame(cv1_matrix, columns=cv1.get_feature_names())
| | bacon | bacon eggs | bacon eggs toast | beans | beautiful | beautiful sky | beautiful today | blue | blue beautiful | blue beautiful sky | blue dog | blue dog lazy | blue sky | blue sky beautiful | breakfast | breakfast sausages | breakfast sausages ham | brown | brown fox | brown fox jumps | brown fox quick | dog | dog lazy | dog lazy brown | eggs | eggs ham | eggs ham sausages | eggs toast | eggs toast beans | fox | fox jumps | fox jumps lazy | fox quick | fox quick blue | green | green eggs | green eggs ham | ham | ham bacon | ham bacon eggs | ham sausages | ham sausages bacon | jumps | jumps lazy | jumps lazy dog | king | king breakfast | king breakfast sausages | lazy | lazy brown | lazy brown fox | lazy dog | love | love blue | love blue beautiful | love green | love green eggs | quick | quick blue | quick blue dog | quick brown | quick brown fox | sausages | sausages bacon | sausages ham | sausages ham bacon | sky | sky beautiful | sky beautiful today | sky blue | sky blue beautiful | sky blue sky | toast | toast beans | today |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
6 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 1 |
7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
from sklearn.feature_extraction.text import TfidfVectorizer
TfidfVectorizer builds each document vector from term frequency weighted by inverse document frequency (tf-idf); as with CountVectorizer, terms can be filtered by the document-frequency thresholds min_df and max_df.
tfidf = TfidfVectorizer()
tf_matrix = tfidf.fit_transform(norm_corp).toarray()
tf_matrix
array([[0., 0., 0.6009782, 0.52692542, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.6009782, 0., 0.],
       [0., 0., 0.49316188, 0.43239428, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.57150495, 0., 0., 0.49316188, 0., 0.],
       [0., 0., 0., 0., 0., 0.38036238, 0.38036238, 0., 0.38036238, 0., 0., 0.52594895, 0., 0.38036238, 0., 0.38036238, 0., 0., 0., 0.],
       [0.32116401, 0.38321492, 0., 0., 0.38321492, 0., 0., 0.32116401, 0., 0., 0.32116401, 0., 0.38321492, 0., 0., 0., 0.32116401, 0., 0.38321492, 0.],
       [0.39455357, 0., 0., 0., 0., 0., 0., 0.39455357, 0., 0.47078381, 0.39455357, 0., 0., 0., 0.39455357, 0., 0.39455357, 0., 0., 0.],
       [0., 0., 0., 0.3650479, 0., 0.41635082, 0.41635082, 0., 0.41635082, 0., 0., 0., 0., 0.41635082, 0., 0.41635082, 0., 0., 0., 0.],
       [0., 0., 0.36082605, 0.31636491, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.72165209, 0., 0.49893493],
       [0., 0., 0., 0., 0., 0.4472136, 0.4472136, 0., 0.4472136, 0., 0., 0., 0., 0.4472136, 0., 0.4472136, 0., 0., 0., 0.]])
pd.DataFrame(tf_matrix, columns=tfidf.get_feature_names())
| | bacon | beans | beautiful | blue | breakfast | brown | dog | eggs | fox | green | ham | jumps | king | lazy | love | quick | sausages | sky | toast | today |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.000000 | 0.000000 | 0.600978 | 0.526925 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.600978 | 0.000000 | 0.000000 |
1 | 0.000000 | 0.000000 | 0.493162 | 0.432394 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.571505 | 0.000000 | 0.000000 | 0.493162 | 0.000000 | 0.000000 |
2 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.380362 | 0.380362 | 0.000000 | 0.380362 | 0.000000 | 0.000000 | 0.525949 | 0.000000 | 0.380362 | 0.000000 | 0.380362 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
3 | 0.321164 | 0.383215 | 0.000000 | 0.000000 | 0.383215 | 0.000000 | 0.000000 | 0.321164 | 0.000000 | 0.000000 | 0.321164 | 0.000000 | 0.383215 | 0.000000 | 0.000000 | 0.000000 | 0.321164 | 0.000000 | 0.383215 | 0.000000 |
4 | 0.394554 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.394554 | 0.000000 | 0.470784 | 0.394554 | 0.000000 | 0.000000 | 0.000000 | 0.394554 | 0.000000 | 0.394554 | 0.000000 | 0.000000 | 0.000000 |
5 | 0.000000 | 0.000000 | 0.000000 | 0.365048 | 0.000000 | 0.416351 | 0.416351 | 0.000000 | 0.416351 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.416351 | 0.000000 | 0.416351 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
6 | 0.000000 | 0.000000 | 0.360826 | 0.316365 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.721652 | 0.000000 | 0.498935 |
7 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.447214 | 0.447214 | 0.000000 | 0.447214 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.447214 | 0.000000 | 0.447214 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
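The weights above can be reproduced by hand: with its default settings, TfidfVectorizer multiplies each raw term count by a smoothed idf, ln((1 + n) / (1 + df)) + 1, and then L2-normalizes every row. A minimal check for document 0 ('sky blue beautiful'), where 'sky' and 'beautiful' occur in 3 of the 8 documents and 'blue' in 4:
n_docs = 8
idf = lambda df_t: np.log((1 + n_docs) / (1 + df_t)) + 1
raw = np.array([idf(3), idf(4), idf(3)])   # tf is 1 for each of 'sky', 'blue', 'beautiful' in document 0
raw / np.linalg.norm(raw)                  # ≈ [0.6010, 0.5269, 0.6010], matching row 0 above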
len(tfidf.get_feature_names()), len(cv.get_feature_names())  # tfidf holds 20 unigrams; cv was last fit with ngram_range=(2, 2), hence 29 bigrams
(20, 29)