Hence → a "vector representation" of words (symbols)
Example: we use the following dataset:
the quick brown fox jumped over the lazy dog
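In the skip-gram model, each word is used to predict the words around it. Below is a minimal sketch (not part of the tutorial script) of how this sentence becomes (center word, context word) training pairs, assuming a context window of one word on each side:

sentence = "the quick brown fox jumped over the lazy dog".split()
window = 1
pairs = []
for i, center in enumerate(sentence):
  for j in range(max(0, i - window), min(len(sentence), i + window + 1)):
    if j != i:
      pairs.append((center, sentence[j]))
print(pairs[:4])  # [('the', 'quick'), ('quick', 'the'), ('quick', 'brown'), ('brown', 'quick')]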
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import math
import os
import random
import zipfile
import numpy as np
from six.moves import urllib
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
# Step 1: Download the data.
url = 'http://mattmahoney.net/dc/'
def maybe_download(filename, expected_bytes):
  """Download a file if not present, and make sure it's the right size."""
  if not os.path.exists(filename):
    filename, _ = urllib.request.urlretrieve(url + filename, filename)
  statinfo = os.stat(filename)
  if statinfo.st_size == expected_bytes:
    print('Found and verified', filename)
  else:
    print(statinfo.st_size)
    raise Exception(
        'Failed to verify ' + filename + '. Can you get to it with a browser?')
  return filename

filename = maybe_download('text8.zip', 31344016)
Found and verified text8.zip
# Read the data into a list of strings.
def read_data(filename):
  """Extract the first file enclosed in a zip file as a list of words"""
  with zipfile.ZipFile(filename) as f:
    data = tf.compat.as_str(f.read(f.namelist()[0])).split()
  return data
words = read_data(filename)
print('Data size', len(words))
Data size 17005207
# Step 2: Build the dictionary and replace rare words with UNK token.
vocabulary_size = 50000
def build_dataset(words):
  count = [['UNK', -1]]
  count.extend(collections.Counter(words).most_common(vocabulary_size - 1))
  dictionary = dict()
  for word, _ in count:
    dictionary[word] = len(dictionary)
  data = list()
  unk_count = 0
  for word in words:
    if word in dictionary:
      index = dictionary[word]
    else:
      index = 0  # dictionary['UNK']
      unk_count += 1
    data.append(index)
  count[0][1] = unk_count
  reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
  return data, count, dictionary, reverse_dictionary
data, count, dictionary, reverse_dictionary = build_dataset(words)
del words # Hint to reduce memory.
print('Most common words (+UNK)', count[:5])
print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])
data_index = 0
Most common words (+UNK) [['UNK', 418391], ('the', 1061396), ('of', 593677), ('and', 416629), ('one', 411764)]
Sample data [5241, 3082, 12, 6, 195, 2, 3135, 46, 59, 156] ['anarchism', 'originated', 'as', 'a', 'term', 'of', 'abuse', 'first', 'used', 'against']
# Step 3: Function to generate a training batch for the skip-gram model.
def generate_batch(batch_size, num_skips, skip_window):
  global data_index
  assert batch_size % num_skips == 0
  assert num_skips <= 2 * skip_window
  batch = np.ndarray(shape=(batch_size), dtype=np.int32)
  labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
  span = 2 * skip_window + 1  # [ skip_window target skip_window ]
  buffer = collections.deque(maxlen=span)
  for _ in range(span):
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  for i in range(batch_size // num_skips):
    target = skip_window  # target label at the center of the buffer
    targets_to_avoid = [skip_window]
    for j in range(num_skips):
      while target in targets_to_avoid:
        target = random.randint(0, span - 1)
      targets_to_avoid.append(target)
      batch[i * num_skips + j] = buffer[skip_window]
      labels[i * num_skips + j, 0] = buffer[target]
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  return batch, labels
batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)
for i in range(8):
  print(batch[i], reverse_dictionary[batch[i]],
        '->', labels[i, 0], reverse_dictionary[labels[i, 0]])
3082 originated -> 12 as
3082 originated -> 5241 anarchism
12 as -> 3082 originated
12 as -> 6 a
6 a -> 195 term
6 a -> 12 as
195 term -> 6 a
195 term -> 2 of
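With skip_window=1 and num_skips=2, every center word is paired with both of its neighbors, which is exactly what the output above shows. As a quick check (not in the original script), a wider window works the same way; note that calling generate_batch again advances the global data_index:

batch, labels = generate_batch(batch_size=8, num_skips=4, skip_window=2)
for i in range(8):
  print(batch[i], reverse_dictionary[batch[i]],
        '->', labels[i, 0], reverse_dictionary[labels[i, 0]])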
# Step 4: Build and train a skip-gram model.
batch_size = 128
embedding_size = 128 # Dimension of the embedding vector.
skip_window = 1 # How many words to consider left and right.
num_skips = 2 # How many times to reuse an input to generate a label.
# We pick a random validation set to sample nearest neighbors. Here we limit the
# validation samples to the words that have a low numeric ID, which by
# construction are also the most frequent.
valid_size = 16 # Random set of words to evaluate similarity on.
valid_window = 100 # Only pick dev samples in the head of the distribution.
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
num_sampled = 64 # Number of negative examples to sample.
graph = tf.Graph()
with graph.as_default():

  # Input data.
  train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
  train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
  valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

  # Ops and variables pinned to the CPU because of missing GPU implementation
  with tf.device('/cpu:0'):
    # Look up embeddings for inputs.
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    embed = tf.nn.embedding_lookup(embeddings, train_inputs)

    # Construct the variables for the NCE loss
    nce_weights = tf.Variable(
        tf.truncated_normal([vocabulary_size, embedding_size],
                            stddev=1.0 / math.sqrt(embedding_size)))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

  # Compute the average NCE loss for the batch.
  # tf.nn.nce_loss automatically draws a new sample of the negative labels each
  # time we evaluate the loss.
  loss = tf.reduce_mean(
      tf.nn.nce_loss(nce_weights, nce_biases, embed, train_labels,
                     num_sampled, vocabulary_size))

  # Construct the SGD optimizer using a learning rate of 1.0.
  optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

  # Compute the cosine similarity between minibatch examples and all embeddings.
  norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
  normalized_embeddings = embeddings / norm
  valid_embeddings = tf.nn.embedding_lookup(
      normalized_embeddings, valid_dataset)
  similarity = tf.matmul(
      valid_embeddings, normalized_embeddings, transpose_b=True)

  # Add variable initializer.
  init = tf.initialize_all_variables()
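For intuition, tf.nn.nce_loss behaves roughly like a negative-sampling objective: score the true context word high and num_sampled randomly drawn "noise" words low. Below is a simplified NumPy sketch for a single (center word, context word) pair, using the np imported above; it is only an illustration and omits the correction for the noise distribution that true NCE applies:

def sigmoid(x):
  return 1.0 / (1.0 + np.exp(-x))

def toy_pair_loss(v, u_true, b_true, u_neg, b_neg):
  # v: center-word embedding; u_true, b_true: weight row / bias of the true context word;
  # u_neg, b_neg: weight rows / biases of the sampled noise words (one row per sample).
  pos = np.log(sigmoid(np.dot(v, u_true) + b_true))
  neg = np.sum(np.log(sigmoid(-(np.dot(u_neg, v) + b_neg))))
  return -(pos + neg)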
# Step 5: Begin training.
num_steps = 100001
with tf.Session(graph=graph) as session:
  # We must initialize all variables before we use them.
  init.run()
  print("Initialized")

  average_loss = 0
  for step in xrange(num_steps):
    batch_inputs, batch_labels = generate_batch(
        batch_size, num_skips, skip_window)
    feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}

    # We perform one update step by evaluating the optimizer op (including it
    # in the list of returned values for session.run())
    _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict)
    average_loss += loss_val

    if step % 2000 == 0:
      if step > 0:
        average_loss /= 2000
      # The average loss is an estimate of the loss over the last 2000 batches.
      print("Average loss at step ", step, ": ", average_loss)
      average_loss = 0

    # Note that this is expensive (~20% slowdown if computed every 500 steps)
    if step % 10000 == 0:
      sim = similarity.eval()
      for i in xrange(valid_size):
        valid_word = reverse_dictionary[valid_examples[i]]
        top_k = 8  # number of nearest neighbors
        nearest = (-sim[i, :]).argsort()[1:top_k + 1]
        log_str = "Nearest to %s:" % valid_word
        for k in xrange(top_k):
          close_word = reverse_dictionary[nearest[k]]
          log_str = "%s %s," % (log_str, close_word)
        print(log_str)
  final_embeddings = normalized_embeddings.eval()
Initialized Average loss at step 0 : 293.645874023 Nearest to had: append, revolutionaries, joaquim, gg, collaborators, animators, preaching, tz, Nearest to between: commercialize, chlorine, meritocratic, luoyang, ghetto, asu, hanging, mak, Nearest to one: freya, benny, franchise, boyz, speght, piaget, finds, punctured, Nearest to use: maligned, attraction, liu, beaten, zech, shaking, sinai, apprehended, Nearest to state: fulfillment, shield, provincias, fairy, scarecrow, brythonic, deflect, discoveries, Nearest to over: finale, due, van, meltzer, specs, revulsion, tedious, dialect, Nearest to if: skimmed, hotline, gentile, helpful, celibacy, shops, longing, subgenus, Nearest to all: lenient, repudiate, napalm, buckets, fantastic, mashal, assists, mountain, Nearest to by: aleuts, flavours, patton, sicily, strung, gloria, hijra, remote, Nearest to first: botany, mahler, disgraceful, winter, landis, personification, pstn, meiji, Nearest to would: haworth, contesting, palladium, kierkegaard, pleading, prefixed, perl, mutual, Nearest to will: embarks, checklist, arsenate, meant, harriet, instrumentation, shouting, attain, Nearest to seven: wheel, transfection, libation, agrippa, pravda, eccentricities, multidisciplinary, flatten, Nearest to years: bituminous, scriptural, bodybuilder, musicals, chef, outgoing, cattle, desi, Nearest to these: inputs, uttar, appoints, trackless, dollfuss, aforementioned, diamonds, compatriots, Nearest to the: remedy, gentleman, dactyl, equitable, reims, throats, fedora, devoid, Average loss at step 2000 : 113.844122701 Average loss at step 4000 : 52.9316125462 Average loss at step 6000 : 33.4275434546 Average loss at step 8000 : 22.976399992 Average loss at step 10000 : 17.858336467 Nearest to had: collaborators, carnival, occurring, victoriae, revolutionaries, atemi, inline, address, Nearest to between: chlorine, tourists, viennese, fins, ice, denmark, hanging, austin, Nearest to one: reginae, austin, two, altenberg, victoriae, mathbf, analogue, gland, Nearest to use: beaten, victoriae, attraction, reginae, altenberg, case, sinai, ep, Nearest to state: shield, austin, fulfillment, mathbf, discoveries, gland, cow, gollancz, Nearest to over: cl, analogue, dialect, due, filing, van, anthem, foundation, Nearest to if: points, asterism, nautical, dialects, users, advocates, inherited, revolution, Nearest to all: napalm, mathbf, austin, generally, afraid, aberdeen, loss, mountain, Nearest to by: and, in, vs, on, is, homes, victoriae, with, Nearest to first: botany, atheist, winter, boroughs, victoriae, rod, myths, by, Nearest to would: citadel, mara, spirits, truly, renewed, albanian, gollancz, networks, Nearest to will: meant, solstice, ordered, predominant, requirements, arsenate, industrial, squash, Nearest to seven: austin, victoriae, nine, cl, zero, painter, sadler, wheel, Nearest to years: aberdeen, cl, chef, cattle, nine, altenberg, anal, long, Nearest to these: appoints, inputs, the, abba, zeus, dollfuss, diamonds, aforementioned, Nearest to the: a, vs, this, and, victoriae, cl, analogue, his, Average loss at step 12000 : 14.2383399565 Average loss at step 14000 : 11.4926679152 Average loss at step 16000 : 9.83872434783 Average loss at step 18000 : 8.6584512502 Average loss at step 20000 : 7.79948694158 Nearest to had: was, is, have, were, by, are, allegory, collaborators, Nearest to between: in, asu, chlorine, mathew, cluniac, of, inherit, for, Nearest to one: two, dasyprocta, agouti, reginae, four, three, victoriae, eight, Nearest to use: beaten, agouti, 
attraction, dasyprocta, altenberg, victoriae, case, siege, Nearest to state: fulfillment, shield, mathbf, java, touchdowns, ho, circ, austin, Nearest to over: cl, including, dialect, anthem, analogue, filing, dinosaurs, finale, Nearest to if: but, points, asterism, is, advocates, shops, inherited, resembling, Nearest to all: dasyprocta, afraid, napalm, transverse, mathbf, lenient, polyhedra, semicircular, Nearest to by: in, with, and, is, for, was, barbed, from, Nearest to first: botany, atheist, winter, rod, brooklyn, by, boroughs, victoriae, Nearest to would: mara, prefixed, may, will, perl, can, which, citadel, Nearest to will: agouti, meant, ordered, squash, requirements, solstice, solitary, would, Nearest to seven: nine, zero, eight, six, three, four, dasyprocta, five, Nearest to years: bituminous, aberdeen, chef, bodybuilder, cl, scriptural, five, cattle, Nearest to these: the, and, appoints, aforementioned, aragon, diamonds, inputs, dasyprocta, Nearest to the: a, his, their, its, this, agouti, dasyprocta, one, Average loss at step 22000 : 7.18565776443 Average loss at step 24000 : 7.04387247622 Average loss at step 26000 : 6.66978120804 Average loss at step 28000 : 6.26890629232 Average loss at step 30000 : 6.20609010398 Nearest to had: have, was, is, were, has, akita, are, by, Nearest to between: in, with, lawrencium, asu, inherit, of, mathew, by, Nearest to one: two, four, six, three, seven, five, dasyprocta, akita, Nearest to use: beaten, agouti, maligned, attraction, dasyprocta, sinai, siege, altenberg, Nearest to state: abitibi, touchdowns, mathbf, fulfillment, austin, shield, gland, varied, Nearest to over: including, agra, akita, cl, anthem, filing, dialect, dinosaurs, Nearest to if: but, abitibi, points, unsuitable, inherited, asterism, although, shops, Nearest to all: akita, dasyprocta, trinomial, afraid, mathbf, napalm, some, branson, Nearest to by: with, in, from, and, was, for, on, as, Nearest to first: abitibi, brooklyn, rod, atheist, botany, myths, victoriae, winter, Nearest to would: will, may, can, could, to, they, mara, prefixed, Nearest to will: would, agouti, can, meant, squash, solitary, could, ordered, Nearest to seven: nine, eight, six, four, three, five, zero, two, Nearest to years: bituminous, chef, aberdeen, scriptural, akita, bodybuilder, cl, outgoing, Nearest to these: the, many, some, appoints, aforementioned, aragon, dasyprocta, diamonds, Nearest to the: their, its, a, his, this, dasyprocta, agouti, akita, Average loss at step 32000 : 5.86434604466 Average loss at step 34000 : 5.86494594765 Average loss at step 36000 : 5.74090498745 Average loss at step 38000 : 5.28396542037 Average loss at step 40000 : 5.45378877831 Nearest to had: have, has, was, were, are, is, akita, by, Nearest to between: with, in, asu, lawrencium, inherit, by, mathew, ice, Nearest to one: two, four, six, three, seven, agouti, dasyprocta, akita, Nearest to use: beaten, dasyprocta, agouti, altenberg, sinai, maligned, siege, aba, Nearest to state: abitibi, mathbf, hus, touchdowns, fulfillment, varied, provincias, ho, Nearest to over: agra, including, akita, cl, finale, filing, anthem, abitibi, Nearest to if: but, abitibi, points, in, resembling, although, unsuitable, inherited, Nearest to all: akita, dasyprocta, trinomial, afraid, some, mathbf, polyhedra, four, Nearest to by: with, was, barbed, from, were, vs, be, for, Nearest to first: abitibi, brooklyn, rod, atheist, next, botany, victoriae, stitch, Nearest to would: will, may, can, could, governorates, they, prefixed, palladium, 
Nearest to will: would, can, could, agouti, squash, may, should, solitary, Nearest to seven: eight, six, four, nine, five, three, zero, two, Nearest to years: bituminous, chef, aberdeen, akita, times, cl, scriptural, bodybuilder, Nearest to these: many, some, aforementioned, appoints, aragon, their, the, two, Nearest to the: their, a, this, his, its, agouti, victoriae, dasyprocta, Average loss at step 42000 : 5.33003295577 Average loss at step 44000 : 5.2821497674 Average loss at step 46000 : 5.24431006336 Average loss at step 48000 : 5.03225419652 Average loss at step 50000 : 5.15144388342 Nearest to had: has, have, was, were, is, akita, are, by, Nearest to between: with, in, asu, nine, inherit, mathew, seven, lawrencium, Nearest to one: two, six, four, three, eight, five, seven, dasyprocta, Nearest to use: beaten, dasyprocta, agouti, altenberg, sinai, aba, thibetanus, case, Nearest to state: abitibi, brythonic, transports, mathbf, touchdowns, hus, fulfillment, provincias, Nearest to over: agra, including, akita, filing, anthem, cl, finale, dinosaurs, Nearest to if: but, abitibi, points, although, magazine, unsuitable, inherited, when, Nearest to all: akita, dasyprocta, some, afraid, trinomial, polyhedra, amphibians, branson, Nearest to by: was, with, be, barbed, as, from, thibetanus, for, Nearest to first: next, abitibi, brooklyn, stitch, injuries, barracks, victoriae, rod, Nearest to would: will, may, can, could, governorates, prefixed, to, they, Nearest to will: would, can, could, may, should, agouti, squash, solitary, Nearest to seven: eight, six, four, three, five, nine, zero, dasyprocta, Nearest to years: bituminous, times, chef, aberdeen, scriptural, akita, cl, bodybuilder, Nearest to these: many, some, the, aforementioned, aragon, their, appoints, two, Nearest to the: their, its, his, recitative, a, thibetanus, this, dasyprocta, Average loss at step 52000 : 5.20123040104 Average loss at step 54000 : 5.1286575551 Average loss at step 56000 : 5.08134980881 Average loss at step 58000 : 5.12050155807 Average loss at step 60000 : 4.94976982492 Nearest to had: has, have, was, were, ssbn, akita, hogs, thibetanus, Nearest to between: with, in, asu, from, ice, mathew, inherit, against, Nearest to one: two, six, three, four, seven, five, eight, dasyprocta, Nearest to use: beaten, sinai, dasyprocta, agouti, altenberg, case, aba, siege, Nearest to state: abitibi, callithrix, cebus, brythonic, transports, mathbf, hus, provincias, Nearest to over: agra, including, when, akita, filing, ssbn, anthem, finale, Nearest to if: but, although, abitibi, when, points, where, cebus, magazine, Nearest to all: some, akita, dasyprocta, these, trinomial, amphibians, afraid, polyhedra, Nearest to by: was, with, be, under, thibetanus, barbed, as, aleuts, Nearest to first: next, cebus, brooklyn, abitibi, injuries, rod, victoriae, barracks, Nearest to would: will, can, may, could, must, to, they, prefixed, Nearest to will: would, can, could, may, should, agouti, squash, solitary, Nearest to seven: eight, six, five, four, three, nine, zero, one, Nearest to years: times, bituminous, chef, aberdeen, four, akita, months, cl, Nearest to these: many, some, all, aforementioned, their, such, the, meg, Nearest to the: their, thibetanus, its, callithrix, a, recitative, agouti, this, Average loss at step 62000 : 4.76721232343 Average loss at step 64000 : 4.78901305127 Average loss at step 66000 : 4.96960300577 Average loss at step 68000 : 4.91815975869 Average loss at step 70000 : 4.76632793283 Nearest to had: has, have, 
was, were, ssbn, is, by, akita, Nearest to between: with, in, asu, within, nine, from, against, ice, Nearest to one: two, six, four, seven, three, five, eight, dasyprocta, Nearest to use: beaten, dasyprocta, case, agouti, altenberg, aba, tamarin, sinai, Nearest to state: abitibi, callithrix, transports, hus, brythonic, provincias, touchdowns, upanija, Nearest to over: when, agra, filing, including, akita, ssbn, anthem, dinosaurs, Nearest to if: but, although, when, abitibi, while, points, where, clo, Nearest to all: some, many, akita, dasyprocta, these, amphibians, afraid, polyhedra, Nearest to by: was, be, with, barbed, as, had, in, thibetanus, Nearest to first: next, cebus, injuries, brooklyn, abitibi, sideways, same, inhospitable, Nearest to would: will, can, may, could, must, to, they, should, Nearest to will: would, can, could, may, should, agouti, must, to, Nearest to seven: six, eight, four, five, three, nine, zero, one, Nearest to years: times, bituminous, aberdeen, five, four, chef, months, days, Nearest to these: many, some, the, all, such, their, several, clo, Nearest to the: their, its, this, thibetanus, callithrix, his, a, recitative, Average loss at step 72000 : 4.79691971648 Average loss at step 74000 : 4.76903061247 Average loss at step 76000 : 4.87939820313 Average loss at step 78000 : 4.79787338293 Average loss at step 80000 : 4.79895290601 Nearest to had: has, have, was, were, ssbn, is, thibetanus, akita, Nearest to between: with, in, within, against, asu, from, mathew, tyndall, Nearest to one: six, two, seven, three, four, five, microcebus, dasyprocta, Nearest to use: beaten, dasyprocta, aba, agouti, case, altenberg, tamarin, akita, Nearest to state: abitibi, callithrix, hus, upanija, provincias, law, cebus, brythonic, Nearest to over: when, agra, filing, akita, however, ssbn, har, anthem, Nearest to if: but, when, although, abitibi, magazine, cebus, points, while, Nearest to all: some, many, dasyprocta, akita, these, amphibians, trinomial, both, Nearest to by: was, be, with, when, aleuts, had, eight, in, Nearest to first: next, injuries, sideways, cebus, last, brooklyn, inhospitable, same, Nearest to would: will, can, may, could, must, might, should, to, Nearest to will: would, can, could, may, should, agouti, must, to, Nearest to seven: six, eight, five, four, three, nine, zero, two, Nearest to years: times, bituminous, months, days, aberdeen, chef, akita, dinar, Nearest to these: many, some, such, all, several, both, their, clo, Nearest to the: their, its, recitative, this, thibetanus, his, callithrix, dasyprocta, Average loss at step 82000 : 4.80839304113 Average loss at step 84000 : 4.78293949842 Average loss at step 86000 : 4.76968895173 Average loss at step 88000 : 4.68960524154 Average loss at step 90000 : 4.76713479412 Nearest to had: has, have, was, were, ssbn, thibetanus, akita, hogs, Nearest to between: with, in, within, against, asu, mathew, nine, tyndall, Nearest to one: two, four, six, seven, five, eight, three, dasyprocta, Nearest to use: beaten, dasyprocta, case, iit, aba, tamarin, altenberg, akita, Nearest to state: abitibi, callithrix, law, plosives, hus, provincias, transports, upanija, Nearest to over: when, agra, har, akita, however, filing, through, ssbn, Nearest to if: when, but, although, abitibi, where, magazine, cebus, since, Nearest to all: some, many, akita, these, dasyprocta, both, amphibians, several, Nearest to by: was, when, with, as, be, barbed, eight, during, Nearest to first: next, last, injuries, cebus, second, sideways, 
inhospitable, abitibi, Nearest to would: will, may, can, could, must, might, should, did, Nearest to will: would, can, could, may, should, must, might, agouti, Nearest to seven: eight, six, five, four, nine, three, zero, two, Nearest to years: times, months, days, bituminous, aberdeen, chef, textrm, akita, Nearest to these: many, some, several, such, all, both, their, which, Nearest to the: its, their, callithrix, thibetanus, dasyprocta, agouti, a, akita, Average loss at step 92000 : 4.69930859935 Average loss at step 94000 : 4.62781461734 Average loss at step 96000 : 4.73461313701 Average loss at step 98000 : 4.61038712591 Average loss at step 100000 : 4.67345285773 Nearest to had: has, have, was, were, ssbn, thibetanus, is, akita, Nearest to between: with, in, within, against, asu, from, mathew, tyndall, Nearest to one: two, four, six, seven, three, five, eight, akita, Nearest to use: beaten, dasyprocta, case, akita, iit, altenberg, tamarin, aba, Nearest to state: law, abitibi, callithrix, hus, upanija, plosives, brythonic, transports, Nearest to over: when, agra, through, har, akita, four, filing, ssbn, Nearest to if: when, but, although, while, abitibi, where, since, magazine, Nearest to all: some, many, both, these, several, akita, dasyprocta, amphibians, Nearest to by: be, was, with, when, aleuts, barbed, during, thibetanus, Nearest to first: next, last, second, injuries, cebus, sideways, same, inhospitable, Nearest to would: will, may, can, could, must, might, should, did, Nearest to will: would, can, could, may, should, must, might, to, Nearest to seven: six, eight, five, four, nine, three, zero, dinar, Nearest to years: times, months, days, aberdeen, bituminous, chef, akita, dinar, Nearest to these: many, some, several, all, such, both, their, which, Nearest to the: their, its, his, recitative, agouti, a, dasyprocta, callithrix,
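After training, final_embeddings holds one unit-length vector per vocabulary word, so a plain dot product gives the cosine similarity. A small hypothetical helper (not in the original script) for querying the nearest neighbors of an arbitrary word, assuming the dictionary, reverse_dictionary, and final_embeddings built above:

def nearest_words(word, k=8):
  idx = dictionary.get(word, 0)  # fall back to UNK for out-of-vocabulary words
  sims = np.dot(final_embeddings, final_embeddings[idx])
  return [reverse_dictionary[i] for i in (-sims).argsort()[1:k + 1]]

print(nearest_words('three'))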
# Step 6: Visualize the embeddings.
def plot_with_labels(low_dim_embs, labels, filename='tsne.png'):
  assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings"
  plt.figure(figsize=(18, 18))  # in inches
  for i, label in enumerate(labels):
    x, y = low_dim_embs[i, :]
    plt.scatter(x, y)
    plt.annotate(label,
                 xy=(x, y),
                 xytext=(5, 2),
                 textcoords='offset points',
                 ha='right',
                 va='bottom')
  # plt.savefig(filename)
  plt.show()
%matplotlib inline
try:
  from sklearn.manifold import TSNE
  import matplotlib.pyplot as plt

  tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
  plot_only = 500
  low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
  labels = [reverse_dictionary[i] for i in xrange(plot_only)]
  plot_with_labels(low_dim_embs, labels)

except ImportError:
  print("Please install sklearn and matplotlib to visualize embeddings.")