%load_ext autoreload
%autoreload 2
# Train on the text8 corpus and write the result to text8.bin.
# No `binary` argument is given here, so the package default output format
# applies (the file is later loaded with kind='bin').
import word2vec

word2vec.word2vec(
    '/Users/danielfrg/Downloads/text8',
    '/Users/danielfrg/Downloads/text8.bin',
    size=100,
    verbose=True
)
Starting training using file /Users/danielfrg/Downloads/text8 Vocab size: 71291 Words in train file: 16718843 Alpha: 0.000002 Progress: 100.03% Words/thread/sec: 276.90k
# Train again on the same corpus, this time passing binary=0 and writing to
# text8.txt (the file is later loaded with kind='txt').
# NOTE(review): this is a second, independent training call, not a conversion
# of the .bin file produced earlier.
word2vec.word2vec(
    '/Users/danielfrg/Downloads/text8',
    '/Users/danielfrg/Downloads/text8.txt',
    size=100,
    binary=0,
    verbose=True
)
Starting training using file /Users/danielfrg/Downloads/text8 Vocab size: 71291 Words in train file: 16718843 Alpha: 0.000002 Progress: 100.03% Words/thread/sec: 277.29k
# Load the two trained files so they can be compared side by side:
# `binary` comes from the .bin file, `text` from the .txt file.
import word2vec

binary = word2vec.load(
    '/Users/danielfrg/Downloads/text8.bin',
    kind='bin'
)
text = word2vec.load(
    '/Users/danielfrg/Downloads/text8.txt',
    kind='txt'
)
# Show the vocabulary of the model loaded from the .bin file; the output
# below is a NumPy array of unicode strings beginning with u'</s>'.
binary.vocab
array([u'</s>', u'the', u'of', ..., u'bredon', u'skirting', u'santamaria'], dtype='<U78')
# Show the vocabulary of the model loaded from the .txt file; the visible
# entries in the output below match those shown for the .bin model.
text.vocab
array([u'</s>', u'the', u'of', ..., u'bredon', u'skirting', u'santamaria'], dtype='<U78')
# Query the .bin model for 'dog'. The output below is a pair of arrays: ten
# integers followed by ten floats in descending order -- presumably vocabulary
# indices of the closest words and their cosine similarities (confirm against
# the word2vec package documentation).
binary.cosine('dog')
(array([ 2600, 11158, 13739, 4850, 9586, 8185, 2971, 9102, 4140, 11877]), array([ 0.83075334, 0.77584524, 0.77085914, 0.76748576, 0.76346954, 0.74456394, 0.73442467, 0.73413262, 0.73061051, 0.71777623]))
# Same query against the .txt model. The result below is close to, but not
# identical with, the .bin model's result.
# NOTE(review): the two files were written by separate training calls, so the
# difference may reflect run-to-run variation rather than the file format --
# confirm before drawing conclusions from the comparison.
text.cosine('dog')
(array([ 2600, 11158, 9586, 13739, 4850, 8185, 9102, 4140, 2971, 8664]), array([ 0.81635393, 0.7741477 , 0.7587773 , 0.75698403, 0.75485644, 0.73609652, 0.73227508, 0.72156542, 0.71674759, 0.71594169]))
%%timeit -n1 -r1
# Timed statement: a single load (one loop, one repeat) of the .bin file.
word2vec.load('/Users/danielfrg/Downloads/text8.bin', kind='bin')
1 loops, best of 1: 2.99 s per loop
%%timeit -n1 -r1
# Timed statement: a single load (one loop, one repeat) of the .txt file.
# NOTE(review): this one-shot figure (12.3 s) is far above the ~4.2 s reported
# by the repeated runs of the same call further down; possibly a warm-up /
# cold-cache effect -- confirm before comparing formats with this number.
word2vec.load('/Users/danielfrg/Downloads/text8.txt', kind='txt')
1 loops, best of 1: 12.3 s per loop
%%timeit -n1 -r1
# Timed statement: a single load of the .txt file with secure=True.
# NOTE(review): what `secure` changes is not visible in this notebook -- check
# the signature of word2vec.load before relying on this measurement.
word2vec.load('/Users/danielfrg/Downloads/text8.txt', kind='txt', secure=True)
1 loops, best of 1: 4.4 s per loop
%%timeit -n3 -r3
# Timed statement: the plain .txt load again, now with 3 loops and 3 repeats
# so the reported best-of figure is less sensitive to a slow first run.
word2vec.load('/Users/danielfrg/Downloads/text8.txt', kind='txt')
3 loops, best of 3: 4.23 s per loop
%%timeit -n3 -r3
# Timed statement: identical to the preceding 3x3 measurement of the .txt
# load, run once more; the two results below agree to within ~0.1 s.
word2vec.load('/Users/danielfrg/Downloads/text8.txt', kind='txt')
3 loops, best of 3: 4.29 s per loop