In [5]:

!bash download.sh

Reading package lists... Done
Building dependency tree       
Reading state information... Done
wget is already the newest version (1.19.4-1ubuntu2.1).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.
Files already extracted

In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy.misc
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

from collections import Counter

%matplotlib inline

/root/miniconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
/root/miniconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
/root/miniconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)

In [2]:

labels = pd.read_csv('labels.tsv', sep='\t')
label_dict = dict(zip(labels.Label, labels.Description))
labels

Out[2]:

	Label	Description
0	0	T-shirt/top
1	1	Trouser
2	2	Pullover
3	3	Dress
4	4	Coat
5	5	Sandal
6	6	Shirt
7	7	Sneaker
8	8	Bag
9	9	Ankle boot

Following code to extract the data was adapted from here (the official repository for Fashion Mnist).

In [3]:

with open('./data/train-images-idx3-ubyte', 'rb') as imgpath:
    images = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=16).reshape((-1,28,28))
    
with open('./data/train-labels-idx1-ubyte', 'rb') as imgpath:
    im_labels = np.frombuffer(imgpath.read(), dtype=np.uint8,
                               offset=8)
    
Counter(im_labels).most_common()

Out[3]:

[(9, 6000),
 (0, 6000),
 (3, 6000),
 (2, 6000),
 (7, 6000),
 (5, 6000),
 (1, 6000),
 (6, 6000),
 (4, 6000),
 (8, 6000)]

In [4]:

!mkdir -p ./logs/
!mkdir -p ./logs/1
PATH = os.getcwd()
LOG_DIR = PATH+'/logs/1/'

In [5]:

with open(os.path.join(LOG_DIR, 'metadata.tsv'), 'w') as f:
    f.write('Class\tName\n')
    for num, name in zip(im_labels, [label_dict[l] for l in im_labels]):
        f.write('{}\t{}\n'.format(num,name))

In [6]:

def images_to_sprite(data):
    """Creates the sprite image along with any necessary padding
    Args:
      data: NxHxW[x3] tensor containing the images.
    Returns:
      data: Properly shaped HxWx3 image with any necessary padding.
    """
    if len(data.shape) == 3:
        data = np.tile(data[...,np.newaxis], (1,1,1,3))
    data = data.astype(np.float32)
    min = np.min(data.reshape((data.shape[0], -1)), axis=1)
    data = (data.transpose(1,2,3,0) - min).transpose(3,0,1,2)
    max = np.max(data.reshape((data.shape[0], -1)), axis=1)
    data = (data.transpose(1,2,3,0) / max).transpose(3,0,1,2)

    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = ((0, n ** 2 - data.shape[0]), (0, 0),
            (0, 0)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant',
            constant_values=0)
    # Tile the individual thumbnails into an image.
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3)
            + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = (data * 255).astype(np.uint8)
    return data

sprite = images_to_sprite(images)
scipy.misc.imsave(os.path.join(LOG_DIR, 'sprite.png'), sprite)

/root/miniconda3/lib/python3.6/site-packages/ipykernel_launcher.py:29: DeprecationWarning: `imsave` is deprecated!
`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.

The sprite image is a square image that you write all your original images into. The remaining images are put in as blank (black).

In [7]:

print(sprite.shape)
print(np.sqrt(60000)*28)

(6860, 6860, 3)
6858.571279792899

In [8]:

features = tf.Variable(images.reshape((len(images), -1)), name='features')

with tf.Session() as sess:
    saver = tf.train.Saver([features])

    sess.run(features.initializer)
    saver.save(sess, os.path.join(LOG_DIR, 'images_4_classes.ckpt'))
    
    config = projector.ProjectorConfig()
    # One can add multiple embeddings.
    embedding = config.embeddings.add()
    embedding.tensor_name = features.name
    # Link this tensor to its metadata file (e.g. labels).
    embedding.metadata_path = os.path.join(LOG_DIR, 'metadata.tsv')
    # Comment out if you don't want sprites
    embedding.sprite.image_path = os.path.join(LOG_DIR, 'sprite.png')
    embedding.sprite.single_image_dim.extend([images.shape[1], images.shape[1]])
    # Saves a config file that TensorBoard will read during startup.
    projector.visualize_embeddings(tf.summary.FileWriter(LOG_DIR), config)

In [ ]:

!tensorboard --logdir=./logs/1/

/root/miniconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
/root/miniconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
/root/miniconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
TensorBoard 1.10.0 at http://a666cd6efd0d:6006 (Press CTRL+C to quit)

click here to open tensorboard.

In [ ]: