#!/usr/bin/env python
# coding: utf-8

# # Classifying all the Tribune images
#
# Using the simple model we created in [this notebook](Training-a-classification-model-for-the-Tribune.ipynb), let's attempt to classify all the images in the Tribune collection.

# In[65]:


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pandas as pd
import os
from urllib.parse import urlparse
import requests
from IPython.display import display, HTML
import copy
from tqdm import tqdm_notebook
import sys
import time

import numpy as np
import tensorflow as tf

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


# In[66]:


# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

def load_graph(model_file):
    graph = tf.Graph()
    graph_def = tf.GraphDef()

    with open(model_file, "rb") as f:
        graph_def.ParseFromString(f.read())
    with graph.as_default():
        tf.import_graph_def(graph_def)

    return graph


def read_tensor_from_image_file(file_name, input_height=299, input_width=299,
                                input_mean=0, input_std=255):
    input_name = "file_reader"
    output_name = "normalized"
    file_reader = tf.read_file(file_name, input_name)
    if file_name.endswith(".png"):
        image_reader = tf.image.decode_png(file_reader, channels=3, name='png_reader')
    elif file_name.endswith(".gif"):
        image_reader = tf.squeeze(tf.image.decode_gif(file_reader, name='gif_reader'))
    elif file_name.endswith(".bmp"):
        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
    else:
        image_reader = tf.image.decode_jpeg(file_reader, channels=3, name='jpeg_reader')
    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
    sess = tf.Session()
    result = sess.run(normalized)

    return result


def load_labels(label_file):
    label = []
    proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()
    for l in proto_as_ascii_lines:
        label.append(l.rstrip())
    return label


# In[67]:


def label_tribune_image(file_name):
    model_file = "tensorflow-for-poets-2/tf_files/tribune_graph.pb"
    label_file = "tensorflow-for-poets-2/tf_files/tribune_labels.txt"
    input_height = 224
    input_width = 224
    input_mean = 128
    input_std = 128
    input_layer = "input"
    output_layer = "final_result"

    graph = load_graph(model_file)
    t = read_tensor_from_image_file(file_name,
                                    input_height=input_height,
                                    input_width=input_width,
                                    input_mean=input_mean,
                                    input_std=input_std)

    input_name = "import/" + input_layer
    output_name = "import/" + output_layer
    input_operation = graph.get_operation_by_name(input_name)
    output_operation = graph.get_operation_by_name(output_name)

    with tf.Session(graph=graph) as sess:
        start = time.time()
        results = sess.run(output_operation.outputs[0],
                           {input_operation.outputs[0]: t})
        end = time.time()
    results = np.squeeze(results)

    top_k = results.argsort()[-5:][::-1]
    labels = load_labels(label_file)
    scores = {}
    for i in top_k:
        scores[labels[i]] = results[i]
    return scores


def detect_all():
    '''
    I've already got copies of all the images, so I'll just point the script at them.
    '''
    image_dir = '/Volumes/bigdata/mydata/SLNSW/Tribune/images/500'
    images = [i for i in os.listdir(image_dir) if i[-4:] == '.jpg']
    for image in tqdm_notebook(images):
        img_file = os.path.join(image_dir, image)
        scores = label_tribune_image(img_file)
        scores['image'] = image.replace('-500.jpg', '')
        # Append to the module-level results list defined in the next cell.
        results.append(scores)


# In[ ]:


results = []
detect_all()


# In[70]:


df = pd.DataFrame(results)


# In[71]:


df.head()


# In[73]:


df.round(5)


# In[74]:


df.to_csv('classified.csv', index=False)


# In[78]:


df = pd.read_csv('classified.csv')


# In[79]:


df.describe()


# In[97]:


portraits = df.loc[df['portraits'] > 0.95]


# In[104]:


samples = portraits.sample(6)
html = ''
for image in samples['image'].tolist():
    # The <img> markup was lost from this cell on export; the tag below is a
    # reconstruction that assumes the local 500px copies used above.
    html += '<img src="/Volumes/bigdata/mydata/SLNSW/Tribune/images/500/{}-500.jpg" width="200">'.format(image)
display(HTML(html))


# In[103]:


protests = df.loc[df['protests'] > 0.95]


# In[101]:


samples = protests.sample(6)
html = ''
for image in samples['image'].tolist():
    # As above, the <img> markup here is a reconstruction.
    html += '<img src="/Volumes/bigdata/mydata/SLNSW/Tribune/images/500/{}-500.jpg" width="200">'.format(image)
display(HTML(html))
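
# As a possible next step (not part of the original notebook), the same 0.95 threshold can be applied to every label at once to get a quick tally of confident matches per category. This is a minimal sketch that assumes `df` is the dataframe loaded from `classified.csv`, with one score column per label alongside the `image` column.

# In[ ]:


# Count the images scoring above 0.95 for each label.
# Assumes every column other than 'image' holds a label score.
label_columns = [c for c in df.columns if c != 'image']
confident_counts = (df[label_columns] > 0.95).sum()
confident_counts.sort_values(ascending=False)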