Basic image recognition with Tribune photos

Based on the TensorFlow image recognition tutorial. This notebook uses the pretrained Inception-v3 model to classify a random sample of Tribune photos into the 1,000 ImageNet classes the model was trained on.

In [ ]:
import os
from urllib.parse import urlparse
import pandas as pd
import requests
from IPython.display import display, HTML
In [ ]:
# Read the Tribune image records straight from the GLAM Workbench repository.
# The `images` column of this DataFrame is what select_images() samples from.
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/GLAM-Workbench/ozglam-data-records-of-resistance/master/data/images.csv'
)
In [ ]:
# Run classify_image.py once with no arguments so that it downloads and
# unpacks the pretrained model before any Tribune images are classified.
# NOTE(review): assumes classify_image.py sits in the notebook's working
# directory -- confirm.
%run classify_image.py
In [ ]:
def select_images(sample):
    """Pick a random sample of Tribune images and build their URLs.

    Draws ``sample`` rows at random from the module-level ``df`` and reads
    the identifier in each row's ``images`` column.

    Returns:
        A list of ``(img_id, img_url)`` tuples, where ``img_url`` is the
        ``<img_id>-500.jpg`` file in the ``wraggetribune`` S3 bucket.
    """
    url_template = 'https://s3-ap-southeast-2.amazonaws.com/wraggetribune/images/500/{0}-500.jpg'
    sampled_ids = df.sample(sample)['images']
    return [(img_id, url_template.format(img_id)) for img_id in sampled_ids]


def download_image(img_url, timeout=30):
    """Download an image into the current working directory.

    The file is named after the last path segment of ``img_url``; an existing
    file with the same name is overwritten.

    Args:
        img_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up (passed
            through to ``requests.get``).

    Returns:
        The absolute path of the saved file.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    filename = os.path.join(os.getcwd(), os.path.basename(urlparse(img_url).path))
    # Using the response as a context manager releases the streamed
    # connection even if writing the file fails part-way through.
    with requests.get(img_url, stream=True, timeout=timeout) as response:
        # Check the status before opening the file so that an error page is
        # never saved to disk under a .jpg name and handed to the classifier.
        response.raise_for_status()
        with open(filename, 'wb') as fd:
            for chunk in response.iter_content(chunk_size=8192):
                fd.write(chunk)
    return filename


def recognise_images(sample=10):
    images = select_images(sample)
    for img_id, img_url in images:
        filename = download_image(img_url)
        display(HTML('<image src="{}"><br><a target="_blank" href="http://digital.sl.nsw.gov.au/delivery/DeliveryManagerServlet?dps_pid={}&embedded=true&toolbar=false">More details at SLNSW</a>'.format(img_url, img_id)))
        %run classify_image.py --image_file $filename
        #%run label_image.py --image $filename
In [ ]:
# Classify ten randomly chosen Tribune images.
recognise_images(sample=10)
In [ ]: