Why? RecordSearch lets you download a PDF of a digitised file, but sometimes it's more convenient to work with individual images.
How? Just enter the barcode of the file in the box below and click the button. When all the images have been downloaded, they'll be zipped up and a convenient download link will be displayed.
More? Click the 'Edit App' button at the top of the page to see how this works.
import os
import shutil
import time
import ipywidgets as widgets
import requests
from IPython.display import HTML, display
from recordsearch_data_scraper.scrapers import RSItem
from slugify import slugify
from tqdm.auto import tqdm
def _download_page(img_url, filename):
    """Download a single page image to *filename* without leaving partial files.

    The image is streamed to a temporary ``<filename>.part`` file and only
    renamed to *filename* once the whole body has been written. This matters
    because the caller skips any page whose file already exists: a truncated
    file left behind by a failed download would otherwise never be retried.

    Args:
        img_url: URL of the page image.
        filename: Destination path for the finished image.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server returns an error status.
        OSError: If the file cannot be written or renamed.
    """
    partial = f"{filename}.part"
    try:
        # NOTE(review): verify=False (no TLS certificate checking) is kept
        # from the original code -- confirm whether it is still required.
        # The timeout stops a stalled server from hanging the notebook.
        with requests.get(
            img_url, stream=True, verify=False, timeout=60
        ) as response:
            response.raise_for_status()
            # Have urllib3 undo any gzip/deflate content encoding so the
            # bytes written to disk are the image itself.
            response.raw.decode_content = True
            with open(partial, "wb") as out_file:
                shutil.copyfileobj(response.raw, out_file)
        os.replace(partial, filename)
    except BaseException:
        # Remove the incomplete temporary file before re-raising.
        if os.path.exists(partial):
            os.remove(partial)
        raise


def get_images(b):
    """Download every digitised page of the requested item and zip them up.

    Button click handler. Reads the barcode from the ``barcode`` text widget,
    looks the item up with ``RSItem``, saves each page image under
    ``data/images/<series>-<control>-[<identifier>]/`` (pages already on disk
    are skipped), creates a zip archive of that directory, and displays a
    download link. All output is sent to the ``out`` widget.

    Args:
        b: The button instance passed in by ``on_click`` (unused).
    """
    with out:
        # Ignore stray whitespace around a pasted barcode.
        requested = barcode.value.strip()
        if not requested:
            print("You need to provide a barcode!")
            return

        item = RSItem(requested).data
        page_count = item["digitised_pages"]
        if page_count <= 0:
            print("Sorry, that item has not been digitised...")
            return

        series = slugify(item["series"])
        control = slugify(item["control_symbol"])
        dir_name = f'{series}-{control}-[{item["identifier"]}]'
        dir_path = os.path.join("data", "images", dir_name)
        os.makedirs(dir_path, exist_ok=True)

        for page in tqdm(range(1, page_count + 1)):
            filename = f'{dir_path}/{item["identifier"]}-p{page}.jpg'
            if os.path.exists(filename):
                # Already downloaded on an earlier run.
                continue
            img_url = "https://recordsearch.naa.gov.au/NaaMedia/ShowImage.asp?B={}&S={}&T=P".format(
                item["identifier"], page
            )
            _download_page(img_url, filename)
            # Pause between requests so the server isn't hammered.
            time.sleep(0.5)

        shutil.make_archive(dir_path, "zip", dir_path)
        link = f"{dir_path}.zip"
        display(
            HTML(
                f'Download zipped images: <a href="{link}" download="{dir_name}.zip">{link}</a>'
            )
        )
# Create the interface widgets: a text box for the barcode, a button to
# start the harvest, and an output area that get_images writes into.
barcode = widgets.Text(
    description="Barcode:",
    placeholder="Enter item barcode",
    disabled=False,
)
button = widgets.Button(
    description="Get images",
    tooltip="Click to harvest images",
    button_style="primary",  # 'success', 'info', 'warning', 'danger' or ''
    icon="",
    disabled=False,
)
out = widgets.Output()

# Run get_images each time the button is clicked.
button.on_click(get_images)

# Render the widgets top to bottom: input, button, then output.
display(barcode)
display(button)
display(out)
%%capture
# Load environment variables if available
%load_ext dotenv
%dotenv
# TESTING
# When the GW_STATUS environment variable is set to "dev", fill in a sample
# barcode and click the button programmatically so the notebook runs
# end to end without user input.
if os.environ.get("GW_STATUS") == "dev":
    barcode.value = "149309"
    button.click()
Created by Tim Sherratt as part of the GLAM Workbench.