class PDF(FPDF, HTMLMixin):
    """FPDF document class with HTMLMixin mixed in.

    Adds no behaviour of its own; it only combines the two base classes so
    a single object can be used for both regular FPDF calls and the
    ``write_html`` calls made later in this script.
    """
# Create the document and configure it before any content is written.
pdf = PDF()
# Embed images using the DCTDecode (JPEG) filter.
pdf.set_image_filter("DCTDecode")
# Georgia is loaded from the macOS system font folder; a Norasi TTF from the
# user's font folder was the earlier alternative. Change `fname` on other systems.
pdf.add_font('serif', fname='/System/Library/Fonts/Supplemental/Georgia.ttf', uni=True)
pdf.set_font('serif', '', 12)
pdf.compress = True
# Same 15-unit margin on the left, right and top (applied in that order).
for set_margin in (pdf.set_left_margin, pdf.set_right_margin, pdf.set_top_margin):
    set_margin(15)
pdf.add_page()

# Introductory text for the first page of the collection.
intro_html = """
<h1>A miscellany of ephemera, oddities, and estrays</h1>
<p> </p>
<p>This collection comprises digitised items from the Trove book zone with nary but a single page.
You will find an odd mix of posters, pamphlets, advertisements, ephemera, and other assorted documents.</p>
<p>It was compiled by <a href="https://timsherratt.org">Tim Sherratt</a> to help researchers and promote use of Australia's digital cultural collections.
The methods used to harvest the metadata and images are described in the <a href="https://glam-workbench.github.io/trove-books/">Trove Books</a>
section of the <a href="https://glam-workbench.github.io/">GLAM Workbench</a>.</p>
"""
pdf.write_html(intro_html)
def _caption_html(row):
    """Return the HTML caption (title, optional byline, link) for one item.

    ``row`` is a tuple from ``DataFrame.itertuples()`` exposing ``title``,
    ``contributors``, ``date`` and ``fulltext_url``. The byline joins the
    contributors (with ``|`` separators turned into commas) and the date with
    a middle dot, and is omitted entirely when both are empty.

    All harvested values are HTML-escaped so that characters such as ``<`` or
    ``&`` in the metadata cannot corrupt the markup given to ``write_html``.
    """
    from html import escape  # function-scope import keeps this block self-contained

    # Only call .replace() when a contributors value is present; previously it
    # was also called in the "only one of the two is set" branch.
    # NOTE(review): assumes missing values are '' or None rather than NaN -- confirm.
    contributors = row.contributors.replace("|", ",") if row.contributors else ''
    date = row.date if row.date else ''
    byline_parts = [escape(str(part), quote=False) for part in (contributors, date) if part]
    byline = f'<p>{" · ".join(byline_parts)}</p>' if byline_parts else ''

    title = escape(str(row.title), quote=False)
    url = escape(str(row.fulltext_url), quote=True)
    return f'<p>{title}</p>{byline}<p><a href="{url}">{url}</a></p>'


# Resized copies of the page images are written here before being embedded.
tmp_dir = Path('temp')
tmp_dir.mkdir(parents=True, exist_ok=True)

# One page per single-page item, de-duplicated and ordered by date then id.
single_page_items = (
    df.loc[df['pages'] == 1]
    .drop_duplicates(subset='trove_id')
    .sort_values(by=['date', 'trove_id'])
)

for row in single_page_items.itertuples():
    img_path = Path('images', f'{row.trove_id}-1.jpg')
    if not img_path.exists():
        continue

    # Best effort: an unreadable image is skipped (and reported) rather than
    # aborting the whole run, but interrupts are no longer swallowed.
    try:
        source_img = Image.open(img_path)
    except (OSError, ValueError, Image.DecompressionBombError) as err:
        print(f'Skipping unreadable image {img_path}: {err}')
        continue

    tmp_path = tmp_dir / f'{row.trove_id}-1.jpg'
    # The context manager releases the underlying file once the copy is saved.
    with source_img:
        w, h = source_img.size
        source_img.thumbnail((800, 800), resample=Image.LANCZOS)
        # Landscape images are rotated so they fill the portrait page.
        page_img = source_img.transpose(Image.ROTATE_90) if w > h else source_img
        page_img.save(tmp_path, quality=80)

    pdf.add_page()
    pdf.image(tmp_path, x=15, y=15, h=180)
    # Move the cursor below the image before writing the caption.
    pdf.ln(190)
    pdf.write_html(_caption_html(row))

pdf.output("ephemera.pdf")