#!/usr/bin/env python
# coding: utf-8

# # Make final figures
# Makes final versions of figures.

# ## Import Python packages

# In[1]:


import os
import re
import shutil
import string
import xml.etree.ElementTree as ElementTree

import cairosvg

from IPython.display import display, Image, SVG

import matplotlib.pyplot as plt
plt.ion()
import matplotlib.image

import pandas as pd

from plotnine import *

import svgutils

import yaml

from dms_tools2.ipython_utils import showPDF
from dms_tools2.plot import COLOR_BLIND_PALETTE_GRAY as PALETTE


# ## Define some functions

# In[2]:


def svg_dim(svgfile, dim):
    """Get width or height `dim` of `svgfile` as a float.

    Parameters
    ----------
    svgfile : str
        Path to an SVG file.
    dim : str
        Name of the root-element attribute to read (e.g. 'width', 'height').

    Returns
    -------
    float
        Value of the attribute after removing any 'px' / 'pt' unit text.

    Raises
    ------
    KeyError
        If the root element of `svgfile` has no attribute named `dim`.
    """
    raw_value = ElementTree.parse(svgfile).getroot().attrib[dim]
    return float(raw_value.replace('px', '').replace('pt', ''))


# In[3]:


def svg_to_pdf(svgfile):
    """Convert `svgfile` to PDF, return converted file name.

    The PDF is written next to `svgfile`, with the extension replaced
    by '.pdf'.

    Raises
    ------
    ValueError
        If `svgfile` does not contain exactly one recognizable viewBox.
    """
    with open(svgfile, encoding='utf-8') as f:
        svg = f.read()

    # need to eliminate units that `svgutils` incorrectly puts in viewBox
    # (raw strings so that `\d` and `\.` are regex escapes, not invalid
    # string escapes; non-capturing group so `findall` yields full matches)
    viewbox_regex = re.compile(
        r'viewBox="' + ' '.join([r'\d+\.?\d*(?:px)?'] * 4) + '"')
    viewboxes = viewbox_regex.findall(svg)
    if len(viewboxes) != 1:
        raise ValueError(f"did not find exactly one viewBox in {svgfile}")
    viewbox = viewboxes[0]
    svg = svg.replace(viewbox, viewbox.replace('px', ''))

    outfile = os.path.splitext(svgfile)[0] + '.pdf'
    cairosvg.svg2pdf(bytestring=svg, write_to=outfile)
    return outfile


# In[4]:


def png_aspect(pngfile):
    """Aspect ratio (width to height) of `pngfile`."""
    # image array shape is (rows, columns, ...), i.e. (height, width, ...)
    n_rows, n_cols = matplotlib.image.imread(pngfile).shape[:2]
    return n_cols / n_rows


# ## Read configuration

# In[5]:


with open('config.yaml', encoding='utf-8') as f:
    config = yaml.safe_load(f)

with open(config['figure_config'], encoding='utf-8') as f:
    figure_config = yaml.safe_load(f)

os.makedirs(config['finalfigsdir'], exist_ok=True)
print(f"Final figures go in {config['finalfigsdir']}")


# ## Define antibody samples
# Some of the figure making processes differ for the antibody and non-antibody samples.
# Here are the antibody ones:

# In[6]:


antibodies = ['antibody_region_B', 'antibody_lower_head']


# ## Make combined logo / neut / struct plots
#
# First for **non**-antibody samples:

# In[7]:


vmargin = 35  # vertical space between panels
struct_relheight = figure_config['struct_panel_height']

for fig, figconfig in figure_config['figures'].items():

    if fig in antibodies:
        continue

    logo_and_neut_svg = os.path.join(config['figsdir'],
                                     f"{fig}_logo_and_neut.svg")
    width = svg_dim(logo_and_neut_svg, 'width')
    height = svg_dim(logo_and_neut_svg, 'height')
    # a relative height for logo_and_neut
    logo_and_neut_relheight = 2.5 * len(figconfig['sera']) + 0.4 + 1.3

    fig_elements = [svgutils.compose.SVG(logo_and_neut_svg)]

    # only add a structure panel if sera are listed for it
    if figconfig.get('sera_for_struct'):
        struct_png = os.path.join(config['figsdir'], f"{fig}_struct.png")
        # scale structure panel relative to the logo / neut panel
        png_height = height * struct_relheight / logo_and_neut_relheight
        height += png_height + vmargin
        fig_elements.append(
            svgutils.compose.Image(png_aspect(struct_png) * png_height,
                                   png_height,
                                   struct_png)
            .move(10, height - png_height))
        fig_elements.append(
            svgutils.compose.Text(figconfig['struct_panel_label'], 15, 5,
                                  weight='bold', size='26', font='Arial')
            .move(10, height - png_height))

    f = svgutils.compose.Figure(width, height, *fig_elements)
    svg = os.path.join(config['finalfigsdir'], f"{fig}.svg")
    f.save(svg)
    pdf = svg_to_pdf(svg)
    print(f"\nWriting figure for {fig} to {pdf}")
    showPDF(pdf)


# Now for antibody samples, combined into one figure:

# In[8]:


fig_elements = []
height = 0
width = max(svg_dim(os.path.join(config['figsdir'],
                                 f"{fig}_logo_and_neut.svg"),
                    'width')
            for fig in antibodies)

# stack the logo / neut panels vertically
for fig in antibodies:
    figconfig = figure_config['figures'][fig]
    logo_and_neut_svg = os.path.join(config['figsdir'],
                                     f"{fig}_logo_and_neut.svg")
    svg_height = svg_dim(logo_and_neut_svg, 'height')
    logo_and_neut_relheight = 2.5 * len(figconfig['sera']) + 0.4 + 1.3
    height += svg_height
    fig_elements.append(svgutils.compose.SVG(logo_and_neut_svg)
                        .move(0, height - svg_height))
    height += vmargin

# NOTE: `svg_height` and `logo_and_neut_relheight` here are the values left
# over from the last antibody in the loop above; one shared height is used
# for all structure panels.
png_height = svg_height * struct_relheight / logo_and_neut_relheight
height += png_height

# place the structure panels side by side along the bottom
for i, fig in enumerate(antibodies):
    figconfig = figure_config['figures'][fig]
    struct_png = os.path.join(config['figsdir'], f"{fig}_struct.png")
    fig_elements.append(
        svgutils.compose.Image(png_aspect(struct_png) * png_height,
                               png_height,
                               struct_png)
        .move(10 + i * 0.35 * width, height - png_height))
    fig_elements.append(
        svgutils.compose.Text(figconfig['struct_panel_label'], 15, 5,
                              weight='bold', size='26', font='Arial')
        .move(10 + i * 0.35 * width, height - png_height))

f = svgutils.compose.Figure(width, height, *fig_elements)
svg = os.path.join(config['finalfigsdir'], 'antibodies.svg')
f.save(svg)
pdf = svg_to_pdf(svg)
print(f"\nWriting figure for antibodies to {pdf}")
showPDF(pdf)


# ## Plots of replicate-replicate correlations
# First non-antibody samples:

# In[9]:


vmargin = 35  # vertical space between panels

for fig, figconfig in figure_config['figures'].items():

    if fig in antibodies:
        continue

    corr_svg = os.path.join(config['figsdir'], f"{fig}_rep_corr.svg")
    if fig == 'antibody_spikein':
        reps_svg = os.path.join(config['avgdiffsel_reps_dir'],
                                'serum_mAb_spike_reps.svg')
    else:
        reps_svg = os.path.join(config['avgdiffsel_reps_dir'],
                                f"{fig}_reps.svg")

    fig_elements = []
    height = width = 0
    if fig == '2009_age_53_samples':
        # reps plot for this sample is in that for VIDD sera
        letters = ['']
        fig_svgs = [corr_svg]
        scales = [1]
    else:
        letters = string.ascii_uppercase
        fig_svgs = [reps_svg, corr_svg]
        scales = [1, 2]
    for letter, fig_svg, scale in zip(letters, fig_svgs, scales):
        svg_height = svg_dim(fig_svg, 'height') * scale
        height += svg_height
        width = max(svg_dim(fig_svg, 'width') * scale, width)
        fig_elements.append(svgutils.compose.SVG(fig_svg)
                            .scale(scale)
                            .move(0, height - svg_height))
        fig_elements.append(
            svgutils.compose.Text(letter, 10, 25,
                                  weight='bold', size='26', font='Arial')
            .move(0, height - svg_height))
        height += vmargin

    f = svgutils.compose.Figure(width, height, *fig_elements)
    merged_svg = os.path.join(config['finalfigsdir'], f"{fig}_corr.svg")
    f.save(merged_svg)
    merged_pdf = svg_to_pdf(merged_svg)
    print(f"\nReplicates for {fig} in {merged_svg} and {merged_pdf}:")
    showPDF(merged_pdf)


# Now antibody samples:

# In[10]:


fig_elements = []
height = width = 0

for i, antibody in enumerate(antibodies):
    corr_svg = os.path.join(config['figsdir'], f"{antibody}_rep_corr.svg")
    reps_svg = os.path.join(config['avgdiffsel_reps_dir'],
                            f"{antibody}_reps.svg")
    # two panels per antibody, so panel letters start at offset 2 * i
    for letter, fig_svg, scale in zip(string.ascii_uppercase[2 * i:],
                                      [reps_svg, corr_svg],
                                      [1, 2]):
        svg_height = svg_dim(fig_svg, 'height') * scale
        height += svg_height
        width = max(svg_dim(fig_svg, 'width') * scale, width)
        fig_elements.append(svgutils.compose.SVG(fig_svg)
                            .scale(scale)
                            .move(0, height - svg_height))
        fig_elements.append(
            svgutils.compose.Text(letter, 10, 20,
                                  weight='bold', size='26', font='Arial')
            .move(0, height - svg_height))
        height += vmargin

f = svgutils.compose.Figure(width, height, *fig_elements)
merged_svg = os.path.join(config['finalfigsdir'], 'antibodies_corr.svg')
f.save(merged_svg)
merged_pdf = svg_to_pdf(merged_svg)
print(f"\nReplicates for antibodies in {merged_svg} and {merged_pdf}:")
showPDF(merged_pdf)


# ## Plots of percent infectivity remaining
# First for non-antibody samples:

# In[11]:


for fig in figure_config['figures']:
    if fig in antibodies or fig == 'antibody_spikein':
        continue
    existing_pdf = os.path.join(config['figsdir'],
                                f"{fig}_percent_infectivity.pdf")
    new_pdf = os.path.join(config['finalfigsdir'],
                           os.path.basename(existing_pdf))
    shutil.copy(existing_pdf, new_pdf)
    print(f"\nPercent infectivity for {fig} in {new_pdf}:")
    showPDF(new_pdf)


# Now antibody samples:

# In[12]:


vmargin = 35  # vertical space between panels

fig_elements = []
height = 0
width = 0
for letter, fig in zip(string.ascii_uppercase, antibodies):
    fig_svg = os.path.join(config['figsdir'],
                           f"{fig}_percent_infectivity.svg")
    svg_height = svg_dim(fig_svg, 'height')
    height += svg_height
    width = max(svg_dim(fig_svg, 'width'), width)
    fig_elements.append(svgutils.compose.SVG(fig_svg)
                        .move(0, height - svg_height))
    fig_elements.append(
        svgutils.compose.Text(letter, 15, 0,
                              weight='bold', size='20', font='Arial')
        .move(-13, height - svg_height + 0.43 * vmargin))
    height += vmargin

f = svgutils.compose.Figure(width, height, *fig_elements)
svg = os.path.join(config['finalfigsdir'],
                   'antibodies_percent_infectivity.svg')
f.save(svg)
pdf = svg_to_pdf(svg)
print(f"\nWriting percent infectivity for antibodies to {pdf}")
showPDF(pdf)


# ## Plots of percent infectivity remaining for spikein
# Get data frame with samples of interest:

# In[13]:


fig = 'antibody_spikein'

sera_names = figure_config['figures'][fig]['sera_names']

with open(config['serum_info']) as f:
    serum_info = yaml.safe_load(f)

samples = (
    pd.read_csv(config['sample_list'])
    [['serum', 'percent_infectivity', 'library', 'sample']]
    # convert serum labels to serum names
    .assign(serum=lambda x: x['serum'].map(
                {serumlabel: d['name']
                 for serumlabel, d in serum_info.items()})
            )
    .query('serum in @sera_names')
    # `regex=True` must be explicit: pandas >= 2.0 defaults to literal
    # (non-regex) replacement, which would leave the names unchanged
    .assign(serum=lambda x: (x['serum']
                             .map(sera_names)
                             .str.replace(r'\+ \w+', '+', regex=True))
            )
    .assign(
        # rank samples within each serum / library by decreasing percent
        # infectivity, and name the ranks low / mid / high
        antibody_conc=lambda x: (
            x.sort_values('percent_infectivity', ascending=False)
            .groupby(['serum', 'library'])
            .cumcount()
            .astype('category')
            .cat
            .rename_categories({0: 'low', 1: 'mid', 2: 'high'})
            ),
        # two significant digits, dropping any trailing decimal point
        label=lambda x: (
            x['percent_infectivity']
            .apply(lambda y: '{0:#.2g}'.format(y))
            .apply(lambda y: y[: -1] if y[-1] == '.' else y)
            ),
        )
    .sort_values(['serum', 'library', 'antibody_conc'])
    )

# serum is the same for all
samples = (
    pd.concat(
        [samples.query('serum != "serum alone"'),
         # repeat the "serum alone" rows for each antibody concentration
         # present in the matching library
         samples.query('serum == "serum alone"')
         .drop(columns='antibody_conc')
         .merge(samples[['library', 'antibody_conc']].drop_duplicates())
         ],
        sort=False)
    .reset_index(drop=True)
    .assign(
        label_x=lambda x: (x['antibody_conc'].cat.codes + 1 +
                           0.6 * (x['serum'] == 'antibody alone')
                           .astype('int')),
        label_y=lambda x: (x['percent_infectivity'] +
                           1.2 * (x['serum'] == 'serum alone')),
        )
    )

samples


# Make and save plot:

# In[14]:


p = (
    ggplot(samples, aes('antibody_conc', 'percent_infectivity',
                        color='serum', label='label')) +
    geom_point(size=3, alpha=0.7) +
    geom_text(aes(x='label_x', y='label_y', label='label'),
              size=9, nudge_x=-0.1, ha='right') +
    facet_wrap('library') +
    scale_color_manual(values=PALETTE[1:]) +
    theme(legend_title=element_blank(),
          figure_size=(9, 3)) +
    ylab('percent infectivity') +
    scale_x_discrete(name='concentration of spiked-in antibody',
                     expand=(0.05, 0.6))
    )

_ = p.draw()

plotfile = os.path.join(config['finalfigsdir'],
                        f"{fig}_percent_infectivity.pdf")
print(f"Saving figure to {plotfile}")
p.save(plotfile)
plt.close()


# In[ ]: