#!/usr/bin/env python # coding: utf-8 # # Defining alignment targets # Example of how to define alignment targets. # ## Set up for analysis # Import necessary Python modules: # In[1]: import tempfile import Bio.SeqIO from alignparse.targets import Target, Targets # ## A single target # First we show how to define a single `Target`, using an example an amplicon for PacBio sequencing of RecA for a deep mutational scanning experiment. # The amplicon is defined in [Genbank Flat File format](https://www.ncbi.nlm.nih.gov/genbank/samplerecord/). # First, let's just look at that file: # In[2]: recA_targetfile = "../notebooks/input_files/recA_amplicon.gb" with open(recA_targetfile) as f: print(f.read()) # Read the Genbank file for the target into a BioPython SeqRecord: # In[3]: recA_seqrecord = Bio.SeqIO.read(recA_targetfile, format="genbank") # Create a `Target` object: # In[4]: target = Target( seqrecord=recA_seqrecord, req_features=[ "termini5", "gene", "spacer", "barcode", "termini3", "variant_tag5", "variant_tag3", ], ) # We can get specific features out of the `Target` object. # Below we look for two features and print the one that exists: # In[5]: for feature in ["non-existent", "termini5"]: if target.has_feature(feature): print(f"Here is feature {feature}:\n{target.get_feature(feature)}") else: print(f"target lacks feature {feature}\n") # We can get a [dna_features_viewer](https://edinburgh-genome-foundry.github.io/DnaFeaturesViewer/) `GraphicRecord` with `Target.image`, and then plot this using its `.plot` method, which returns a `matplotlib.Axes` instance. # (Note that `Target.image` also provides options for setting colors, labels): # In[6]: image = target.image() ax, _ = image.plot() _ = ax.set_title(target.name) # ## Multiple targets # We can read multiple targets into a `Targets` object. Below is an example with the two LASV GP constructs - wildtype and codon optimized - from the Josiah strain. # # First, let's look at these files: # In[7]: target_file_names = ["LASV_Josiah_WT", "LASV_Josiah_OPT"] targetfiles = [ f"../notebooks/input_files/{target_file_name}.gb" for target_file_name in target_file_names ] for targetfile in targetfiles: with open(targetfile) as f: print(f.read()) # Read the sequences into a `Targets`: # In[8]: lasv_parse_specs_file = "../notebooks/input_files/lasv_feature_parse_specs.yaml" lasv_targets = Targets( seqsfile=targetfiles, feature_parse_specs=lasv_parse_specs_file, allow_extra_features=True, allow_clipped_muts_seqs=True, ) # Iterate through lasv_targets to identify features present: # In[9]: for lasv_target in lasv_targets.targets: print(f"\ntarget = {lasv_target.name}") for feature in ["non-existent", "termini5"]: if lasv_target.has_feature(feature): print(f"Here is feature {feature}:\n{target.get_feature(feature)}") else: print(f"target lacks feature {feature}\n") # We can plot the `Targets`: # In[10]: _ = lasv_targets.plot(ax_width=10) # We can write them to a file for alignment: # In[11]: with tempfile.NamedTemporaryFile(mode="w") as f: lasv_targets.write_fasta(f.name) f.flush() fasta_text = open(f.name).read() print(fasta_text) # In[ ]: