%%bash
# Record when and where this notebook was run: date, OS release, hostname,
# CPU details, and memory usage.

# Print the horizontal rule used between report sections.
rule() { echo "------------"; }
# Print an empty line.
blank() { echo ""; }

echo "TODAY'S DATE:"
date
rule
blank

# Operating system release info
lsb_release -a
blank
rule

echo "HOSTNAME: "
hostname
blank
rule

echo "Computer Specs:"
blank
lscpu
blank
rule
blank

echo "Memory Specs"
blank
free -mh
TODAY'S DATE: Tue Dec 11 06:29:41 PST 2018 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.5 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: swoose ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 24 On-line CPU(s) list: 0-23 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 44 Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz Stepping: 2 CPU MHz: 2925.866 BogoMIPS: 5851.93 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 12288K NUMA node0 CPU(s): 0-23 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat ------------ Memory Specs total used free shared buff/cache available Mem: 70G 6.1G 2.9G 824M 61G 63G Swap: 4.7G 47M 4.6G
No LSB modules are available.
%%bash
# Create the data and analysis directories used by the rest of this notebook.
# --parents creates any missing parent directory and does not fail when a
# directory already exists, so this cell can be re-run safely.
# NOTE: "primeres" in the analysis directory name is a typo. It is kept here
# on purpose because a later cell renames that directory to "..._primers".
mkdir --parents \
  /home/sam/data/gigas/genomes \
  /home/sam/data/gigas/genes \
  /home/sam/analyses/20181211_gigas_cox1_primeres
%%bash
# Download the C.gigas sequences used for primer design:
#   - mitochondrial genome (NC_001276.1) and genome assembly
#     (GCF_000297895.1) into the genomes directory
#   - COX1 sequence (NP_037555.1) into the genes directory
#
# URLs are single-quoted so "$", "&", and "|" reach curl literally.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as if it were sequence data; --show-error keeps the
# error message visible even though --silent is set.

# Stop if the target directory is missing, so files are never written into
# whatever directory the cell happened to start in.
cd /home/sam/data/gigas/genomes || exit 1

# C.gigas mitochondrial genome
curl --silent --show-error --fail \
  'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?tool=portal&save=file&log$=seqview&db=nuccore&report=fasta&id=7212445&extrafeat=null&conwithfeat=on' \
  > NC_001276.1.fa

# C.gigas genome
curl --silent --show-error --fail \
  'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/297/895/GCF_000297895.1_oyster_v9/GCF_000297895.1_oyster_v9_genomic.fna.gz' \
  > GCF_000297895.1_oyster_v9_genomic.fna.gz
gunzip --quiet GCF_000297895.1_oyster_v9_genomic.fna.gz

cd /home/sam/data/gigas/genes || exit 1

# C.gigas COX1
curl --silent --show-error --fail \
  'https://www.ncbi.nlm.nih.gov/projects/sviewer/sequence.cgi?id=gi|7212457&format=fasta&filename=NP_037555.1.fa&ranges=0-504' \
  > NP_037555.1.fa

# Show what was downloaded
ls /home/sam/data/gigas/genomes
ls /home/sam/data/gigas/genes
GCF_000297895.1_oyster_v9_genomic.fna NC_001276.1.fa NP_037555.1.fa
%%bash
# Split the C.gigas genome FastA into one file per sequence with pyfaidx,
# printing the sequence count beforehand and the file count afterwards.

# --parents: do not fail if the directory is left over from an earlier run.
mkdir --parents /home/sam/data/gigas/genomes/GCF_000297895_fasta_splits
# Stop if the directory cannot be entered; otherwise the split files would
# be written into whatever directory the cell happened to start in.
cd /home/sam/data/gigas/genomes/GCF_000297895_fasta_splits || exit 1

# Count sequences in FastA (anchored: only lines that begin with ">")
echo "-------------------"
echo "NUMBER OF SEQUENCES IN ORIGINAL FASTA"
grep -c "^>" ../GCF_000297895.1_oyster_v9_genomic.fna
echo "-------------------"
echo ""
echo ""

# Split FastA
time \
/home/sam/software/bin/pyfaidx-0.5.5.2 \
--split-files \
../GCF_000297895.1_oyster_v9_genomic.fna

# Count number of individual FastA files
echo "-------------------"
echo "NUMBER OF INDIVIDUAL FASTA FILES"
ls -1 | wc -l
echo "-------------------"
------------------- NUMBER OF SEQUENCES IN ORIGINAL FASTA 7659 ------------------- ------------------- NUMBER OF INDIVIDUAL FASTA FILES 7659 -------------------
real 0m39.604s user 0m38.412s sys 0m1.140s
%%bash
# Split the C.gigas mitochondrial genome FastA into one file per sequence
# with pyfaidx, printing the sequence count beforehand and the file count
# afterwards.

# --parents: do not fail if the directory is left over from an earlier run.
mkdir --parents /home/sam/data/gigas/genomes/NC_001276_fasta_splits
# Stop if the directory cannot be entered; otherwise the split files would
# be written into whatever directory the cell happened to start in.
cd /home/sam/data/gigas/genomes/NC_001276_fasta_splits || exit 1

# Count sequences in FastA (anchored: only lines that begin with ">")
echo "-------------------"
echo "NUMBER OF SEQUENCES IN ORIGINAL FASTA"
grep -c "^>" ../NC_001276.1.fa
echo "-------------------"
echo ""
echo ""

# Split FastA
time \
/home/sam/software/bin/pyfaidx-0.5.5.2 \
--split-files \
../NC_001276.1.fa

# Count number of individual FastA files
echo "-------------------"
echo "NUMBER OF INDIVIDUAL FASTA FILES"
ls -1 | wc -l
echo "-------------------"
------------------- NUMBER OF SEQUENCES IN ORIGINAL FASTA 1 ------------------- ------------------- NUMBER OF INDIVIDUAL FASTA FILES 1 -------------------
real 0m0.130s user 0m0.112s sys 0m0.016s
%%bash
# Download the coding sequences of the mitochondrial genome record
# (AF177226) and split them into one FastA file per CDS with pyfaidx.

cd /home/sam/data/gigas/genomes || exit 1

# Download mt genome coding sequences
# The URL is single-quoted so "$" and "&" reach curl literally. --fail makes
# curl exit non-zero on an HTTP error instead of saving the error page as
# sequence data; --show-error keeps the message visible despite --silent.
curl --silent --show-error --fail \
  'https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?tool=portal&save=file&log$=seqview&db=nuccore&report=fasta_cds_na&id=6636083&conwithfeat=on&withparts=on' \
  > AF177226.cds.fa

### Split in to individual FastA files
# --parents: do not fail if the directory is left over from an earlier run.
mkdir --parents /home/sam/data/gigas/genomes/AF177226.cds_splits
# Stop if the directory cannot be entered; otherwise the split files would
# be written into the genomes directory instead.
cd /home/sam/data/gigas/genomes/AF177226.cds_splits || exit 1

# Count sequences in FastA (anchored: only lines that begin with ">")
echo "-------------------"
echo "NUMBER OF SEQUENCES IN ORIGINAL FASTA"
grep -c "^>" ../AF177226.cds.fa
echo "-------------------"
echo ""
echo ""

# Split FastA
time \
/home/sam/software/bin/pyfaidx-0.5.5.2 \
--split-files \
../AF177226.cds.fa

# Count number of individual FastA files
echo "-------------------"
echo "NUMBER OF INDIVIDUAL FASTA FILES"
ls -1 | wc -l
echo "-------------------"

ls /home/sam/data/gigas/genomes/AF177226.cds_splits
------------------- NUMBER OF SEQUENCES IN ORIGINAL FASTA 12 ------------------- ------------------- NUMBER OF INDIVIDUAL FASTA FILES 12 ------------------- lclAF177226.1_cds_AAF20042.1_1.fa lclAF177226.1_cds_AAF20043.1_2.fa lclAF177226.1_cds_AAF20044.1_3.fa lclAF177226.1_cds_AAF20045.1_4.fa lclAF177226.1_cds_AAF20046.1_5.fa lclAF177226.1_cds_AAF20047.1_6.fa lclAF177226.1_cds_AAF20048.1_7.fa lclAF177226.1_cds_AAF20049.1_8.fa lclAF177226.1_cds_AAF20050.1_9.fa lclAF177226.1_cds_AAF20051.1_10.fa lclAF177226.1_cds_AAF20052.1_11.fa lclAF177226.1_cds_AAF20053.1_12.fa
real 0m0.137s user 0m0.128s sys 0m0.008s
%%bash
# Print one of the CDS split files.
# NOTE: the path is relative, and the recorded run of this cell failed with
# "No such file or directory". The following cell repeats the command using
# the absolute path under AF177226.cds_splits.
cat lclAF177226.1_cds_AAF20053.1_12.fa
cat: lclAF177226.1_cds_AAF20053.1_12.fa: No such file or directory
%%bash
# Print the twelfth CDS split file, addressed by its absolute path.
splits_dir=/home/sam/data/gigas/genomes/AF177226.cds_splits
cat "${splits_dir}/lclAF177226.1_cds_AAF20053.1_12.fa"
>lcl|AF177226.1_cds_AAF20053.1_12 ATGTCAACAAATCATTTAGACATTGGAAGGTTTTATATAGTATTTGGATTTTGAGCTGTTCTTGCGGGAA CTAGGTTTAGGTCTCTTATTCGTTGGAGACTTTATAACCCTGGAGCTAAGTTTTTAGACCCCGTGACTTA TAATGCAGTTGTAACTAGGCATGCGTTGGTTATGATTTTTTTCTTTGTTATACCTGTAATAATTGGGGGG TTTGGTAACTGGCTTATCCCTTTGATGCTTCTAGTAGCAGACATGCAATTTCCTCGATTAAATGCATTTA GATTTTGAGTTTTGCCAGGGTCTCTTTATCTTATGCTTATGTCTAACATTGTAGAAAACGGAGTTGGGGC AGGGTGAACAATTTACCCTCCTTTATCAACTTACTCTTATCATGGAGTTTGTATAGACCTTGCAATTCTA AGCCTTCACCTTGCTGGTATTAGCTCTATTTTCAGGTCAATTAATTTCATAGTAACGATTAGAAATATGC GATCTGTTGGGGGCCATTTACTAGCACTATTCCCTTGATCTATTAAGGTTACTTCATTCTTGCTTTTGAC TACTCTCCCAGTGTTAGCTGGAGGTCTTACTATACTTTTGACTGATCGTCATTTTAATACCTCTTTTTTT GACCCTGTCGGAGGGGGGGACCCTGTCTTATTTCAGCATTTGTTTTGATTTTTTGGTCACCCTGAGGTGT ATGTCCTTATTCTTCCAGGTTTTGGAATAATTTCTCATGTCTTATGTTTTTGGTCAAGTAAAAAGACTGC ATATGGAAATATGGGAATGTTTTATGCAATACTTAATATTGGGTTCTTAGGGTTTATTGTCTGGGGGCAT CACATGTTTGTGGCTGGAATGGATATTGATACGCGTGCTTATTTTAGTGCTGCCACCGTTATTATTGCAG TGCCAACTGGTATTAAGGTGTTTGCATGAATTAGCACAATGCTAGGCTCTAAAGTTTCAACTCAAGCACC TATGTTGTGGTCTACTGGTTTTATTATTCTTTTTACAACAGGGGGTCTTACAGGACTTATTCTATCAAGA GCTTCAGTAGATGTTACGCTTCACGACACTTATTTTGTAACTGGTCATTTTCACTACGTCTTATCAATGG GTGCGGTGTTTACAATTTTAGCTGGGTTTACTCACTGATTTCCTCTTGTTGCTAAGGTTATAATGCATCG GCAAAAAATGAAAAGTCATTTTTTAGCAATGTTTTTAGGTGTTAATGCAGCATTTTTGCCACATCATTTT TTGGGTTTGGCTGGTATACCACGTCGAGTAGTTGATTATCCAGATCATTTTTGATTTTGAAATAAAGTAT CCACATTTGGCTCTCATTTGAGTACTGGCTCATTGTTATTTTTTGTGTTTTTGTTATGAGAGTCATTTAT TGCTCAACGGCCAGTTATTTCAGTGCGAAACACTTCTAGGTCCCCCGAATGGGCTGTTGTGTCTAGCCTC CCTAAGCATGCAGGGGATGAATTAGCAAAAATGGCTAAGCTTTGTTAG
%%bash
# Find the header line of the cytochrome oxidase subunit 1 CDS in the
# downloaded multi-FastA.
cds_fasta=/home/sam/data/gigas/genomes/AF177226.cds.fa
grep "cytochrome oxidase subunit 1" "${cds_fasta}"
>lcl|AF177226.1_cds_AAF20053.1_12 [protein=cytochrome oxidase subunit 1] [protein_id=AAF20053.1] [location=15598..17115] [gbkey=CDS]
%%bash
# Copy the cytochrome oxidase subunit 1 CDS FastA into the genes directory,
# then list that directory.
cox1_fasta=/home/sam/data/gigas/genomes/AF177226.cds_splits/lclAF177226.1_cds_AAF20053.1_12.fa
genes_dir=/home/sam/data/gigas/genes/
cp "${cox1_fasta}" "${genes_dir}"
ls "${genes_dir}"
lclAF177226.1_cds_AAF20053.1_12.fa NP_037555.1.fa
SEQUENCE_ID=${seq_id}
SEQUENCE_TEMPLATE=${sequence}
PRIMER_TASK=generic
PRIMER_PICK_LEFT_PRIMER=3
PRIMER_PICK_RIGHT_PRIMER=3
PRIMER_OPT_SIZE=18
PRIMER_MIN_SIZE=15
PRIMER_MAX_SIZE=21
PRIMER_MAX_NS_ACCEPTED=1
PRIMER_PRODUCT_SIZE_RANGE=75-150
P3_FILE_FLAG=1
PRIMER_EXPLAIN_FLAG=1
=
Values after the "=" on each line can be changed to whatever values the user decides. The ${sequence} must be a nucleotide sequence on a single line, with no line breaks.
The code below uses a heredoc to write this information to a file. Everything between the following two lines gets printed (via cat) as shown and then redirected to the indicated file (20181129_primer3_params.txt):
cat << EOF > /home/sam/analyses/20181129_geoduck_vtg_primers/20181129_primer3_params.txt
EOF
Primer3 is run with --format_output to make a nice, human-readable output format.
%%bash
# Build a Primer3 parameters file for the target FastA and run Primer3 on it.
# Rename this folder
mv /home/sam/analyses/20181211_gigas_cox1_primeres /home/sam/analyses/20181211_gigas_cox1_primers
cd /home/sam/analyses/20181211_gigas_cox1_primers
# Store sequence only from desired FastA.
# Print all lines after the first line and then delete newlines
sequence=$(tail -n +2 /home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa | tr -d '\n')
# Store the sequence ID taken from the header (first line) of the targeted FastA file.
## Use sed to strip leading text from FastA header
seq_id=$(head -n 1 /home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa | sed 's/>lcl|//')
# Use heredoc to create Primer3 parameters file
# (the EOF delimiter is unquoted, so ${seq_id} and ${sequence} are expanded by the shell)
cat << EOF > /home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt
SEQUENCE_ID=${seq_id}
SEQUENCE_TEMPLATE=${sequence}
PRIMER_TASK=generic
PRIMER_PICK_LEFT_PRIMER=3
PRIMER_PICK_RIGHT_PRIMER=3
PRIMER_OPT_SIZE=18
PRIMER_MIN_SIZE=15
PRIMER_MAX_SIZE=21
PRIMER_MAX_NS_ACCEPTED=1
PRIMER_PRODUCT_SIZE_RANGE=75-150
P3_FILE_FLAG=1
PRIMER_EXPLAIN_FLAG=1
PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/
=
EOF
# Run Primer3
# NOTE(review): --output below names the same file that is passed as the
# input parameters file. The recorded run of this cell printed only the
# Primer3 license/usage text; the following cell is identical except that
# it writes to 20181211_primer3_primers.txt instead.
/home/sam/software/primer3-2.4.0/src/primer3_core \
--format_output \
--output=/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt \
/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_params.txt
Copyright (c) 1996-2017 Whitehead Institute for Biomedical Research, Steve Rozen (http://purl.com/STEVEROZEN/), Andreas Untergasser and Helen Skaletsky All rights reserved. This file is part of the primer3 suite and libraries. The primer3 suite and libraries are free software; you can redistribute them and/or modify them under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this software (file gpl-2.0.txt in the source distribution); if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
USAGE: /home/sam/software/primer3-2.4.0/src/primer3_core [--format_output] [--default_version=1|--default_version=2] [--io_version=4] [--p3_settings_file=<file_path>] [--echo_settings_file] [--strict_tags] [--output=<file_path>] [--error=<file_path>] [input_file] This is primer3 (libprimer3 release 2.4.0) Input can also be provided on standard input. For example: $ primer3_core < my_input_file
%%bash
# Design primers for the target FastA with Primer3:
#   1. Make sure the analysis directory has its corrected name.
#   2. Read the sequence ID and the sequence (as one line) from the FastA.
#   3. Write the Primer3 parameters file via heredoc.
#   4. Run Primer3 with human-readable (--format_output) output.

old_dir=/home/sam/analyses/20181211_gigas_cox1_primeres
analysis_dir=/home/sam/analyses/20181211_gigas_cox1_primers
target_fasta=/home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa
params_file="${analysis_dir}/20181211_primer3_params.txt"
primers_file="${analysis_dir}/20181211_primer3_primers.txt"

# Rename the misspelled folder only while it still exists and the corrected
# name is free. Re-running this cell after the rename then neither prints a
# "cannot stat" error nor moves one directory inside the other.
if [[ -d "${old_dir}" && ! -e "${analysis_dir}" ]]; then
  mv "${old_dir}" "${analysis_dir}"
fi
cd "${analysis_dir}" || exit 1

# Store sequence only from desired FastA.
# Print all lines after the first line and then delete newlines
sequence=$(tail -n +2 "${target_fasta}" | tr -d '\n')

# Store the sequence ID from the FastA header (first line).
## Use sed to strip leading text from FastA header
seq_id=$(head -n 1 "${target_fasta}" | sed 's/>lcl|//')

# Use heredoc to create Primer3 parameters file.
# The EOF delimiter is unquoted so ${seq_id} and ${sequence} are expanded.
cat << EOF > "${params_file}"
SEQUENCE_ID=${seq_id}
SEQUENCE_TEMPLATE=${sequence}
PRIMER_TASK=generic
PRIMER_PICK_LEFT_PRIMER=3
PRIMER_PICK_RIGHT_PRIMER=3
PRIMER_OPT_SIZE=18
PRIMER_MIN_SIZE=15
PRIMER_MAX_SIZE=21
PRIMER_MAX_NS_ACCEPTED=1
PRIMER_PRODUCT_SIZE_RANGE=75-150
P3_FILE_FLAG=1
PRIMER_EXPLAIN_FLAG=1
PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/
=
EOF

# Run Primer3 (output file is distinct from the input parameters file)
/home/sam/software/primer3-2.4.0/src/primer3_core \
  --format_output \
  --output="${primers_file}" \
  "${params_file}"
mv: cannot stat '/home/sam/analyses/20181211_gigas_cox1_primeres': No such file or directory
%%bash
# Display the human-readable Primer3 results.
primers_report=/home/sam/analyses/20181211_gigas_cox1_primers/20181211_primer3_primers.txt
cat "${primers_report}"
PRIMER PICKING RESULTS FOR AF177226.1_cds_AAF20053.1_12 No mispriming library specified Using 0-based sequence positions OLIGO start len tm gc% any_th 3'_th hairpin seq LEFT PRIMER 205 19 59.54 57.89 0.00 0.00 34.59 GGGGGTTTGGTAACTGGCT RIGHT PRIMER 352 18 59.88 61.11 0.00 0.00 0.00 CCTGCCCCAACTCCGTTT SEQUENCE SIZE: 1518 INCLUDED REGION SIZE: 1518 PRODUCT SIZE: 148, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00 0 ATGTCAACAAATCATTTAGACATTGGAAGGTTTTATATAGTATTTGGATTTTGAGCTGTT 60 CTTGCGGGAACTAGGTTTAGGTCTCTTATTCGTTGGAGACTTTATAACCCTGGAGCTAAG 120 TTTTTAGACCCCGTGACTTATAATGCAGTTGTAACTAGGCATGCGTTGGTTATGATTTTT 180 TTCTTTGTTATACCTGTAATAATTGGGGGGTTTGGTAACTGGCTTATCCCTTTGATGCTT >>>>>>>>>>>>>>>>>>> 240 CTAGTAGCAGACATGCAATTTCCTCGATTAAATGCATTTAGATTTTGAGTTTTGCCAGGG 300 TCTCTTTATCTTATGCTTATGTCTAACATTGTAGAAAACGGAGTTGGGGCAGGGTGAACA <<<<<<<<<<<<<<<<<< 360 ATTTACCCTCCTTTATCAACTTACTCTTATCATGGAGTTTGTATAGACCTTGCAATTCTA 420 AGCCTTCACCTTGCTGGTATTAGCTCTATTTTCAGGTCAATTAATTTCATAGTAACGATT 480 AGAAATATGCGATCTGTTGGGGGCCATTTACTAGCACTATTCCCTTGATCTATTAAGGTT 540 ACTTCATTCTTGCTTTTGACTACTCTCCCAGTGTTAGCTGGAGGTCTTACTATACTTTTG 600 ACTGATCGTCATTTTAATACCTCTTTTTTTGACCCTGTCGGAGGGGGGGACCCTGTCTTA 660 TTTCAGCATTTGTTTTGATTTTTTGGTCACCCTGAGGTGTATGTCCTTATTCTTCCAGGT 720 TTTGGAATAATTTCTCATGTCTTATGTTTTTGGTCAAGTAAAAAGACTGCATATGGAAAT 780 ATGGGAATGTTTTATGCAATACTTAATATTGGGTTCTTAGGGTTTATTGTCTGGGGGCAT 840 CACATGTTTGTGGCTGGAATGGATATTGATACGCGTGCTTATTTTAGTGCTGCCACCGTT 900 ATTATTGCAGTGCCAACTGGTATTAAGGTGTTTGCATGAATTAGCACAATGCTAGGCTCT 960 AAAGTTTCAACTCAAGCACCTATGTTGTGGTCTACTGGTTTTATTATTCTTTTTACAACA 1020 GGGGGTCTTACAGGACTTATTCTATCAAGAGCTTCAGTAGATGTTACGCTTCACGACACT 1080 TATTTTGTAACTGGTCATTTTCACTACGTCTTATCAATGGGTGCGGTGTTTACAATTTTA 1140 GCTGGGTTTACTCACTGATTTCCTCTTGTTGCTAAGGTTATAATGCATCGGCAAAAAATG 1200 AAAAGTCATTTTTTAGCAATGTTTTTAGGTGTTAATGCAGCATTTTTGCCACATCATTTT 1260 TTGGGTTTGGCTGGTATACCACGTCGAGTAGTTGATTATCCAGATCATTTTTGATTTTGA 1320 AATAAAGTATCCACATTTGGCTCTCATTTGAGTACTGGCTCATTGTTATTTTTTGTGTTT 1380 
TTGTTATGAGAGTCATTTATTGCTCAACGGCCAGTTATTTCAGTGCGAAACACTTCTAGG 1440 TCCCCCGAATGGGCTGTTGTGTCTAGCCTCCCTAAGCATGCAGGGGATGAATTAGCAAAA 1500 ATGGCTAAGCTTTGTTAG KEYS (in order of precedence): >>>>>> left primer <<<<<< right primer ADDITIONAL OLIGOS start len tm gc% any_th 3'_th hairpin seq 1 LEFT PRIMER 205 18 57.89 61.11 0.00 0.00 34.59 GGGGGTTTGGTAACTGGC RIGHT PRIMER 352 18 59.88 61.11 0.00 0.00 0.00 CCTGCCCCAACTCCGTTT PRODUCT SIZE: 148, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00 2 LEFT PRIMER 205 20 60.18 55.00 0.00 0.00 34.59 GGGGGTTTGGTAACTGGCTT RIGHT PRIMER 352 18 59.88 61.11 0.00 0.00 0.00 CCTGCCCCAACTCCGTTT PRODUCT SIZE: 148, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00 3 LEFT PRIMER 419 18 57.49 55.56 0.00 0.00 42.93 AAGCCTTCACCTTGCTGG RIGHT PRIMER 503 18 60.12 61.11 0.00 0.00 0.00 GCCCCCAACAGATCGCAT PRODUCT SIZE: 85, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00 4 LEFT PRIMER 342 18 59.80 61.11 0.00 0.00 0.00 GTTGGGGCAGGGTGAACA RIGHT PRIMER 436 18 57.49 55.56 0.00 0.00 35.27 CCAGCAAGGTGAAGGCTT PRODUCT SIZE: 95, PAIR ANY_TH COMPL: 0.00, PAIR 3'_TH COMPL: 0.00 Statistics con too in in not no tm tm high high high high sid many tar excl ok bad GC too too any_th 3'_th hair- poly end ered Ns get reg reg GC% clamp low high compl compl pin X stab ok libprimer3 release 2.4.0
%%bash
# Re-run Primer3 on the same parameters file without --format_output, so the
# results are written in the default tag=value form, then print them.
analysis_dir=/home/sam/analyses/20181211_gigas_cox1_primers
default_out="${analysis_dir}/20181211_primer3_primers_default_format.txt"

cd "${analysis_dir}"

# Run Primer3
/home/sam/software/primer3-2.4.0/src/primer3_core \
  --output="${default_out}" \
  "${analysis_dir}/20181211_primer3_params.txt"

cat "${default_out}"
SEQUENCE_ID=AF177226.1_cds_AAF20053.1_12 SEQUENCE_TEMPLATE=ATGTCAACAAATCATTTAGACATTGGAAGGTTTTATATAGTATTTGGATTTTGAGCTGTTCTTGCGGGAACTAGGTTTAGGTCTCTTATTCGTTGGAGACTTTATAACCCTGGAGCTAAGTTTTTAGACCCCGTGACTTATAATGCAGTTGTAACTAGGCATGCGTTGGTTATGATTTTTTTCTTTGTTATACCTGTAATAATTGGGGGGTTTGGTAACTGGCTTATCCCTTTGATGCTTCTAGTAGCAGACATGCAATTTCCTCGATTAAATGCATTTAGATTTTGAGTTTTGCCAGGGTCTCTTTATCTTATGCTTATGTCTAACATTGTAGAAAACGGAGTTGGGGCAGGGTGAACAATTTACCCTCCTTTATCAACTTACTCTTATCATGGAGTTTGTATAGACCTTGCAATTCTAAGCCTTCACCTTGCTGGTATTAGCTCTATTTTCAGGTCAATTAATTTCATAGTAACGATTAGAAATATGCGATCTGTTGGGGGCCATTTACTAGCACTATTCCCTTGATCTATTAAGGTTACTTCATTCTTGCTTTTGACTACTCTCCCAGTGTTAGCTGGAGGTCTTACTATACTTTTGACTGATCGTCATTTTAATACCTCTTTTTTTGACCCTGTCGGAGGGGGGGACCCTGTCTTATTTCAGCATTTGTTTTGATTTTTTGGTCACCCTGAGGTGTATGTCCTTATTCTTCCAGGTTTTGGAATAATTTCTCATGTCTTATGTTTTTGGTCAAGTAAAAAGACTGCATATGGAAATATGGGAATGTTTTATGCAATACTTAATATTGGGTTCTTAGGGTTTATTGTCTGGGGGCATCACATGTTTGTGGCTGGAATGGATATTGATACGCGTGCTTATTTTAGTGCTGCCACCGTTATTATTGCAGTGCCAACTGGTATTAAGGTGTTTGCATGAATTAGCACAATGCTAGGCTCTAAAGTTTCAACTCAAGCACCTATGTTGTGGTCTACTGGTTTTATTATTCTTTTTACAACAGGGGGTCTTACAGGACTTATTCTATCAAGAGCTTCAGTAGATGTTACGCTTCACGACACTTATTTTGTAACTGGTCATTTTCACTACGTCTTATCAATGGGTGCGGTGTTTACAATTTTAGCTGGGTTTACTCACTGATTTCCTCTTGTTGCTAAGGTTATAATGCATCGGCAAAAAATGAAAAGTCATTTTTTAGCAATGTTTTTAGGTGTTAATGCAGCATTTTTGCCACATCATTTTTTGGGTTTGGCTGGTATACCACGTCGAGTAGTTGATTATCCAGATCATTTTTGATTTTGAAATAAAGTATCCACATTTGGCTCTCATTTGAGTACTGGCTCATTGTTATTTTTTGTGTTTTTGTTATGAGAGTCATTTATTGCTCAACGGCCAGTTATTTCAGTGCGAAACACTTCTAGGTCCCCCGAATGGGCTGTTGTGTCTAGCCTCCCTAAGCATGCAGGGGATGAATTAGCAAAAATGGCTAAGCTTTGTTAG PRIMER_TASK=generic PRIMER_PICK_LEFT_PRIMER=3 PRIMER_PICK_RIGHT_PRIMER=3 PRIMER_OPT_SIZE=18 PRIMER_MIN_SIZE=15 PRIMER_MAX_SIZE=21 PRIMER_MAX_NS_ACCEPTED=1 PRIMER_PRODUCT_SIZE_RANGE=75-150 P3_FILE_FLAG=1 PRIMER_EXPLAIN_FLAG=1 PRIMER_THERMODYNAMIC_PARAMETERS_PATH=/home/sam/software/primer3-2.4.0/src/primer3_config/ PRIMER_LEFT_NUM_RETURNED=5 PRIMER_RIGHT_NUM_RETURNED=5 PRIMER_INTERNAL_NUM_RETURNED=0 
PRIMER_PAIR_NUM_RETURNED=5 PRIMER_PAIR_0_PENALTY=1.578641 PRIMER_LEFT_0_PENALTY=1.463375 PRIMER_RIGHT_0_PENALTY=0.115266 PRIMER_LEFT_0_SEQUENCE=GGGGGTTTGGTAACTGGCT PRIMER_RIGHT_0_SEQUENCE=CCTGCCCCAACTCCGTTT PRIMER_LEFT_0=205,19 PRIMER_RIGHT_0=352,18 PRIMER_LEFT_0_TM=59.537 PRIMER_RIGHT_0_TM=59.885 PRIMER_LEFT_0_GC_PERCENT=57.895 PRIMER_RIGHT_0_GC_PERCENT=61.111 PRIMER_LEFT_0_SELF_ANY_TH=0.00 PRIMER_RIGHT_0_SELF_ANY_TH=0.00 PRIMER_LEFT_0_SELF_END_TH=0.00 PRIMER_RIGHT_0_SELF_END_TH=0.00 PRIMER_LEFT_0_HAIRPIN_TH=34.59 PRIMER_RIGHT_0_HAIRPIN_TH=0.00 PRIMER_LEFT_0_END_STABILITY=4.7500 PRIMER_RIGHT_0_END_STABILITY=3.6000 PRIMER_PAIR_0_COMPL_ANY_TH=0.00 PRIMER_PAIR_0_COMPL_END_TH=0.00 PRIMER_PAIR_0_PRODUCT_SIZE=148 PRIMER_PAIR_1_PENALTY=2.222295 PRIMER_LEFT_1_PENALTY=2.107029 PRIMER_RIGHT_1_PENALTY=0.115266 PRIMER_LEFT_1_SEQUENCE=GGGGGTTTGGTAACTGGC PRIMER_RIGHT_1_SEQUENCE=CCTGCCCCAACTCCGTTT PRIMER_LEFT_1=205,18 PRIMER_RIGHT_1=352,18 PRIMER_LEFT_1_TM=57.893 PRIMER_RIGHT_1_TM=59.885 PRIMER_LEFT_1_GC_PERCENT=61.111 PRIMER_RIGHT_1_GC_PERCENT=61.111 PRIMER_LEFT_1_SELF_ANY_TH=0.00 PRIMER_RIGHT_1_SELF_ANY_TH=0.00 PRIMER_LEFT_1_SELF_END_TH=0.00 PRIMER_RIGHT_1_SELF_END_TH=0.00 PRIMER_LEFT_1_HAIRPIN_TH=34.59 PRIMER_RIGHT_1_HAIRPIN_TH=0.00 PRIMER_LEFT_1_END_STABILITY=4.8500 PRIMER_RIGHT_1_END_STABILITY=3.6000 PRIMER_PAIR_1_COMPL_ANY_TH=0.00 PRIMER_PAIR_1_COMPL_END_TH=0.00 PRIMER_PAIR_1_PRODUCT_SIZE=148 PRIMER_PAIR_2_PENALTY=2.294210 PRIMER_LEFT_2_PENALTY=2.178944 PRIMER_RIGHT_2_PENALTY=0.115266 PRIMER_LEFT_2_SEQUENCE=GGGGGTTTGGTAACTGGCTT PRIMER_RIGHT_2_SEQUENCE=CCTGCCCCAACTCCGTTT PRIMER_LEFT_2=205,20 PRIMER_RIGHT_2=352,18 PRIMER_LEFT_2_TM=60.179 PRIMER_RIGHT_2_TM=59.885 PRIMER_LEFT_2_GC_PERCENT=55.000 PRIMER_RIGHT_2_GC_PERCENT=61.111 PRIMER_LEFT_2_SELF_ANY_TH=0.00 PRIMER_RIGHT_2_SELF_ANY_TH=0.00 PRIMER_LEFT_2_SELF_END_TH=0.00 PRIMER_RIGHT_2_SELF_END_TH=0.00 PRIMER_LEFT_2_HAIRPIN_TH=34.59 PRIMER_RIGHT_2_HAIRPIN_TH=0.00 PRIMER_LEFT_2_END_STABILITY=4.3500 
PRIMER_RIGHT_2_END_STABILITY=3.6000 PRIMER_PAIR_2_COMPL_ANY_TH=0.00 PRIMER_PAIR_2_COMPL_END_TH=0.00 PRIMER_PAIR_2_PRODUCT_SIZE=148 PRIMER_PAIR_3_PENALTY=2.632121 PRIMER_LEFT_3_PENALTY=2.507969 PRIMER_RIGHT_3_PENALTY=0.124151 PRIMER_LEFT_3_SEQUENCE=AAGCCTTCACCTTGCTGG PRIMER_RIGHT_3_SEQUENCE=GCCCCCAACAGATCGCAT PRIMER_LEFT_3=419,18 PRIMER_RIGHT_3=503,18 PRIMER_LEFT_3_TM=57.492 PRIMER_RIGHT_3_TM=60.124 PRIMER_LEFT_3_GC_PERCENT=55.556 PRIMER_RIGHT_3_GC_PERCENT=61.111 PRIMER_LEFT_3_SELF_ANY_TH=0.00 PRIMER_RIGHT_3_SELF_ANY_TH=0.00 PRIMER_LEFT_3_SELF_END_TH=0.00 PRIMER_RIGHT_3_SELF_END_TH=0.00 PRIMER_LEFT_3_HAIRPIN_TH=42.93 PRIMER_RIGHT_3_HAIRPIN_TH=0.00 PRIMER_LEFT_3_END_STABILITY=4.8500 PRIMER_RIGHT_3_END_STABILITY=4.7300 PRIMER_PAIR_3_COMPL_ANY_TH=0.00 PRIMER_PAIR_3_COMPL_END_TH=0.00 PRIMER_PAIR_3_PRODUCT_SIZE=85 PRIMER_PAIR_4_PENALTY=2.705294 PRIMER_LEFT_4_PENALTY=0.197324 PRIMER_RIGHT_4_PENALTY=2.507969 PRIMER_LEFT_4_SEQUENCE=GTTGGGGCAGGGTGAACA PRIMER_RIGHT_4_SEQUENCE=CCAGCAAGGTGAAGGCTT PRIMER_LEFT_4=342,18 PRIMER_RIGHT_4=436,18 PRIMER_LEFT_4_TM=59.803 PRIMER_RIGHT_4_TM=57.492 PRIMER_LEFT_4_GC_PERCENT=61.111 PRIMER_RIGHT_4_GC_PERCENT=55.556 PRIMER_LEFT_4_SELF_ANY_TH=0.00 PRIMER_RIGHT_4_SELF_ANY_TH=0.00 PRIMER_LEFT_4_SELF_END_TH=0.00 PRIMER_RIGHT_4_SELF_END_TH=0.00 PRIMER_LEFT_4_HAIRPIN_TH=0.00 PRIMER_RIGHT_4_HAIRPIN_TH=35.27 PRIMER_LEFT_4_END_STABILITY=3.1800 PRIMER_RIGHT_4_END_STABILITY=4.3500 PRIMER_PAIR_4_COMPL_ANY_TH=0.00 PRIMER_PAIR_4_COMPL_END_TH=0.00 PRIMER_PAIR_4_PRODUCT_SIZE=95 =
Parses out sequence id, left, and right primers and creates the proper tab-delimited primer sequences file needed by primersearch
Runs primersearch using the newly created primer sequences file and the target FastA file that was used to generate our primers in Primer3
%%bash
# Build the tab-delimited primer file needed by EMBOSS primersearch from the
# Primer3 default-format output, then run primersearch against the FastA the
# primers were designed from.

analysis_dir=/home/sam/analyses/20181211_gigas_cox1_primers
primer3_out="${analysis_dir}/20181211_primer3_primers_default_format.txt"
emboss_primers="${analysis_dir}/20181211_emboss_primers.txt"
target_fasta=/home/sam/data/gigas/genes/lclAF177226.1_cds_AAF20053.1_12.fa

#######################################
# Print the value of a tag=value line from a Primer3 output file.
# The pattern is anchored so only lines that START with the tag match.
# Arguments: $1 - tag name (e.g. SEQUENCE_ID)
#            $2 - path to the Primer3 output file
# Outputs:   the text after "tag=" on stdout
#######################################
get_tag() {
  grep "^${1}=" "${2}" | sed "s/^${1}=//"
}

#######################################
# Print one primer record: name, left primer, right primer, separated by
# tabs and ended by a single newline. One format string is used so no
# trailing tab is left after the last field.
# Arguments: $1 - primer pair name, $2 - left primer, $3 - right primer
#######################################
primer_record() {
  printf '%s\t%s\t%s\n' "${1}" "${2}" "${3}"
}

#######################################
# Write the primer file and run primersearch on the source FastA.
# Returns: 1 if the analysis directory cannot be entered.
#######################################
main() {
  local seq_id left_primer right_primer

  # primersearch is run from here so its result file lands in this directory.
  cd "${analysis_dir}" || return 1

  seq_id=$(get_tag SEQUENCE_ID "${primer3_out}")
  left_primer=$(get_tag PRIMER_LEFT_0_SEQUENCE "${primer3_out}")
  right_primer=$(get_tag PRIMER_RIGHT_0_SEQUENCE "${primer3_out}")

  primer_record "${seq_id}" "${left_primer}" "${right_primer}" > "${emboss_primers}"

  /home/sam/software/EMBOSS-6.6.0/emboss/primersearch \
    -auto \
    "${target_fasta}" \
    "${emboss_primers}"

  ls "${emboss_primers}"
}

main
/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt
%%bash
# List the files currently in the analysis directory.
analysis_dir=/home/sam/analyses/20181211_gigas_cox1_primers/
cd "${analysis_dir}"
ls
1_cds_aaf20053.primersearch 20181211_emboss_primers.txt 20181211_primer3_params.txt 20181211_primer3_primers_default_format.txt 20181211_primer3_primers.txt AF177226.1_cds_AAF20053.1_12.for AF177226.1_cds_AAF20053.1_12.rev
%%bash
# Give the primersearch result file the full sequence ID as its name, then
# print it.
cd /home/sam/analyses/20181211_gigas_cox1_primers/
renamed_result=AF177226.1_cds_AAF20053.1_12.primersearch
mv 1_cds_aaf20053.primersearch "${renamed_result}"
cat "${renamed_result}"
Primer name AF177226.1_cds_AAF20053.1_12 Amplimer 1 Sequence: AF177226.1_cds_AAF20053.1_12 GGGGGTTTGGTAACTGGCT hits forward strand at 206 with 0 mismatches CCTGCCCCAACTCCGTTT hits reverse strand at [1166] with 0 mismatches Amplimer length: 148 bp
Primers match up to their source sequence, as expected. Now, to test the primers on the rest of the genome and mt genome to ensure specificity.
- Sets variables for file/folder paths
- Runs for loop over all individual CDS FastA files:
  - tr to convert filenames to lowercase
  - primersearch on each CDS FastA file
  - grep to evaluate if the word "Amplimer" is found in the resulting output file; if it is not, the file is deleted.
%%bash
# Run primersearch on each split FastA of NC_001276 and keep only result
# files that contain the word "Amplimer".
cd /home/sam/analyses/20181211_gigas_cox1_primers/
# NOTE(review): fasta_loc has no trailing "/", so the glob in the for loop
# becomes ".../NC_001276_fasta_splits*.fa" and matches nothing. The recorded
# run of this cell failed with "Failed to open filename"; the following cell
# repeats it with the trailing slash added.
fasta_loc="/home/sam/data/gigas/genomes/NC_001276_fasta_splits"
primersearch="/home/sam/software/EMBOSS-6.6.0/emboss/primersearch"
primers="/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt"
time \
for fasta in ${fasta_loc}*.fa
do
# Strip the directory part of the path
fasta_no_path=$(echo ${fasta##*/})
# Strip everything from the first "." onward
fasta_no_ext=$(echo ${fasta_no_path%%.*})
# Convert the remaining name to lowercase
fasta_no_ext_lower=$(echo ${fasta_no_ext} | tr '[:upper:]' '[:lower:]')
# NOTE(review): the trailing 20 is presumably primersearch's allowed percent mismatch - confirm
${primersearch} -auto ${fasta} ${primers} 20
# Delete the result file when it does not contain the word "Amplimer"
if ! grep --quiet "Amplimer" "${fasta_no_ext_lower}.primersearch"
then rm ${fasta_no_ext_lower}.primersearch
fi
done
# List the analysis directory, oldest file first
ls -ltr
total 112 -rw-rw-r-- 1 sam sam 1895 Dec 11 07:46 20181211_primer3_params.txt -rw-rw-r-- 1 sam sam 5400 Dec 11 07:46 20181211_primer3_primers.txt -rw-rw-r-- 1 sam sam 43889 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.rev -rw-rw-r-- 1 sam sam 40878 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.for -rw-rw-r-- 1 sam sam 5459 Dec 11 07:50 20181211_primer3_primers_default_format.txt -rw-rw-r-- 1 sam sam 69 Dec 11 07:56 20181211_emboss_primers.txt -rw-rw-r-- 1 sam sam 256 Dec 11 07:56 AF177226.1_cds_AAF20053.1_12.primersearch
Error: Failed to open filename '/home/sam/data/gigas/genomes/NC_001276_fasta_splits*.fa' Error: Unable to read sequence '/home/sam/data/gigas/genomes/NC_001276_fasta_splits*.fa' Died: primersearch terminated: Bad value for '-seqall' with -auto defined grep: nc_001276_fasta_splits*.primersearch: No such file or directory rm: cannot remove 'nc_001276_fasta_splits*.primersearch': No such file or directory real 0m0.025s user 0m0.008s sys 0m0.004s
%%bash
# Check primer specificity against the mitochondrial genome: run primersearch
# on every split FastA and keep only result files that report an amplimer.

analysis_dir="/home/sam/analyses/20181211_gigas_cox1_primers/"
fasta_loc="/home/sam/data/gigas/genomes/NC_001276_fasta_splits/"
primersearch="/home/sam/software/EMBOSS-6.6.0/emboss/primersearch"
primers="/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt"

#######################################
# Derive the expected primersearch result-file stem from a FastA path:
# drop the directory, drop everything from the first ".", then lower-case.
# NOTE(review): this assumes primersearch -auto names its result file this
# way, as the recorded directory listings suggest - confirm for other inputs.
# Arguments: $1 - path to a FastA file
# Outputs:   the stem on stdout (".../NC_001276.1.fa" -> "nc_001276")
#######################################
output_stem() {
  local name=${1##*/}
  name=${name%%.*}
  printf '%s\n' "${name}" | tr '[:upper:]' '[:lower:]'
}

#######################################
# Run primersearch on each FastA in ${fasta_loc} and delete result files
# that do not contain the word "Amplimer".
# Globals:   analysis_dir, fasta_loc, primersearch, primers (read)
# Returns:   1 if the analysis directory cannot be entered
#######################################
search_all() {
  local fasta result

  # Result files are written to (and removed from) the current directory.
  cd "${analysis_dir}" || return 1

  for fasta in "${fasta_loc}"*.fa; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "${fasta}" ]] || continue

    result="$(output_stem "${fasta}").primersearch"

    # The trailing 20 is passed through unchanged from the original command.
    "${primersearch}" -auto "${fasta}" "${primers}" 20

    # Only inspect and remove a result file that was actually produced.
    if [[ -f "${result}" ]] && ! grep --quiet "Amplimer" "${result}"; then
      rm -- "${result}"
    fi
  done
}

time search_all
ls -ltr
total 116 -rw-rw-r-- 1 sam sam 1895 Dec 11 07:46 20181211_primer3_params.txt -rw-rw-r-- 1 sam sam 5400 Dec 11 07:46 20181211_primer3_primers.txt -rw-rw-r-- 1 sam sam 43889 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.rev -rw-rw-r-- 1 sam sam 40878 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.for -rw-rw-r-- 1 sam sam 5459 Dec 11 07:50 20181211_primer3_primers_default_format.txt -rw-rw-r-- 1 sam sam 69 Dec 11 07:56 20181211_emboss_primers.txt -rw-rw-r-- 1 sam sam 256 Dec 11 07:56 AF177226.1_cds_AAF20053.1_12.primersearch -rw-rw-r-- 1 sam sam 241 Dec 11 08:05 nc_001276.primersearch
real 0m0.026s user 0m0.016s sys 0m0.000s
%%bash
# Print the primersearch result for the mitochondrial genome.
mt_result=/home/sam/analyses/20181211_gigas_cox1_primers/nc_001276.primersearch
cat "${mt_result}"
Primer name AF177226.1_cds_AAF20053.1_12 Amplimer 1 Sequence: NC_001276.1 GGGGGTTTGGTAACTGGCT hits forward strand at 15803 with 0 mismatches CCTGCCCCAACTCCGTTT hits reverse strand at [2275] with 0 mismatches Amplimer length: 148 bp
%%bash
# Check primer specificity against the genome assembly: run primersearch on
# every split FastA and keep only result files that report an amplimer.

analysis_dir="/home/sam/analyses/20181211_gigas_cox1_primers/"
fasta_loc="/home/sam/data/gigas/genomes/GCF_000297895_fasta_splits/"
primersearch="/home/sam/software/EMBOSS-6.6.0/emboss/primersearch"
primers="/home/sam/analyses/20181211_gigas_cox1_primers/20181211_emboss_primers.txt"

#######################################
# Derive the expected primersearch result-file stem from a FastA path:
# drop the directory, drop everything from the first ".", then lower-case.
# NOTE(review): this assumes primersearch -auto names its result file this
# way, as the recorded directory listings suggest - confirm for other inputs.
# Arguments: $1 - path to a FastA file
# Outputs:   the stem on stdout (".../NW_011935054.1.fna" -> "nw_011935054")
#######################################
output_stem() {
  local name=${1##*/}
  name=${name%%.*}
  printf '%s\n' "${name}" | tr '[:upper:]' '[:lower:]'
}

#######################################
# Run primersearch on each FastA in ${fasta_loc} and delete result files
# that do not contain the word "Amplimer".
# Globals:   analysis_dir, fasta_loc, primersearch, primers (read)
# Returns:   1 if the analysis directory cannot be entered
#######################################
search_all() {
  local fasta result

  # Result files are written to (and removed from) the current directory.
  cd "${analysis_dir}" || return 1

  for fasta in "${fasta_loc}"*.fna; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "${fasta}" ]] || continue

    result="$(output_stem "${fasta}").primersearch"

    # The trailing 20 is passed through unchanged from the original command.
    "${primersearch}" -auto "${fasta}" "${primers}" 20

    # Only inspect and remove a result file that was actually produced.
    if [[ -f "${result}" ]] && ! grep --quiet "Amplimer" "${result}"; then
      rm -- "${result}"
    fi
  done
}

time search_all
ls -ltr
total 120 -rw-rw-r-- 1 sam sam 1895 Dec 11 07:46 20181211_primer3_params.txt -rw-rw-r-- 1 sam sam 5400 Dec 11 07:46 20181211_primer3_primers.txt -rw-rw-r-- 1 sam sam 43889 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.rev -rw-rw-r-- 1 sam sam 40878 Dec 11 07:50 AF177226.1_cds_AAF20053.1_12.for -rw-rw-r-- 1 sam sam 5459 Dec 11 07:50 20181211_primer3_primers_default_format.txt -rw-rw-r-- 1 sam sam 69 Dec 11 07:56 20181211_emboss_primers.txt -rw-rw-r-- 1 sam sam 256 Dec 11 07:56 AF177226.1_cds_AAF20053.1_12.primersearch -rw-rw-r-- 1 sam sam 241 Dec 11 08:08 nc_001276.primersearch -rw-rw-r-- 1 sam sam 246 Dec 11 08:08 nw_011935054.primersearch
real 4m10.485s user 2m1.152s sys 0m26.576s
%%bash
# Print the primersearch result for the genome scaffold that was hit.
analysis_dir=/home/sam/analyses/20181211_gigas_cox1_primers/
cd "${analysis_dir}"
cat nw_011935054.primersearch
Primer name AF177226.1_cds_AAF20053.1_12 Amplimer 1 Sequence: NW_011935054.1 CCTGCCCCAACTCCGTTT hits forward strand at 119485 with 0 mismatches GGGGGTTTGGTAACTGGCT hits reverse strand at [93393] with 0 mismatches Amplimer length: 148 bp
I believe what I'm seeing is that the entire C.gigas genome includes mitochondrial sequences. Thus, I see a single match in the mitochondrial genome and a single match in the full genome. Will order this primer set.
%%bash
# Copy data to Gannet
# The source is given relative to the analyses directory, so change into it
# first and pass the analysis folder as "./<folder>/".
# NOTE(review): the destination folder is named "Atumefaciens" although this
# analysis is for C.gigas - confirm this is the intended location.
rsync_opts=(--archive --relative)
cd /home/sam/analyses/
rsync "${rsync_opts[@]}" ./20181211_gigas_cox1_primers/ gannet:/volume1/web/Atumefaciens