%%bash
echo "TODAY'S DATE:"
date
echo "------------"
echo ""
#Display operating system info
lsb_release -a
echo ""
echo "------------"
echo "HOSTNAME: "; hostname
echo ""
echo "------------"
echo "Computer Specs:"
echo ""
lscpu
echo ""
echo "------------"
echo ""
echo "Memory Specs"
echo ""
free -mh
TODAY'S DATE: Thu Jul 30 11:36:00 PDT 2020 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.6 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: swoose ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 24 On-line CPU(s) list: 0-23 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 44 Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz Stepping: 2 CPU MHz: 2925.990 BogoMIPS: 5851.88 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 12288K NUMA node0 CPU(s): 0-23 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb ssbd ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat flush_l1d ------------ Memory Specs total used free shared buff/cache available Mem: 70G 2.3G 1.4G 564M 67G 67G Swap: 4.7G 4.6M 4.7G
No LSB modules are available.
# Set data directories
%env data_dir=/home/sam/data/P_generosa
%env fasta=/home/sam/data/P_generosa/Panopea-generosa-vv0.74.a4.5d9637f372b5d-publish.genes.fna
%env out_dir=/home/sam/analyses/20200730_pgen_primer_design
# Needed for primer3-2.4.0
%env thermo_params_dir=/home/sam/programs/primer3-2.4.0/src/primer3_config/
# Programs
%env primer3=/home/sam/programs/primer3-2.4.0/src/primer3_core
%env primersearch=/home/sam/programs/EMBOSS-6.6.0/emboss/primersearch
env: data_dir=/home/sam/data/P_generosa env: fasta=/home/sam/data/P_generosa/Panopea-generosa-vv0.74.a4.5d9637f372b5d-publish.genes.fna env: out_dir=/home/sam/analyses/20200730_pgen_primer_design env: thermo_params_dir=/home/sam/programs/primer3-2.4.0/src/primer3_config/ env: primer3=/home/sam/programs/primer3-2.4.0/src/primer3_core env: primersearch=/home/sam/programs/EMBOSS-6.6.0/emboss/primersearch
%%bash
mkdir --parents "${out_dir}"
%%bash
cd ${data_dir}
wget --quiet "https://files.osf.io/v1/resources/yem8n/providers/osfstorage/5db35d9abc32f4000e0b70c2?action=download&direct&version=1" \
--output-document ${fasta}
ls -lh
total 376M -rw-rw-r-- 1 sam sam 375M Jul 30 11:53 Panopea-generosa-vv0.74.a4.5d9637f372b5d-publish.genes.fna -rw-rw-r-- 1 sam sam 1.6M Jul 29 11:48 Panopea-generosa-vv0.74.a4.5d9637f372b5d-publish.genes.fna.fai
%%bash
timestamp=$(date +"%Y%m%d")
cd "${out_dir}"
# Associative array to associate gene names with sequence ids
# Requires >= Bash 4.0
declare -A seqid_array
# Populate associative array [gene_abbreviation]=seqid
seqid_array=(
[TIF3s6b]=PGEN_.00g000750-vv0.74.a \
[TIF3s12]=PGEN_.00g025890-vv0.74.a \
[APLP]=PGEN_.00g070040-vv0.74.a \
[TIF3s7]=PGEN_.00g079690-vv0.74.a \
[TIF3s5]=PGEN_.00g082590-vv0.74.a \
[NFIP1]=PGEN_.00g088260-vv0.74.a \
[GSK3B]=PGEN_.00g114060-vv0.74.a \
[TIF3s8-1]=PGEN_.00g132030-vv0.74.a \
[TIF3s8-2]=PGEN_.00g132040-vv0.74.a \
[FEN1]=PGEN_.00g188130-vv0.74.a \
[ECHD3]=PGEN_.00g194630-vv0.74.a \
[GLYG]=PGEN_.00g224740-vv0.74.a \
[TIF3s10]=PGEN_.00g245080-vv0.74.a \
[SPTN1]=PGEN_.00g280110-vv0.74.a \
[RPL5]=PGEN_.00g287540-vv0.74.a \
[TIF3s4a]=PGEN_.00g288180-vv0.74.a \
[NSF]=PGEN_.00g338640-vv0.74.a
)
# Individual FastAs array
fasta_array=()
# Extract sequences to individual FastA files
for gene_name in "${!seqid_array[@]}"
do
# Set output file names
out_file="${seqid_array[$gene_name]}"_"${gene_name}".fna
# Run faidx
faidx "${fasta}" "${seqid_array[$gene_name]}" \
--out "${out_file}"
# Add FastA to array
fasta_array+=(${out_file})
## Check output
# Count number of entries in output FastA (should be = 1)
# -H displays filename - is compatible with OSX
grep --count -H ">" "${out_file}"
# Check each FastA header
echo "${out_file}: $(head -n1 "${out_file}")"
echo ""
done
# Run Primer3
for fna in "${fasta_array[@]}"
do
# Store sequence only from desired FastA.
# Print all lines after the first line and then delete newlines
# because sequence needs to be on single line for Primer3 params file
sequence=$(tail -n +2 "${fna}" | tr -d '\n')
# Remove suffix from FastA file to use as sequence ID
seq_id=${fna%.*}
# Variables for output filenaming
params_out="${timestamp}_${seq_id}_primer3_params.txt"
primer3_def_out="${timestamp}_${seq_id}_primers_default_format.txt"
emboss_primers="${timestamp}_${seq_id}_emboss_primers.txt"
primersearch_out="${timestamp}_${seq_id}_primersearch.txt"
# Use heredoc to create Primer3 parameters file
##Values after the "=" on each line can be changed to whatever values the user decides.
##The ${sequence} must be a nucletoide sequence on a single line, with no line breaks.
## The code below uses a ```heredoc``` to write this information to a file.
## Everything between the following two lines gets printed (via ```cat```) as shown and then
## redirected to the indicated file "${params_out}".
## BTW, heredoc cannot be indented (well, it can, but requires some formatting that I couldn't get to work)
cat << EOF > "${params_out}"
SEQUENCE_ID=${seq_id}
SEQUENCE_TEMPLATE=${sequence}
PRIMER_TASK=generic
PRIMER_PICK_LEFT_PRIMER=3
PRIMER_PICK_RIGHT_PRIMER=3
PRIMER_OPT_SIZE=18
PRIMER_MIN_SIZE=15
PRIMER_MAX_SIZE=21
PRIMER_MAX_NS_ACCEPTED=1
PRIMER_PRODUCT_SIZE_RANGE=75-150
P3_FILE_FLAG=1
PRIMER_EXPLAIN_FLAG=1
PRIMER_THERMODYNAMIC_PARAMETERS_PATH=${thermo_params_dir}
=
EOF
# Run Primer3 with default output format
${primer3} \
--output="${out_dir}/${primer3_def_out}" \
"${params_out}"
# Create tab-delimited primer file for primersearch
sequence_id=$(grep "SEQUENCE_ID=" "${primer3_def_out}" | sed 's/SEQUENCE_ID=//')
left_primer=$(grep "PRIMER_LEFT_0_SEQUENCE=" "${primer3_def_out}" | sed 's/PRIMER_LEFT_0_SEQUENCE=//')
right_primer=$(grep "PRIMER_RIGHT_0_SEQUENCE=" "${primer3_def_out}" | sed 's/PRIMER_RIGHT_0_SEQUENCE=//')
printf "%s\t" "${sequence_id}" "${left_primer}" "${right_primer}" > "${emboss_primers}"
# Add required newline to end of file
printf "\n" >> "${emboss_primers}"
# Run EMBOSS primersearch
${primersearch} \
-seqall "${fasta}" \
-infile "${emboss_primers}" \
-mismatchpercent 20 \
-outfile "${primersearch_out}" \
-auto
done
PGEN_.00g194630-vv0.74.a_ECHD3.fna:1 PGEN_.00g194630-vv0.74.a_ECHD3.fna: >PGEN_.00g194630-vv0.74.a PGEN_.00g288180-vv0.74.a_TIF3s4a.fna:1 PGEN_.00g288180-vv0.74.a_TIF3s4a.fna: >PGEN_.00g288180-vv0.74.a PGEN_.00g132040-vv0.74.a_TIF3s8-2.fna:1 PGEN_.00g132040-vv0.74.a_TIF3s8-2.fna: >PGEN_.00g132040-vv0.74.a PGEN_.00g132030-vv0.74.a_TIF3s8-1.fna:1 PGEN_.00g132030-vv0.74.a_TIF3s8-1.fna: >PGEN_.00g132030-vv0.74.a PGEN_.00g338640-vv0.74.a_NSF.fna:1 PGEN_.00g338640-vv0.74.a_NSF.fna: >PGEN_.00g338640-vv0.74.a PGEN_.00g245080-vv0.74.a_TIF3s10.fna:1 PGEN_.00g245080-vv0.74.a_TIF3s10.fna: >PGEN_.00g245080-vv0.74.a PGEN_.00g025890-vv0.74.a_TIF3s12.fna:1 PGEN_.00g025890-vv0.74.a_TIF3s12.fna: >PGEN_.00g025890-vv0.74.a PGEN_.00g114060-vv0.74.a_GSK3B.fna:1 PGEN_.00g114060-vv0.74.a_GSK3B.fna: >PGEN_.00g114060-vv0.74.a PGEN_.00g287540-vv0.74.a_RPL5.fna:1 PGEN_.00g287540-vv0.74.a_RPL5.fna: >PGEN_.00g287540-vv0.74.a PGEN_.00g188130-vv0.74.a_FEN1.fna:1 PGEN_.00g188130-vv0.74.a_FEN1.fna: >PGEN_.00g188130-vv0.74.a PGEN_.00g224740-vv0.74.a_GLYG.fna:1 PGEN_.00g224740-vv0.74.a_GLYG.fna: >PGEN_.00g224740-vv0.74.a PGEN_.00g088260-vv0.74.a_NFIP1.fna:1 PGEN_.00g088260-vv0.74.a_NFIP1.fna: >PGEN_.00g088260-vv0.74.a PGEN_.00g082590-vv0.74.a_TIF3s5.fna:1 PGEN_.00g082590-vv0.74.a_TIF3s5.fna: >PGEN_.00g082590-vv0.74.a PGEN_.00g079690-vv0.74.a_TIF3s7.fna:1 PGEN_.00g079690-vv0.74.a_TIF3s7.fna: >PGEN_.00g079690-vv0.74.a PGEN_.00g000750-vv0.74.a_TIF3s6b.fna:1 PGEN_.00g000750-vv0.74.a_TIF3s6b.fna: >PGEN_.00g000750-vv0.74.a PGEN_.00g280110-vv0.74.a_SPTN1.fna:1 PGEN_.00g280110-vv0.74.a_SPTN1.fna: >PGEN_.00g280110-vv0.74.a PGEN_.00g070040-vv0.74.a_APLP.fna:1 PGEN_.00g070040-vv0.74.a_APLP.fna: >PGEN_.00g070040-vv0.74.a
%%bash
cd "${out_dir}"
ls -ltrh
total 28M -rw-rw-r-- 1 sam sam 15K Jul 30 21:13 PGEN_.00g194630-vv0.74.a_ECHD3.fna -rw-rw-r-- 1 sam sam 12K Jul 30 21:13 PGEN_.00g288180-vv0.74.a_TIF3s4a.fna -rw-rw-r-- 1 sam sam 4.9K Jul 30 21:13 PGEN_.00g132040-vv0.74.a_TIF3s8-2.fna -rw-rw-r-- 1 sam sam 17K Jul 30 21:13 PGEN_.00g132030-vv0.74.a_TIF3s8-1.fna -rw-rw-r-- 1 sam sam 21K Jul 30 21:13 PGEN_.00g338640-vv0.74.a_NSF.fna -rw-rw-r-- 1 sam sam 27K Jul 30 21:13 PGEN_.00g245080-vv0.74.a_TIF3s10.fna -rw-rw-r-- 1 sam sam 9.8K Jul 30 21:13 PGEN_.00g025890-vv0.74.a_TIF3s12.fna -rw-rw-r-- 1 sam sam 27K Jul 30 21:13 PGEN_.00g114060-vv0.74.a_GSK3B.fna -rw-rw-r-- 1 sam sam 14K Jul 30 21:13 PGEN_.00g287540-vv0.74.a_RPL5.fna -rw-rw-r-- 1 sam sam 17K Jul 30 21:13 PGEN_.00g188130-vv0.74.a_FEN1.fna -rw-rw-r-- 1 sam sam 48K Jul 30 21:13 PGEN_.00g224740-vv0.74.a_GLYG.fna -rw-rw-r-- 1 sam sam 8.4K Jul 30 21:13 PGEN_.00g088260-vv0.74.a_NFIP1.fna -rw-rw-r-- 1 sam sam 27K Jul 30 21:13 PGEN_.00g082590-vv0.74.a_TIF3s5.fna -rw-rw-r-- 1 sam sam 23K Jul 30 21:13 PGEN_.00g079690-vv0.74.a_TIF3s7.fna -rw-rw-r-- 1 sam sam 13K Jul 30 21:13 PGEN_.00g000750-vv0.74.a_TIF3s6b.fna -rw-rw-r-- 1 sam sam 69K Jul 30 21:13 PGEN_.00g280110-vv0.74.a_SPTN1.fna -rw-rw-r-- 1 sam sam 32K Jul 30 21:13 PGEN_.00g070040-vv0.74.a_APLP.fna -rw-rw-r-- 1 sam sam 15K Jul 30 21:13 20200730_PGEN_.00g194630-vv0.74.a_ECHD3_primer3_params.txt -rw-rw-r-- 1 sam sam 278K Jul 30 21:13 PGEN_.00g194630-vv0.74.a_ECHD3.for -rw-rw-r-- 1 sam sam 264K Jul 30 21:13 PGEN_.00g194630-vv0.74.a_ECHD3.rev -rw-rw-r-- 1 sam sam 18K Jul 30 21:13 20200730_PGEN_.00g194630-vv0.74.a_ECHD3_primers_default_format.txt -rw-rw-r-- 1 sam sam 70 Jul 30 21:13 20200730_PGEN_.00g194630-vv0.74.a_ECHD3_emboss_primers.txt -rw-rw-r-- 1 sam sam 423 Jul 30 21:13 20200730_PGEN_.00g194630-vv0.74.a_ECHD3_primersearch.txt -rw-rw-r-- 1 sam sam 12K Jul 30 21:13 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_primer3_params.txt -rw-rw-r-- 1 sam sam 259K Jul 30 21:13 PGEN_.00g288180-vv0.74.a_TIF3s4a.for -rw-rw-r-- 1 sam sam 266K Jul 30 21:13 PGEN_.00g288180-vv0.74.a_TIF3s4a.rev -rw-rw-r-- 1 sam sam 15K Jul 30 21:13 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_primers_default_format.txt -rw-rw-r-- 1 sam sam 72 Jul 30 21:13 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_emboss_primers.txt -rw-rw-r-- 1 sam sam 810 Jul 30 21:14 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_primersearch.txt -rw-rw-r-- 1 sam sam 5.2K Jul 30 21:14 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_primer3_params.txt -rw-rw-r-- 1 sam sam 95K Jul 30 21:14 PGEN_.00g132040-vv0.74.a_TIF3s8-2.for -rw-rw-r-- 1 sam sam 107K Jul 30 21:14 PGEN_.00g132040-vv0.74.a_TIF3s8-2.rev -rw-rw-r-- 1 sam sam 8.7K Jul 30 21:14 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_primers_default_format.txt -rw-rw-r-- 1 sam sam 73 Jul 30 21:14 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_emboss_primers.txt -rw-rw-r-- 1 sam sam 1.5M Jul 30 21:14 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_primersearch.txt -rw-rw-r-- 1 sam sam 17K Jul 30 21:14 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_primer3_params.txt -rw-rw-r-- 1 sam sam 510K Jul 30 21:14 PGEN_.00g132030-vv0.74.a_TIF3s8-1.for -rw-rw-r-- 1 sam sam 511K Jul 30 21:14 PGEN_.00g132030-vv0.74.a_TIF3s8-1.rev -rw-rw-r-- 1 sam sam 20K Jul 30 21:14 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_primers_default_format.txt -rw-rw-r-- 1 sam sam 73 Jul 30 21:14 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_emboss_primers.txt -rw-rw-r-- 1 sam sam 2.0K Jul 30 21:15 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_primersearch.txt -rw-rw-r-- 1 sam sam 21K Jul 30 21:15 20200730_PGEN_.00g338640-vv0.74.a_NSF_primer3_params.txt -rw-rw-r-- 1 sam sam 647K Jul 30 21:15 PGEN_.00g338640-vv0.74.a_NSF.for -rw-rw-r-- 1 sam sam 640K Jul 30 21:15 PGEN_.00g338640-vv0.74.a_NSF.rev -rw-rw-r-- 1 sam sam 25K Jul 30 21:15 20200730_PGEN_.00g338640-vv0.74.a_NSF_primers_default_format.txt -rw-rw-r-- 1 sam sam 68 Jul 30 21:15 20200730_PGEN_.00g338640-vv0.74.a_NSF_emboss_primers.txt -rw-rw-r-- 1 sam sam 423 Jul 30 21:15 20200730_PGEN_.00g338640-vv0.74.a_NSF_primersearch.txt -rw-rw-r-- 1 sam sam 27K Jul 30 21:15 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_primer3_params.txt -rw-rw-r-- 1 sam sam 831K Jul 30 21:15 PGEN_.00g245080-vv0.74.a_TIF3s10.for -rw-rw-r-- 1 sam sam 829K Jul 30 21:15 PGEN_.00g245080-vv0.74.a_TIF3s10.rev -rw-rw-r-- 1 sam sam 30K Jul 30 21:15 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_primers_default_format.txt -rw-rw-r-- 1 sam sam 72 Jul 30 21:15 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_emboss_primers.txt -rw-rw-r-- 1 sam sam 1.6K Jul 30 21:16 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_primersearch.txt -rw-rw-r-- 1 sam sam 9.9K Jul 30 21:16 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_primer3_params.txt -rw-rw-r-- 1 sam sam 169K Jul 30 21:16 PGEN_.00g025890-vv0.74.a_TIF3s12.for -rw-rw-r-- 1 sam sam 169K Jul 30 21:16 PGEN_.00g025890-vv0.74.a_TIF3s12.rev -rw-rw-r-- 1 sam sam 14K Jul 30 21:16 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_primers_default_format.txt -rw-rw-r-- 1 sam sam 72 Jul 30 21:16 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_emboss_primers.txt -rw-rw-r-- 1 sam sam 424 Jul 30 21:16 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_primersearch.txt -rw-rw-r-- 1 sam sam 27K Jul 30 21:16 20200730_PGEN_.00g114060-vv0.74.a_GSK3B_primer3_params.txt -rw-rw-r-- 1 sam sam 693K Jul 30 21:16 PGEN_.00g114060-vv0.74.a_GSK3B.for -rw-rw-r-- 1 sam sam 707K Jul 30 21:16 PGEN_.00g114060-vv0.74.a_GSK3B.rev -rw-rw-r-- 1 sam sam 31K Jul 30 21:16 20200730_PGEN_.00g114060-vv0.74.a_GSK3B_primers_default_format.txt -rw-rw-r-- 1 sam sam 70 Jul 30 21:16 20200730_PGEN_.00g114060-vv0.74.a_GSK3B_emboss_primers.txt -rw-rw-r-- 1 sam sam 1.6M Jul 30 21:17 20200730_PGEN_.00g114060-vv0.74.a_GSK3B_primersearch.txt -rw-rw-r-- 1 sam sam 14K Jul 30 21:17 20200730_PGEN_.00g287540-vv0.74.a_RPL5_primer3_params.txt -rw-rw-r-- 1 sam sam 362K Jul 30 21:17 PGEN_.00g287540-vv0.74.a_RPL5.for -rw-rw-r-- 1 sam sam 355K Jul 30 21:17 PGEN_.00g287540-vv0.74.a_RPL5.rev -rw-rw-r-- 1 sam sam 17K Jul 30 21:17 20200730_PGEN_.00g287540-vv0.74.a_RPL5_primers_default_format.txt -rw-rw-r-- 1 sam sam 69 Jul 30 21:17 20200730_PGEN_.00g287540-vv0.74.a_RPL5_emboss_primers.txt -rw-rw-r-- 1 sam sam 480K Jul 30 21:17 20200730_PGEN_.00g287540-vv0.74.a_RPL5_primersearch.txt -rw-rw-r-- 1 sam sam 17K Jul 30 21:17 20200730_PGEN_.00g188130-vv0.74.a_FEN1_primer3_params.txt -rw-rw-r-- 1 sam sam 377K Jul 30 21:17 PGEN_.00g188130-vv0.74.a_FEN1.for -rw-rw-r-- 1 sam sam 395K Jul 30 21:17 PGEN_.00g188130-vv0.74.a_FEN1.rev -rw-rw-r-- 1 sam sam 20K Jul 30 21:17 20200730_PGEN_.00g188130-vv0.74.a_FEN1_primers_default_format.txt -rw-rw-r-- 1 sam sam 69 Jul 30 21:17 20200730_PGEN_.00g188130-vv0.74.a_FEN1_emboss_primers.txt -rw-rw-r-- 1 sam sam 423 Jul 30 21:18 20200730_PGEN_.00g188130-vv0.74.a_FEN1_primersearch.txt -rw-rw-r-- 1 sam sam 47K Jul 30 21:18 20200730_PGEN_.00g224740-vv0.74.a_GLYG_primer3_params.txt -rw-rw-r-- 1 sam sam 1.2M Jul 30 21:18 PGEN_.00g224740-vv0.74.a_GLYG.for -rw-rw-r-- 1 sam sam 1.1M Jul 30 21:18 PGEN_.00g224740-vv0.74.a_GLYG.rev -rw-rw-r-- 1 sam sam 50K Jul 30 21:18 20200730_PGEN_.00g224740-vv0.74.a_GLYG_primers_default_format.txt -rw-rw-r-- 1 sam sam 69 Jul 30 21:18 20200730_PGEN_.00g224740-vv0.74.a_GLYG_emboss_primers.txt -rw-rw-r-- 1 sam sam 8.7K Jul 30 21:18 20200730_PGEN_.00g224740-vv0.74.a_GLYG_primersearch.txt -rw-rw-r-- 1 sam sam 8.6K Jul 30 21:18 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_primer3_params.txt -rw-rw-r-- 1 sam sam 276K Jul 30 21:18 PGEN_.00g088260-vv0.74.a_NFIP1.for -rw-rw-r-- 1 sam sam 290K Jul 30 21:18 PGEN_.00g088260-vv0.74.a_NFIP1.rev -rw-rw-r-- 1 sam sam 13K Jul 30 21:18 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_primers_default_format.txt -rw-rw-r-- 1 sam sam 70 Jul 30 21:18 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_emboss_primers.txt -rw-rw-r-- 1 sam sam 6.8K Jul 30 21:19 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_primersearch.txt -rw-rw-r-- 1 sam sam 27K Jul 30 21:19 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_primer3_params.txt -rw-rw-r-- 1 sam sam 493K Jul 30 21:19 PGEN_.00g082590-vv0.74.a_TIF3s5.for -rw-rw-r-- 1 sam sam 471K Jul 30 21:19 PGEN_.00g082590-vv0.74.a_TIF3s5.rev -rw-rw-r-- 1 sam sam 30K Jul 30 21:19 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_primers_default_format.txt -rw-rw-r-- 1 sam sam 71 Jul 30 21:19 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_emboss_primers.txt -rw-rw-r-- 1 sam sam 139K Jul 30 21:19 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_primersearch.txt -rw-rw-r-- 1 sam sam 23K Jul 30 21:19 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_primer3_params.txt -rw-rw-r-- 1 sam sam 634K Jul 30 21:20 PGEN_.00g079690-vv0.74.a_TIF3s7.for -rw-rw-r-- 1 sam sam 594K Jul 30 21:20 PGEN_.00g079690-vv0.74.a_TIF3s7.rev -rw-rw-r-- 1 sam sam 26K Jul 30 21:20 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_primers_default_format.txt -rw-rw-r-- 1 sam sam 71 Jul 30 21:20 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_emboss_primers.txt -rw-rw-r-- 1 sam sam 2.7K Jul 30 21:20 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_primersearch.txt -rw-rw-r-- 1 sam sam 13K Jul 30 21:20 20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_primer3_params.txt -rw-rw-r-- 1 sam sam 279K Jul 30 21:20 PGEN_.00g000750-vv0.74.a_TIF3s6b.for -rw-rw-r-- 1 sam sam 282K Jul 30 21:20 PGEN_.00g000750-vv0.74.a_TIF3s6b.rev -rw-rw-r-- 1 sam sam 16K Jul 30 21:20 20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_primers_default_format.txt -rw-rw-r-- 1 sam sam 72 Jul 30 21:20 20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_emboss_primers.txt -rw-rw-r-- 1 sam sam 2.9M Jul 30 21:20 20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_primersearch.txt -rw-rw-r-- 1 sam sam 68K Jul 30 21:20 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_primer3_params.txt -rw-rw-r-- 1 sam sam 2.2M Jul 30 21:21 PGEN_.00g280110-vv0.74.a_SPTN1.for -rw-rw-r-- 1 sam sam 2.3M Jul 30 21:21 PGEN_.00g280110-vv0.74.a_SPTN1.rev -rw-rw-r-- 1 sam sam 71K Jul 30 21:21 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_primers_default_format.txt -rw-rw-r-- 1 sam sam 70 Jul 30 21:21 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_emboss_primers.txt -rw-rw-r-- 1 sam sam 93K Jul 30 21:21 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_primersearch.txt -rw-rw-r-- 1 sam sam 32K Jul 30 21:21 20200730_PGEN_.00g070040-vv0.74.a_APLP_primer3_params.txt -rw-rw-r-- 1 sam sam 944K Jul 30 21:22 PGEN_.00g070040-vv0.74.a_APLP.for -rw-rw-r-- 1 sam sam 966K Jul 30 21:22 PGEN_.00g070040-vv0.74.a_APLP.rev -rw-rw-r-- 1 sam sam 36K Jul 30 21:22 20200730_PGEN_.00g070040-vv0.74.a_APLP_primers_default_format.txt -rw-rw-r-- 1 sam sam 69 Jul 30 21:22 20200730_PGEN_.00g070040-vv0.74.a_APLP_emboss_primers.txt -rw-rw-r-- 1 sam sam 424 Jul 30 21:22 20200730_PGEN_.00g070040-vv0.74.a_APLP_primersearch.txt
Reminder: Each set should match at least once - to its own gene sequence from which it was derived.
Reminder: These primers were designed against gene sequences, not coding sequences.
%%bash
cd "${out_dir}"
grep -c "Amplimer*" *primersearch.txt
20200730_PGEN_.00g000750-vv0.74.a_TIF3s6b_primersearch.txt:15512 20200730_PGEN_.00g025890-vv0.74.a_TIF3s12_primersearch.txt:2 20200730_PGEN_.00g070040-vv0.74.a_APLP_primersearch.txt:2 20200730_PGEN_.00g079690-vv0.74.a_TIF3s7_primersearch.txt:14 20200730_PGEN_.00g082590-vv0.74.a_TIF3s5_primersearch.txt:742 20200730_PGEN_.00g088260-vv0.74.a_NFIP1_primersearch.txt:36 20200730_PGEN_.00g114060-vv0.74.a_GSK3B_primersearch.txt:8596 20200730_PGEN_.00g132030-vv0.74.a_TIF3s8-1_primersearch.txt:10 20200730_PGEN_.00g132040-vv0.74.a_TIF3s8-2_primersearch.txt:7800 20200730_PGEN_.00g188130-vv0.74.a_FEN1_primersearch.txt:2 20200730_PGEN_.00g194630-vv0.74.a_ECHD3_primersearch.txt:2 20200730_PGEN_.00g224740-vv0.74.a_GLYG_primersearch.txt:46 20200730_PGEN_.00g245080-vv0.74.a_TIF3s10_primersearch.txt:8 20200730_PGEN_.00g280110-vv0.74.a_SPTN1_primersearch.txt:496 20200730_PGEN_.00g287540-vv0.74.a_RPL5_primersearch.txt:2570 20200730_PGEN_.00g288180-vv0.74.a_TIF3s4a_primersearch.txt:4 20200730_PGEN_.00g338640-vv0.74.a_NSF_primersearch.txt:2
%%bash
faidx -h
usage: faidx [-h] [-b BED] [-o OUT] [-i {bed,chromsizes,nucleotide,transposed}] [-c] [-r] [-y] [-a SIZE_RANGE] [-n | -f] [-t] [-x] [-l] [-s DEFAULT_SEQ] [-d DELIMITER] [-e HEADER_FUNCTION] [-u {stop,first,last,longest,shortest}] [-g REGEX] [-v] [-m | -M] [--no-output] [--no-rebuild] [--version] fasta [regions [regions ...]] Fetch sequences from FASTA. If no regions are specified, all entries in the input file are returned. Input FASTA file must be consistently line-wrapped, and line wrapping of output is based on input line lengths. positional arguments: fasta FASTA file regions space separated regions of sequence to fetch e.g. chr1:1-1000 optional arguments: -h, --help show this help message and exit --no-rebuild do not rebuild the .fai index even if it is out of date. default: False --version print pyfaidx version number input options: -b BED, --bed BED bed file of regions output options: -o OUT, --out OUT output file name (default: stdout) -i {bed,chromsizes,nucleotide,transposed}, --transform {bed,chromsizes,nucleotide,transposed} transform the requested regions into another format. default: None -c, --complement complement the sequence. default: False -r, --reverse reverse the sequence. default: False -y, --auto-strand reverse complement the sequence when start > end coordinate. default: False -a SIZE_RANGE, --size-range SIZE_RANGE selected sequences are in the size range [low, high]. example: 1,1000 default: None -x, --split-files write each region to a separate file (names are derived from regions) -l, --lazy fill in --default-seq for missing ranges. default: False -s DEFAULT_SEQ, --default-seq DEFAULT_SEQ default base for missing positions and masking. default: None -m, --mask-with-default-seq mask the FASTA file using --default-seq default: False -M, --mask-by-case mask the FASTA file by changing to lowercase. default: False --no-output do not output any sequence. default: False header options: -n, --no-names omit sequence names from output. default: False -f, --long-names output full (long) names from the input fasta headers. default: headers are truncated after the first whitespace -t, --no-coords omit coordinates (e.g. chr:start-end) from output headers. default: False -d DELIMITER, --delimiter DELIMITER delimiter for splitting names to multiple values (duplicate names will be discarded). default: None -e HEADER_FUNCTION, --header-function HEADER_FUNCTION python function to modify header lines e.g: "lambda x: x.split("|")[0]". default: lambda x: x.split()[0] -u {stop,first,last,longest,shortest}, --duplicates-action {stop,first,last,longest,shortest} entry to take when duplicate sequence names are encountered. default: stop matching arguments: -g REGEX, --regex REGEX selected sequences are those matching regular expression. default: .* -v, --invert-match selected sequences are those not matching 'regions' argument. default: False Please cite: Shirley MD, Ma Z, Pedersen BS, Wheelan SJ. (2015) Efficient "pythonic" access to FASTA files using pyfaidx. PeerJ PrePrints 3:e1196 https://dx.doi.org/10.7287/peerj.preprints.970v1
%%bash
${primer3} -help
Copyright (c) 1996-2017 Whitehead Institute for Biomedical Research, Steve Rozen (http://purl.com/STEVEROZEN/), Andreas Untergasser and Helen Skaletsky All rights reserved. This file is part of the primer3 suite and libraries. The primer3 suite and libraries are free software; you can redistribute them and/or modify them under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this software (file gpl-2.0.txt in the source distribution); if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USAGE: /home/sam/programs/primer3-2.4.0/src/primer3_core [--format_output] [--default_version=1|--default_version=2] [--io_version=4] [--p3_settings_file=<file_path>] [--echo_settings_file] [--strict_tags] [--output=<file_path>] [--error=<file_path>] [input_file] This is primer3 (libprimer3 release 2.4.0) Input can also be provided on standard input. For example: $ primer3_core < my_input_file
--------------------------------------------------------------------------- CalledProcessError Traceback (most recent call last) <ipython-input-26-ee39db9bb32c> in <module> ----> 1 get_ipython().run_cell_magic('bash', '', '${primer3} -help\n') ~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell) 2360 with self.builtin_trap: 2361 args = (magic_arg_s, cell) -> 2362 result = fn(*args, **kwargs) 2363 return result 2364 ~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magics/script.py in named_script_magic(line, cell) 140 else: 141 line = script --> 142 return self.shebang(line, cell) 143 144 # write a basic docstring: </home/sam/programs/minicocnda3/lib/python3.6/site-packages/decorator.py:decorator-gen-110> in shebang(self, line, cell) ~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k) 185 # but it's overkill for just that one bit of state. 186 def magic_deco(arg): --> 187 call = lambda f, *a, **k: f(*a, **k) 188 189 if callable(arg): ~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magics/script.py in shebang(self, line, cell) 243 sys.stderr.flush() 244 if args.raise_error and p.returncode!=0: --> 245 raise CalledProcessError(p.returncode, cell, output=out, stderr=err) 246 247 def _run_script(self, p, cell, to_close): CalledProcessError: Command 'b'${primer3} -help\n'' returned non-zero exit status 255.
%%bash
${primersearch} -help -verbose
Search DNA sequences for matches with primer pairs Version: EMBOSS:6.6.0.0 Standard (Mandatory) qualifiers: [-seqall] seqall Nucleotide sequence(s) filename and optional format, or reference (input USA) [-infile] infile Primer pairs file [-mismatchpercent] integer [0] Allowed percent mismatch (Any integer value) [-outfile] outfile [*.primersearch] Whitehead primer3_core program output file Additional (Optional) qualifiers: (none) Advanced (Unprompted) qualifiers: (none) Associated qualifiers: "-seqall" associated qualifiers -sbegin1 integer Start of each sequence to be used -send1 integer End of each sequence to be used -sreverse1 boolean Reverse (if DNA) -sask1 boolean Ask for begin/end/reverse -snucleotide1 boolean Sequence is nucleotide -sprotein1 boolean Sequence is protein -slower1 boolean Make lower case -supper1 boolean Make upper case -scircular1 boolean Sequence is circular -squick1 boolean Read id and sequence only -sformat1 string Input sequence format -iquery1 string Input query fields or ID list -ioffset1 integer Input start position offset -sdbname1 string Database name -sid1 string Entryname -ufo1 string UFO features -fformat1 string Features format -fopenfile1 string Features file name "-outfile" associated qualifiers -odirectory4 string Output directory General qualifiers: -auto boolean Turn off prompts -stdout boolean Write first file to standard output -filter boolean Read first file from standard input, write first file to standard output -options boolean Prompt for standard and additional values -debug boolean Write debug output to program.dbg -verbose boolean Report some/full command line options -help boolean Report command line options and exit. More information on associated and general qualifiers can be found with -help -verbose -warning boolean Report warnings -error boolean Report errors -fatal boolean Report fatal errors -die boolean Report dying program messages -version boolean Report version number and exit