#!/usr/bin/env python
# coding: utf-8

# # Compare E5 coral sRNA-seq trimming options
#
# Simple adapter trimming vs. adapter trimming and trimming to expected sRNA lengths.
#
# This file is a Jupyter notebook exported as a script: every `get_ipython()`
# call below is a notebook magic, so it must be run under IPython/Jupyter.

# ### List computer specs

# In[1]:


get_ipython().run_cell_magic('bash', '', r"""
echo "TODAY'S DATE:"
date
echo "------------"
echo ""
#Display operating system info
lsb_release -a
echo ""
echo "------------"
echo "HOSTNAME: "; hostname
echo ""
echo "------------"
echo "Computer Specs:"
echo ""
lscpu
echo ""
echo "------------"
echo ""
echo "Memory Specs"
echo ""
free -mh
""")


# ### Set variables
# - `%env` indicates a bash variable
#
# - without `%env` is Python variable

# In[2]:


# Set directories, input/output files
# Each path is defined once in Python and then exported with `%env`, so the
# Python and bash values cannot drift apart.
data_dir = "/home/shared/8TB_HDD_01/sam/data/A_pulchra/sRNAseq"
analysis_dir = (
    "/home/shared/8TB_HDD_01/sam/analyses/"
    "20230524-E5-coral-sRNAseq_trimmings_comparisons"
)
get_ipython().run_line_magic('env', 'data_dir=' + data_dir)
get_ipython().run_line_magic('env', 'analysis_dir=' + analysis_dir)
get_ipython().run_line_magic(
    'env', 'R1_fastq=' + data_dir + '/sRNA-ACR-140-S1-TP2_R1_001.fastq.gz'
)
get_ipython().run_line_magic(
    'env', 'R2_fastq=' + data_dir + '/sRNA-ACR-140-S1-TP2_R2_001.fastq.gz'
)

# Set CPU threads
get_ipython().run_line_magic('env', 'threads=40')

# Max read length
get_ipython().run_line_magic('env', 'max_read_length=50')

# Set program locations
get_ipython().run_line_magic('env', 'fastqc=/home/shared/FastQC/fastqc')
get_ipython().run_line_magic('env', 'flexbar=/home/shared/flexbar-3.5.0-linux/flexbar')

# Set some formatting stuff
get_ipython().run_line_magic(
    'env',
    'break_line=--------------------------------------------------------------------------'
)


# ### Create analysis directory

# In[3]:


get_ipython().run_cell_magic('bash', '', r"""
# Make analysis and data directory, if doesn't exist
mkdir --parents "${analysis_dir}"

mkdir --parents "${data_dir}"
""")


# # Adapter only trimming

# ### Inspect NEB Adapter FastA
#
# Adapter sequences are in the NEB sRNA kit protocol used by Azenta for library construction.

# In[4]:


get_ipython().run_cell_magic('bash', '', r"""
cat "${data_dir}/NEB-adapters.fasta"
""")


# ### Trim adapters
#
# Options:
#
# - `-ap ON`: Adapter overlap detection for paired-end reads; recommended by NEB
#
# - `-qf i1.8`: Sets quality type as Illumina v1.8
#
# - `-qt 25`: Quality threshold of 25 for quality trimming
#
# - `--target`: Sets output filename prefix
#
# - `--zip-output GZ`: Sets gzip compression for trimmed files
#
# NOTE(review): flexbar selects its quality-trimming mode with `-q/--qtrim`,
# which is not set here. Confirm in the log below that `-qf`/`-qt` take effect
# without it.

# In[5]:


get_ipython().run_cell_magic('bash', '', r"""
# Stop if the analysis directory is unavailable, so output is never written elsewhere
cd "${analysis_dir}" || exit 1

"${flexbar}" \
-r "${R1_fastq}" \
-p "${R2_fastq}" \
-a "${data_dir}/NEB-adapters.fasta" \
-ap ON \
-qf i1.8 \
-qt 25 \
--threads "${threads}" \
--target sRNA-ACR-140-S1-TP2_R1_001-adapter_trim_only \
--zip-output GZ

ls -lh
""")


# ### Check log file

# In[6]:


get_ipython().run_cell_magic('bash', '', r"""
cd "${analysis_dir}" || exit 1

cat sRNA-ACR-140-S1-TP2_R1_001-adapter_trim_only.log
""")


# # Adapter and length trimming

# ### Trim adapters and set max length (trimmed from 3' end)

# Options:
#
# - `-ap ON`: Adapter overlap detection for paired-end reads; recommended by NEB
#
# - `-qf i1.8`: Sets quality type as Illumina v1.8
#
# - `-qt 25`: Quality threshold of 25 for quality trimming
#
# - `--post-trim-length`: Trim reads from 3' end to length specified after adapter and quality trimming.
#
# - `--target`: Sets output filename prefix
#
# - `--zip-output GZ`: Sets gzip compression for trimmed files
#
# The output prefix is built from `max_read_length`, so the file names always
# reflect the length that was actually applied.

# In[7]:


get_ipython().run_cell_magic('bash', '', r"""
cd "${analysis_dir}" || exit 1

"${flexbar}" \
-r "${R1_fastq}" \
-p "${R2_fastq}" \
-a "${data_dir}/NEB-adapters.fasta" \
-ap ON \
-qf i1.8 \
-qt 25 \
--post-trim-length "${max_read_length}" \
--threads "${threads}" \
--target "sRNA-ACR-140-S1-TP2_R1_001-adapter-and-length-${max_read_length}" \
--zip-output GZ

ls -lh
""")


# ### Check log file

# In[8]:


get_ipython().run_cell_magic('bash', '', r"""
cd "${analysis_dir}" || exit 1

cat "sRNA-ACR-140-S1-TP2_R1_001-adapter-and-length-${max_read_length}.log"
""")


# # FastQC

# In[9]:


get_ipython().run_cell_magic('bash', '', r"""
cd "${analysis_dir}" || exit 1

# With nullglob, an unmatched pattern yields an empty array instead of the literal pattern
shopt -s nullglob
trimmed_fastq_array=(*.fastq.gz)

if [ "${#trimmed_fastq_array[@]}" -eq 0 ]; then
  echo "No trimmed FastQ files found in ${analysis_dir}" >&2
  exit 1
fi

# Pass each file as its own argument; no intermediate space-delimited string is needed
"${fastqc}" \
"${trimmed_fastq_array[@]}" \
--threads "${threads}" \
--outdir ./ \
--quiet
""")


# # MultiQC

# In[10]:


get_ipython().run_cell_magic('bash', '', r"""
cd "${analysis_dir}" || exit 1

multiqc .
""")


# ### Document program options

# In[11]:


get_ipython().run_cell_magic('bash', '', r"""
"${flexbar}" -hh
""")