#!/usr/bin/env python
# coding: utf-8

# Exported notebook: each cell hands a bash script to IPython's ``%%bash``
# cell magic. Every script is written one shell line per string literal;
# adjacent literals are concatenated by Python into a single cell body.

# In[1]:


# Print the date, OS release, hostname, CPU details and memory details.
get_ipython().run_cell_magic(
    'bash',
    '',
    'echo "TODAY\'S DATE:"\n'
    'date\n'
    'echo "------------"\n'
    'echo ""\n'
    '#Display operating system info\n'
    'lsb_release -a\n'
    'echo ""\n'
    'echo "------------"\n'
    'echo "HOSTNAME: "; hostname \n'
    'echo ""\n'
    'echo "------------"\n'
    'echo "Computer Specs:"\n'
    'echo ""\n'
    'lscpu\n'
    'echo ""\n'
    'echo "------------"\n'
    'echo ""\n'
    'echo "Memory Specs"\n'
    'echo ""\n'
    'free -mh\n',
)


# ### Download necessary files

# In[2]:


# NOTE: the `--accept-regex` line below has no trailing line-continuation
# backslash, so the URL on the next line is not part of the wget command.
# The cell is kept exactly as it was run; the following cell repeats the
# download with the continuation in place.
get_ipython().run_cell_magic(
    'bash',
    '',
    'mkdir /home/sam/data/Cvirginica\n'
    'cd /home/sam/data/Cvirginica\n'
    'time \\\n'
    'wget \\\n'
    '--quiet \\\n'
    '--no-directories \\\n'
    '--recursive \\\n'
    '--accept gz \\\n'
    '--accept-regex "2112_lane1_[ATCG]"\n'
    'http://owl.fish.washington.edu/nightingales/C_virginica/\n'
    'sed \'/^Subject:/ s/ / virginica download JOB COMPLETE/\' ~/.default-subject.mail | msmtp "$EMAIL"\n'
    '\n'
    'ls -ltrh\n',
)


# #### Fix typo - forgot line continuation slash after regex line.

# In[3]:


# Same download as above, with the URL joined onto the wget command line.
# A notification mail is piped to msmtp afterwards, then the directory is
# listed.
get_ipython().run_cell_magic(
    'bash',
    '',
    'cd /home/sam/data/Cvirginica\n'
    'time \\\n'
    'wget \\\n'
    '--quiet \\\n'
    '--no-directories \\\n'
    '--recursive \\\n'
    '--accept gz \\\n'
    '--accept-regex "2112_lane1_[ATCG]" \\\n'
    'http://owl.fish.washington.edu/nightingales/C_virginica/\n'
    'sed \'/^Subject:/ s/ / virginica download JOB COMPLETE/\' ~/.default-subject.mail | msmtp "$EMAIL"\n'
    '\n'
    'ls -ltrh\n',
)


# In[4]:


# List the download directory.
get_ipython().run_cell_magic(
    'bash',
    '',
    'cd /home/sam/data/Cvirginica\n'
    'ls -lh\n',
)


# ### Concatenate files
#
# Also renamed one file to maintain same naming structure as concatenated files.
# In[5]:


# Join the numbered parts (_001, _002, ...) of each barcode into a single
# <prefix>_<barcode>.fastq.gz file. TGACCA has only one part, so it is
# renamed with `mv` instead of concatenated. A notification mail is piped
# to msmtp at the end.
get_ipython().run_cell_magic(
    'bash',
    '',
    'cd /home/sam/data/Cvirginica\n'
    'cat \\\n'
    '2112_lane1_ACAGTG_L001_R1_001.fastq.gz \\\n'
    '2112_lane1_ACAGTG_L001_R1_002.fastq.gz \\\n'
    '> 2112_lane1_ACAGTG.fastq.gz\n'
    '\n'
    'cat \\\n'
    '2112_lane1_ATCACG_L001_R1_001.fastq.gz \\\n'
    '2112_lane1_ATCACG_L001_R1_002.fastq.gz \\\n'
    '2112_lane1_ATCACG_L001_R1_003.fastq.gz \\\n'
    '> 2112_lane1_ATCACG.fastq.gz\n'
    '\n'
    'cat \\\n'
    '2112_lane1_CAGATC_L001_R1_001.fastq.gz \\\n'
    '2112_lane1_CAGATC_L001_R1_002.fastq.gz \\\n'
    '2112_lane1_CAGATC_L001_R1_003.fastq.gz \\\n'
    '> 2112_lane1_CAGATC.fastq.gz\n'
    '\n'
    'cat \\\n'
    '2112_lane1_GCCAAT_L001_R1_001.fastq.gz \\\n'
    '2112_lane1_GCCAAT_L001_R1_002.fastq.gz \\\n'
    '> 2112_lane1_GCCAAT.fastq.gz\n'
    '\n'
    'mv \\\n'
    '2112_lane1_TGACCA_L001_R1_001.fastq.gz \\\n'
    '2112_lane1_TGACCA.fastq.gz\n'
    '\n'
    'cat \\\n'
    '2112_lane1_TTAGGC_L001_R1_001.fastq.gz \\\n'
    '2112_lane1_TTAGGC_L001_R1_002.fastq.gz \\\n'
    '> 2112_lane1_TTAGGC.fastq.gz\n'
    '\n'
    'sed \'/^Subject:/ s/ / concatenation JOB COMPLETE/\' ~/.default-subject.mail | msmtp "$EMAIL"\n',
)


# In[6]:


# List the data directory, oldest entries first.
get_ipython().run_cell_magic(
    'bash',
    '',
    'cd /home/sam/data/Cvirginica\n'
    'ls -lhtr\n',
)


# ### Move files around to improve organization

# In[7]:


# Create a `concatenated` subdirectory and move the six per-barcode files
# into it, then list that subdirectory. `data` is assigned with a trailing
# slash, so the `$data/...` paths expand with a doubled slash.
get_ipython().run_cell_magic(
    'bash',
    '',
    'data=/home/sam/data/Cvirginica/\n'
    'data_cat=/home/sam/data/Cvirginica/concatenated\n'
    'mkdir $data/concatenated\n'
    'mv $data/2112_lane1_TGACCA.fastq.gz $data_cat/\n'
    'mv $data/2112_lane1_ACAGTG.fastq.gz $data_cat/\n'
    'mv $data/2112_lane1_ATCACG.fastq.gz $data_cat/\n'
    'mv $data/2112_lane1_CAGATC.fastq.gz $data_cat/\n'
    'mv $data/2112_lane1_GCCAAT.fastq.gz $data_cat/\n'
    'mv $data/2112_lane1_TTAGGC.fastq.gz $data_cat/\n'
    '\n'
    'ls -lh $data_cat\n',
)


# ### Run FastQC
#
# The code below creates a space-delimited list of the FastQ files (FASTQ_LIST).
#
# This is then passed to FastQC.
# In[8]:


# Create the FastQC output directory, collect every *.gz file in the
# concatenated directory into FASTQ_LIST (newlines translated to spaces),
# and pass that list to FastQC. A notification mail is piped to msmtp
# afterwards.
get_ipython().run_cell_magic(
    'bash',
    '',
    'data_cat=/home/sam/data/Cvirginica/concatenated\n'
    'mkdir $data_cat/20180910_Cvirginica_oil_fastqc\n'
    'cd $data_cat\n'
    '\n'
    'FASTQ_LIST="$(ls -1 *.gz| tr \'\\n\' \' \')"\n'
    'time \\\n'
    '/home/shared/fastqc_v0.11.7/fastqc \\\n'
    '--extract \\\n'
    '--threads 16 \\\n'
    '--quiet \\\n'
    '--outdir $data_cat/20180910_Cvirginica_oil_fastqc \\\n'
    '$FASTQ_LIST\n'
    '\n'
    'sed \'/^Subject:/ s/ / fastqc JOB COMPLETE/\' ~/.default-subject.mail | msmtp "$EMAIL"\n',
)


# In[9]:


# List the FastQC output directory.
get_ipython().run_cell_magic(
    'bash',
    '',
    'cd /home/sam/data/Cvirginica/concatenated/20180910_Cvirginica_oil_fastqc/\n'
    'ls\n',
)


# ### Run MultiQC.

# In[10]:


# Run MultiQC on the FastQC output directory, then list the directory.
get_ipython().run_cell_magic(
    'bash',
    '',
    'cd /home/sam/data/Cvirginica/concatenated/20180910_Cvirginica_oil_fastqc/\n'
    'time \\\n'
    'multiqc .\n'
    'ls\n',
)


# ### Files copied to Owl.
#
# Performed outside of notebook, due to ```sudo``` requirement.

# In[11]:


# List the copied results directory under the /mnt/owl mount.
get_ipython().run_cell_magic(
    'bash',
    '',
    'ls /mnt/owl/Athaliana/20180910_Cvirginica_oil_fastqc/\n',
)


# In[ ]: