%%bash
# Record when and where this notebook was run: date, OS release, hostname,
# CPU details and memory details, separated by a dashed rule.
rule="------------"

printf '%s\n' "TODAY'S DATE:"
date
printf '%s\n\n' "$rule"

# Operating system release information
lsb_release -a
printf '\n%s\n' "$rule"

printf '%s\n' "HOSTNAME: "
hostname
printf '\n%s\n' "$rule"

printf '%s\n\n' "Computer Specs:"
lscpu
printf '\n%s\n\n' "$rule"

printf '%s\n\n' "Memory Specs"
free -mh
TODAY'S DATE: Mon Sep 10 10:03:21 PDT 2018 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.5 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: roadrunner ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 16 On-line CPU(s) list: 0-15 Thread(s) per core: 2 Core(s) per socket: 4 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 26 Model name: Intel(R) Xeon(R) CPU E5520 @ 2.27GHz Stepping: 5 CPU MHz: 2394.000 CPU max MHz: 2394.0000 CPU min MHz: 1596.0000 BogoMIPS: 4521.80 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 8192K NUMA node0 CPU(s): 0-15 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts nopl xtopology nonstop_tsc aperfmperf eagerfpu pni dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm dca sse4_1 sse4_2 popcnt lahf_lm kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida ------------ Memory Specs total used free shared buff/cache available Mem: 47G 2.4G 36G 567M 8.7G 43G Swap: 47G 0B 47G
No LSB modules are available.
%%bash
# Download the gzipped lane 1 FastQ files (names matching "2112_lane1_[ATCG]")
# into a local data directory, send a notification email, then list the
# directory contents.
data_dir=/home/sam/data/Cvirginica

# -p keeps the cell re-runnable when the directory already exists.
# Stop if the directory cannot be entered so wget never downloads into
# whatever directory the cell happened to start in.
mkdir -p "$data_dir"
cd "$data_dir" || exit 1

# Every continued line must end with a backslash. Without one after the
# --accept-regex option the wget command ends there ("wget: missing URL")
# and bash then tries to execute the URL as a command of its own.
time \
wget \
--quiet \
--no-directories \
--recursive \
--accept gz \
--accept-regex "2112_lane1_[ATCG]" \
http://owl.fish.washington.edu/nightingales/C_virginica/

# Edit the Subject: line of the mail template and pipe the message to msmtp,
# addressed to $EMAIL.
sed '/^Subject:/ s/ / virginica download JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"

# Long listing, oldest first, with human-readable sizes.
ls -ltrh
total 0
wget: missing URL Usage: wget [OPTION]... [URL]... Try `wget --help' for more options. real 0m0.003s user 0m0.000s sys 0m0.000s bash: line 10: http://owl.fish.washington.edu/nightingales/C_virginica/: No such file or directory
%%bash
# Download the gzipped lane 1 FastQ files (names matching "2112_lane1_[ATCG]")
# into the data directory, send a notification email, then list the
# directory contents.

# Stop if the data directory is missing; otherwise the recursive download
# would land in whatever directory the cell happened to start in.
cd /home/sam/data/Cvirginica || exit 1

# Recursive retrieval limited to .gz files whose URL matches the regex;
# --no-directories saves every file directly into the current directory.
time \
wget \
--quiet \
--no-directories \
--recursive \
--accept gz \
--accept-regex "2112_lane1_[ATCG]" \
http://owl.fish.washington.edu/nightingales/C_virginica/

# Edit the Subject: line of the mail template and pipe the message to msmtp,
# addressed to $EMAIL.
sed '/^Subject:/ s/ / virginica download JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"

# Long listing, oldest first, with human-readable sizes.
ls -ltrh
total 14G -rw-rw-r-- 1 sam sam 117M Apr 13 2015 2112_lane1_ACAGTG_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 872M Apr 13 2015 2112_lane1_TTAGGC_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_TTAGGC_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_GCCAAT_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_ATCACG_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_CAGATC_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_ATCACG_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 133M Apr 13 2015 2112_lane1_ATCACG_L001_R1_003.fastq.gz -rw-rw-r-- 1 sam sam 690M Apr 13 2015 2112_lane1_CAGATC_L001_R1_003.fastq.gz -rw-rw-r-- 1 sam sam 704M Apr 13 2015 2112_lane1_TGACCA_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 789M Apr 13 2015 2112_lane1_GCCAAT_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.4G Apr 13 2015 2112_lane1_CAGATC_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.4G Apr 13 2015 2112_lane1_ACAGTG_L001_R1_001.fastq.gz
real 7m52.814s user 0m10.672s sys 1m4.880s
%%bash
# List the downloaded FastQ files with human-readable sizes.
# Stop if the data directory is missing rather than silently listing
# whatever directory the cell happened to start in.
cd /home/sam/data/Cvirginica || exit 1
ls -lh
total 14G -rw-rw-r-- 1 sam sam 1.4G Apr 13 2015 2112_lane1_ACAGTG_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 117M Apr 13 2015 2112_lane1_ACAGTG_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_ATCACG_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_ATCACG_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 133M Apr 13 2015 2112_lane1_ATCACG_L001_R1_003.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_CAGATC_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.4G Apr 13 2015 2112_lane1_CAGATC_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 690M Apr 13 2015 2112_lane1_CAGATC_L001_R1_003.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_GCCAAT_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 789M Apr 13 2015 2112_lane1_GCCAAT_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 704M Apr 13 2015 2112_lane1_TGACCA_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_TTAGGC_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 872M Apr 13 2015 2112_lane1_TTAGGC_L001_R1_002.fastq.gz
Also renamed one file to maintain the same naming structure as the concatenated files.
%%bash
# Merge the per-barcode FastQ chunks (_L001_R1_001, _002, ...) into a single
# 2112_lane1_<barcode>.fastq.gz per barcode, then send a notification email.
# gzip files can be concatenated directly with cat; the result is still a
# valid gzip file.

# Stop if the data directory is missing; the redirects below would otherwise
# write the merged files into the wrong directory.
cd /home/sam/data/Cvirginica || exit 1

# Abort on the first failed command so the "JOB COMPLETE" email is only sent
# when every output file was actually written.
set -e

cat \
2112_lane1_ACAGTG_L001_R1_001.fastq.gz \
2112_lane1_ACAGTG_L001_R1_002.fastq.gz \
> 2112_lane1_ACAGTG.fastq.gz

cat \
2112_lane1_ATCACG_L001_R1_001.fastq.gz \
2112_lane1_ATCACG_L001_R1_002.fastq.gz \
2112_lane1_ATCACG_L001_R1_003.fastq.gz \
> 2112_lane1_ATCACG.fastq.gz

cat \
2112_lane1_CAGATC_L001_R1_001.fastq.gz \
2112_lane1_CAGATC_L001_R1_002.fastq.gz \
2112_lane1_CAGATC_L001_R1_003.fastq.gz \
> 2112_lane1_CAGATC.fastq.gz

cat \
2112_lane1_GCCAAT_L001_R1_001.fastq.gz \
2112_lane1_GCCAAT_L001_R1_002.fastq.gz \
> 2112_lane1_GCCAAT.fastq.gz

# TGACCA has only one chunk, so it is renamed (not concatenated) to follow
# the same naming scheme as the merged files.
mv \
2112_lane1_TGACCA_L001_R1_001.fastq.gz \
2112_lane1_TGACCA.fastq.gz

cat \
2112_lane1_TTAGGC_L001_R1_001.fastq.gz \
2112_lane1_TTAGGC_L001_R1_002.fastq.gz \
> 2112_lane1_TTAGGC.fastq.gz

# Edit the Subject: line of the mail template and pipe the message to msmtp,
# addressed to $EMAIL.
sed '/^Subject:/ s/ / concatenation JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"
%%bash
# List the data directory (oldest first, human-readable sizes) to check the
# merged files.
# Stop if the data directory is missing rather than silently listing
# whatever directory the cell happened to start in.
cd /home/sam/data/Cvirginica || exit 1
ls -lhtr
total 26G -rw-rw-r-- 1 sam sam 117M Apr 13 2015 2112_lane1_ACAGTG_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 872M Apr 13 2015 2112_lane1_TTAGGC_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_TTAGGC_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_GCCAAT_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_ATCACG_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_CAGATC_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Apr 13 2015 2112_lane1_ATCACG_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 133M Apr 13 2015 2112_lane1_ATCACG_L001_R1_003.fastq.gz -rw-rw-r-- 1 sam sam 690M Apr 13 2015 2112_lane1_CAGATC_L001_R1_003.fastq.gz -rw-rw-r-- 1 sam sam 704M Apr 13 2015 2112_lane1_TGACCA.fastq.gz -rw-rw-r-- 1 sam sam 789M Apr 13 2015 2112_lane1_GCCAAT_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.4G Apr 13 2015 2112_lane1_CAGATC_L001_R1_002.fastq.gz -rw-rw-r-- 1 sam sam 1.4G Apr 13 2015 2112_lane1_ACAGTG_L001_R1_001.fastq.gz -rw-rw-r-- 1 sam sam 1.5G Sep 10 10:59 2112_lane1_ACAGTG.fastq.gz -rw-rw-r-- 1 sam sam 3.1G Sep 10 10:59 2112_lane1_ATCACG.fastq.gz -rw-rw-r-- 1 sam sam 3.5G Sep 10 10:59 2112_lane1_CAGATC.fastq.gz -rw-rw-r-- 1 sam sam 2.3G Sep 10 10:59 2112_lane1_GCCAAT.fastq.gz -rw-rw-r-- 1 sam sam 2.3G Sep 10 10:59 2112_lane1_TTAGGC.fastq.gz
%%bash
# Move the six per-barcode FastQ files into a "concatenated" subdirectory of
# the data directory, then list that subdirectory.
data=/home/sam/data/Cvirginica
# Derived from $data so the two paths cannot drift apart.
data_cat=$data/concatenated

# -p keeps the cell re-runnable; stop if the target cannot be created.
mkdir -p "$data_cat" || exit 1

# The trailing slash on the destination makes mv fail instead of renaming a
# file to "concatenated" if the directory is somehow absent.
mv \
"$data"/2112_lane1_TGACCA.fastq.gz \
"$data"/2112_lane1_ACAGTG.fastq.gz \
"$data"/2112_lane1_ATCACG.fastq.gz \
"$data"/2112_lane1_CAGATC.fastq.gz \
"$data"/2112_lane1_GCCAAT.fastq.gz \
"$data"/2112_lane1_TTAGGC.fastq.gz \
"$data_cat"/

ls -lh "$data_cat"
total 14G -rw-rw-r-- 1 sam sam 1.5G Sep 10 10:59 2112_lane1_ACAGTG.fastq.gz -rw-rw-r-- 1 sam sam 3.1G Sep 10 10:59 2112_lane1_ATCACG.fastq.gz -rw-rw-r-- 1 sam sam 3.5G Sep 10 10:59 2112_lane1_CAGATC.fastq.gz -rw-rw-r-- 1 sam sam 2.3G Sep 10 10:59 2112_lane1_GCCAAT.fastq.gz -rw-rw-r-- 1 sam sam 704M Apr 13 2015 2112_lane1_TGACCA.fastq.gz -rw-rw-r-- 1 sam sam 2.3G Sep 10 10:59 2112_lane1_TTAGGC.fastq.gz
The code below builds a list of the FastQ files (FASTQ_LIST),
which is then passed to FastQC.
%%bash
# Run FastQC on every .gz file in the concatenated directory, writing the
# reports to a dated output subdirectory, then send a notification email.
data_cat=/home/sam/data/Cvirginica/concatenated
fastqc_out=$data_cat/20180910_Cvirginica_oil_fastqc

# -p keeps the cell re-runnable; stop early if either directory is unusable.
mkdir -p "$fastqc_out" || exit 1
cd "$data_cat" || exit 1

# FASTQ_LIST is an array with one element per file, filled by a glob.
# This avoids parsing ls output and relying on word-splitting of a
# space-delimited string, which breaks on unusual file names.
FASTQ_LIST=(*.gz)

# Without nullglob an unmatched glob stays as the literal "*.gz"; bail out
# instead of handing FastQC a file that does not exist.
if [[ ! -e ${FASTQ_LIST[0]} ]]; then
  echo "No .gz files found in $data_cat" >&2
  exit 1
fi

time \
/home/shared/fastqc_v0.11.7/fastqc \
--extract \
--threads 16 \
--quiet \
--outdir "$fastqc_out" \
"${FASTQ_LIST[@]}"

# Edit the Subject: line of the mail template and pipe the message to msmtp,
# addressed to $EMAIL.
sed '/^Subject:/ s/ / fastqc JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"
real 7m47.963s user 28m26.772s sys 0m42.752s
%%bash
# List the FastQC output directory.
# Stop if the directory is missing rather than silently listing whatever
# directory the cell happened to start in.
cd /home/sam/data/Cvirginica/concatenated/20180910_Cvirginica_oil_fastqc/ || exit 1
ls
2112_lane1_ACAGTG_fastqc 2112_lane1_ACAGTG_fastqc.html 2112_lane1_ACAGTG_fastqc.zip 2112_lane1_ATCACG_fastqc 2112_lane1_ATCACG_fastqc.html 2112_lane1_ATCACG_fastqc.zip 2112_lane1_CAGATC_fastqc 2112_lane1_CAGATC_fastqc.html 2112_lane1_CAGATC_fastqc.zip 2112_lane1_GCCAAT_fastqc 2112_lane1_GCCAAT_fastqc.html 2112_lane1_GCCAAT_fastqc.zip 2112_lane1_TGACCA_fastqc 2112_lane1_TGACCA_fastqc.html 2112_lane1_TGACCA_fastqc.zip 2112_lane1_TTAGGC_fastqc 2112_lane1_TTAGGC_fastqc.html 2112_lane1_TTAGGC_fastqc.zip
%%bash
# Run MultiQC over the FastQC output directory (timed), then list the
# directory to show what it produced.

# multiqc scans the current directory ("."), so stop if the FastQC output
# directory cannot be entered instead of scanning some other location.
cd /home/sam/data/Cvirginica/concatenated/20180910_Cvirginica_oil_fastqc/ || exit 1

time \
multiqc .

ls
Searching 114 files.. 2112_lane1_ACAGTG_fastqc 2112_lane1_ACAGTG_fastqc.html 2112_lane1_ACAGTG_fastqc.zip 2112_lane1_ATCACG_fastqc 2112_lane1_ATCACG_fastqc.html 2112_lane1_ATCACG_fastqc.zip 2112_lane1_CAGATC_fastqc 2112_lane1_CAGATC_fastqc.html 2112_lane1_CAGATC_fastqc.zip 2112_lane1_GCCAAT_fastqc 2112_lane1_GCCAAT_fastqc.html 2112_lane1_GCCAAT_fastqc.zip 2112_lane1_TGACCA_fastqc 2112_lane1_TGACCA_fastqc.html 2112_lane1_TGACCA_fastqc.zip 2112_lane1_TTAGGC_fastqc 2112_lane1_TTAGGC_fastqc.html 2112_lane1_TTAGGC_fastqc.zip multiqc_data multiqc_report.html
[WARNING] multiqc : MultiQC Version v1.6 now available! [INFO ] multiqc : This is MultiQC v1.5.dev0 [INFO ] multiqc : Template : default [INFO ] multiqc : Searching '.' [INFO ] fastqc : Found 6 reports [INFO ] multiqc : Compressing plot data [INFO ] multiqc : Report : multiqc_report.html [INFO ] multiqc : Data : multiqc_data [INFO ] multiqc : MultiQC complete real 0m7.913s user 0m3.312s sys 0m0.220s
Performed outside of notebook, due to sudo
requirement.
%%bash
# List the FastQC results directory under the /mnt/owl mount.
# NOTE(review): the path sits under "Athaliana" although the data set is
# C. virginica — confirm this is the intended location.
owl_dir=/mnt/owl/Athaliana/20180910_Cvirginica_oil_fastqc/
ls "$owl_dir"
2112_lane1_ACAGTG_fastqc 2112_lane1_ACAGTG_fastqc.html 2112_lane1_ACAGTG_fastqc.zip 2112_lane1_ACAGTG.fastq.gz 2112_lane1_ATCACG_fastqc 2112_lane1_ATCACG_fastqc.html 2112_lane1_ATCACG_fastqc.zip 2112_lane1_ATCACG.fastq.gz 2112_lane1_CAGATC_fastqc 2112_lane1_CAGATC_fastqc.html 2112_lane1_CAGATC_fastqc.zip 2112_lane1_CAGATC.fastq.gz 2112_lane1_GCCAAT_fastqc 2112_lane1_GCCAAT_fastqc.html 2112_lane1_GCCAAT_fastqc.zip 2112_lane1_GCCAAT.fastq.gz 2112_lane1_TGACCA_fastqc 2112_lane1_TGACCA_fastqc.html 2112_lane1_TGACCA_fastqc.zip 2112_lane1_TGACCA.fastq.gz 2112_lane1_TTAGGC_fastqc 2112_lane1_TTAGGC_fastqc.html 2112_lane1_TTAGGC_fastqc.zip 2112_lane1_TTAGGC.fastq.gz multiqc_data multiqc_report.html