%%bash
# Record a snapshot of the machine this notebook ran on (date, OS release,
# hostname, CPU and memory) so the analysis can be traced to its environment.

# Emit the dashed separator placed between report sections.
divider() {
  printf '%s\n' "------------"
}

printf '%s\n' "TODAY'S DATE:"
date
divider
printf '\n'

# Operating system release details
lsb_release -a
printf '\n'
divider

printf '%s\n' "HOSTNAME: "
hostname
printf '\n'
divider

# CPU description
printf '%s\n\n' "Computer Specs:"
lscpu
printf '\n'
divider
printf '\n'

# Memory and swap usage
printf '%s\n\n' "Memory Specs"
free -mh
TODAY'S DATE: Tue Jun 25 12:51:41 PDT 2019 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.6 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: swoose ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 24 On-line CPU(s) list: 0-23 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 44 Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz Stepping: 2 CPU MHz: 2926.094 BogoMIPS: 5851.96 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 12288K NUMA node0 CPU(s): 0-23 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb ssbd ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat flush_l1d ------------ Memory Specs total used free shared buff/cache available Mem: 70G 11G 10G 553M 49G 58G Swap: 4.7G 292M 4.4G
No LSB modules are available.
# Set working directory.
# The path is written once, as a Python variable, and then exported to the
# process environment with %env so that %%bash cells can read it as ${work_dir}.
# IPython expands $work_dir on the %env line from the Python variable, so the
# two values cannot drift apart.
work_dir = "/home/sam/analyses/20190625_pgen_v070_scaffold_subsetting"
%env work_dir=$work_dir
env: work_dir=/home/sam/analyses/20190625_pgen_v070_scaffold_subsetting
# Location of the samtools executable, exported via %env so that %%bash cells
# can invoke it as "${samtools}".
%env samtools=/home/sam/programs/samtools-1.9/samtools
env: samtools=/home/sam/programs/samtools-1.9/samtools
# Set file names used by the %%bash cells (exported with %env so bash can read them).
# fasta_subset_names: text file that receives the scaffold names to extract.
%env fasta_subset_names=fasta_subset_names.txt
# out_fasta: FASTA written by the samtools faidx subsetting step.
%env out_fasta=Pgenerosa_v070.18.fa
# p70_fasta: full genome FASTA that the subset is extracted from.
%env p70_fasta=Pgenerosa_v070.fa
# p70_fai: index of the full genome FASTA; its first column supplies the scaffold names.
%env p70_fai=Pgenerosa_v070.fa.fai
env: fasta_subset_names=fasta_subset_names.txt env: out_fasta=Pgenerosa_v070.18.fa env: p70_fasta=Pgenerosa_v070.fa env: p70_fai=Pgenerosa_v070.fa.fai
%%bash
# Create the analysis directory. -p makes the cell safe to re-run when the
# directory already exists (and creates any missing parent directories);
# -- stops option parsing in case the path ever starts with a dash.
mkdir -p -- "${work_dir}"
# Move into the analysis directory.
# NOTE(review): the path echoed after this line, and the Python variable
# work_dir defined earlier, suggest this runs as IPython's `cd` automagic
# (expanding the Python variable) rather than inside a %%bash cell - confirm.
# A `cd` inside a %%bash cell would not persist to later cells, which rely on
# relative file names.
cd $work_dir
/home/sam/analyses/20190625_pgen_v070_scaffold_subsetting
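The genome FASTA and its index are downloaded via rsync in the cell below.
If rsync access to the server is not available, uncomment the `wget` alternative in that cell instead.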
%%bash
# Download the full genome FASTA and its index into the current directory.
# File names come from the p70_fasta / p70_fai environment variables instead
# of being repeated here, so the download, the subsetting step and the final
# cleanup all refer to the same files. The :? guards stop the cell with a clear
# message if either variable is missing.
remote_dir="owl:/volume1/web/halfshell/genomic-databank"
for genome_file in "${p70_fasta:?p70_fasta is not set}" "${p70_fai:?p70_fai is not set}"; do
  # Abort on a failed transfer so later steps never run on a missing or
  # partially downloaded file.
  rsync -av "${remote_dir}/${genome_file}" . || exit 1
  # HTTP alternative - uncomment (and comment out rsync) if rsync access is unavailable:
  #wget "http://owl.fish.washington.edu/halfshell/genomic-databank/${genome_file}"
done
echo "-----------------------"
echo ""
# Confirm what was downloaded (long listing, human-readable sizes, oldest first).
ls -ltrh
receiving incremental file list Pgenerosa_v070.fa sent 30 bytes received 2,247,392,295 bytes 15,995,674.91 bytes/sec total size is 2,247,117,885 speedup is 1.00 receiving incremental file list Pgenerosa_v070.fa.fai sent 30 bytes received 20,294,786 bytes 5,798,518.86 bytes/sec total size is 20,292,201 speedup is 1.00 ----------------------- total 2.2G -rw-r--r-- 1 sam users 2.1G Feb 11 12:13 Pgenerosa_v070.fa -rw-rw-rw- 1 sam users 20M Feb 11 13:49 Pgenerosa_v070.fa.fai
%%bash
# Preview the genome index: print its first 20 lines, then quit.
# Each line starts with a scaffold name followed by numeric fields.
sed 20q "${p70_fai}"
PGA_scaffold1__77_contigs__length_89643857 89643857 44 80 81 PGA_scaffold2__36_contigs__length_69596280 69596280 90764494 80 81 PGA_scaffold3__111_contigs__length_57743597 57743597 161230773 80 81 PGA_scaffold4__129_contigs__length_65288255 65288255 219696210 80 81 PGA_scaffold5__109_contigs__length_67248332 67248332 285800614 80 81 PGA_scaffold6__104_contigs__length_61759565 61759565 353889596 80 81 PGA_scaffold7__69_contigs__length_43120122 43120122 416421200 80 81 PGA_scaffold8__63_contigs__length_61151155 61151155 460080368 80 81 PGA_scaffold9__45_contigs__length_38581958 38581958 521995957 80 81 PGA_scaffold10__49_contigs__length_53961475 53961475 561060235 80 81 PGA_scaffold11__79_contigs__length_51449921 51449921 615696274 80 81 PGA_scaffold12__71_contigs__length_50438331 50438331 667789365 80 81 PGA_scaffold13__52_contigs__length_44396874 44396874 718858221 80 81 PGA_scaffold14__91_contigs__length_45393038 45393038 763810101 80 81 PGA_scaffold15__101_contigs__length_47938513 47938513 809770598 80 81 PGA_scaffold16__33_contigs__length_31980953 31980953 858308388 80 81 PGA_scaffold17__51_contigs__length_34923512 34923512 890689148 80 81 PGA_scaffold18__69_contigs__length_27737463 27737463 926049249 80 81 PGA_scaffold19__1_contigs__length_6170 6170 954133471 80 81 PGA_scaffold20__1_contigs__length_7749 7749 954139759 80 81
%%bash
# Build the list of scaffolds to keep: take the first 18 records of the index
# and keep only their first tab-separated field (the scaffold name).
head -n 18 "${p70_fai}" \
  | cut -f 1 \
  > "${fasta_subset_names}"

# Show the resulting name list.
cat "${fasta_subset_names}"
PGA_scaffold1__77_contigs__length_89643857 PGA_scaffold2__36_contigs__length_69596280 PGA_scaffold3__111_contigs__length_57743597 PGA_scaffold4__129_contigs__length_65288255 PGA_scaffold5__109_contigs__length_67248332 PGA_scaffold6__104_contigs__length_61759565 PGA_scaffold7__69_contigs__length_43120122 PGA_scaffold8__63_contigs__length_61151155 PGA_scaffold9__45_contigs__length_38581958 PGA_scaffold10__49_contigs__length_53961475 PGA_scaffold11__79_contigs__length_51449921 PGA_scaffold12__71_contigs__length_50438331 PGA_scaffold13__52_contigs__length_44396874 PGA_scaffold14__91_contigs__length_45393038 PGA_scaffold15__101_contigs__length_47938513 PGA_scaffold16__33_contigs__length_31980953 PGA_scaffold17__51_contigs__length_34923512 PGA_scaffold18__69_contigs__length_27737463
%%bash
# Extract every scaffold named in the subset list from the full genome FASTA.
# xargs appends the names read from stdin as arguments to `samtools faidx`,
# and the extracted sequences are written to the subset FASTA.
< "${fasta_subset_names}" xargs "${samtools}" faidx "${p70_fasta}" > "${out_fasta}"

# Sanity check: count the lines containing ">" (one per FASTA record).
grep --count ">" "${out_fasta}"
18
%%bash
# Run samtools faidx on the subset FASTA with no region arguments.
"${samtools}" faidx "${out_fasta}"

# Long listing with human-readable sizes, sorted by modification time, oldest first.
ls -l -h -t -r
total 3.1G -rw-r--r-- 1 sam users 2.1G Feb 11 12:13 Pgenerosa_v070.fa -rw-rw-rw- 1 sam users 20M Feb 11 13:49 Pgenerosa_v070.fa.fai -rw-rw-r-- 1 sam sam 788 Jun 25 12:55 fasta_subset_names.txt -rw-rw-r-- 1 sam sam 914M Jun 25 12:55 Pgenerosa_v070.18.fa -rw-rw-r-- 1 sam sam 1.3K Jun 25 12:56 Pgenerosa_v070.18.fa.fai
%%bash
# Remove the full genome FASTA and its index.
for full_genome_file in "${p70_fasta}" "${p70_fai}"; do
  rm "${full_genome_file}"
done

# Show what is left in the directory (long listing, oldest first).
ls -l -h -t -r
total 914M -rw-rw-r-- 1 sam sam 788 Jun 25 12:55 fasta_subset_names.txt -rw-rw-r-- 1 sam sam 914M Jun 25 12:55 Pgenerosa_v070.18.fa -rw-rw-r-- 1 sam sam 1.3K Jun 25 12:56 Pgenerosa_v070.18.fa.fai