%%bash
# Record when and where this notebook was run: date, OS release,
# hostname, CPU details, and memory usage.

# Print the divider line used between report sections.
print_rule() { printf '%s\n' "------------"; }

# Print a single empty line.
print_blank() { printf '\n'; }

printf '%s\n' "TODAY'S DATE:"
date
print_rule
print_blank

# Operating system release information
lsb_release -a
print_blank
print_rule

printf '%s\n' "HOSTNAME: "
hostname
print_blank
print_rule

printf '%s\n' "Computer Specs:"
print_blank
lscpu
print_blank
print_rule
print_blank

printf '%s\n' "Memory Specs"
print_blank
free -mh
TODAY'S DATE: Thu Oct 31 08:44:36 PDT 2019 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.6 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: swoose ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 24 On-line CPU(s) list: 0-23 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 44 Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz Stepping: 2 CPU MHz: 2925.971 BogoMIPS: 5851.97 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 12288K NUMA node0 CPU(s): 0-23 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb ssbd ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat flush_l1d ------------ Memory Specs total used free shared buff/cache available Mem: 70G 29G 435M 452M 41G 40G Swap: 4.7G 251M 4.4G
No LSB modules are available.
`%env` variables are good for passing to bash cells.
# Set working directory (exported as an environment variable for the bash cells)
%env wd=/home/sam/analyses/20191031_pgen_v074_stringtie_BAM_splitting
# The same path, kept as a Python variable as well
wd="/home/sam/analyses/20191031_pgen_v074_stringtie_BAM_splitting"
# rsync source prefix; the rsync cell appends ${original_bam} to it
%env rsync_gannet=gannet:/volume2/web/Atumefaciens/20190723_stringtie_pgen_v074/
# wget options followed by the HTTPS URL of the BAM
# NOTE(review): no cell visible here references wget_bam - confirm it is still needed
%env wget_bam=--quiet --no-check-certificate https://gannet.fish.washington.edu/Atumefaciens/20190723_stringtie_pgen_v074/20190723_sorted.merged.bam
# File name of the BAM that is downloaded and split
%env original_bam=20190723_sorted.merged.bam
# File name for the BAM rebuilt by concatenating the split pieces
%env reassembled_bam=20190723_sorted.merged.reassembled.bam
env: wd=/home/sam/analyses/20191031_pgen_v074_stringtie_BAM_splitting env: rsync_gannet=gannet:/volume2/web/Atumefaciens/20190723_stringtie_pgen_v074/ env: wget_bam=--quiet --no-check-certificate https://gannet.fish.washington.edu/Atumefaciens/20190723_stringtie_pgen_v074/20190723_sorted.merged.bam env: original_bam=20190723_sorted.merged.bam env: reassembled_bam=20190723_sorted.merged.reassembled.bam
%%bash
# Create the working directory (and any missing parents), then enter it.
mkdir --parents "${wd}"
# The original `cd {wd}` lacked the `$`, so bash looked for a directory
# literally named "{wd}". Stop the cell if the directory cannot be entered.
# NOTE(review): a %%bash cell runs in its own subprocess, so this cd only
# affects the remainder of this cell - confirm the notebook's own working
# directory is already ${wd} before running the cells that write to ".".
cd "${wd}" || exit 1
/home/sam/analyses/20191031_pgen_v074_stringtie_BAM_splitting
Info on the BAM is here: https://robertslab.github.io/sams-notebook/2019/07/23/Genome-Annotation-Pgenerosa_v074-Transcript-Isoform-ID-with-Stringtie-on-Mox.html
# %%bash
# time \
# wget "${wget_gffs}"
# wget "${wget_fasta}"
# ls -lh ${wd}
%%bash
# Copy the merged BAM from Gannet into the current directory.
bam_source="${rsync_gannet}${original_bam}"
rsync --archive --verbose "${bam_source}" .
receiving incremental file list 20190723_sorted.merged.bam sent 30 bytes received 77,940,299,973 bytes 64,869,163.55 bytes/sec total size is 77,930,786,826 speedup is 1.00
%%bash
# Long listing of the current directory with human-readable sizes.
ls -l --human-readable
total 73G -rw-rw-r-- 1 sam users 73G Aug 29 10:54 20190723_sorted.merged.bam
%%bash
# Cut the BAM into 5GB pieces; each piece is named after the BAM with an
# underscore and split's generated suffix appended.
chunk_prefix="${original_bam}_"
split --bytes=5GB "${original_bam}" "${chunk_prefix}"

# Show the resulting pieces alongside the original.
ls -l --human-readable
total 146G -rw-rw-r-- 1 sam users 73G Aug 29 10:54 20190723_sorted.merged.bam -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:21 20190723_sorted.merged.bam_aa -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:23 20190723_sorted.merged.bam_ab -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:25 20190723_sorted.merged.bam_ac -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:27 20190723_sorted.merged.bam_ad -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:30 20190723_sorted.merged.bam_ae -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:32 20190723_sorted.merged.bam_af -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:34 20190723_sorted.merged.bam_ag -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:37 20190723_sorted.merged.bam_ah -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:39 20190723_sorted.merged.bam_ai -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:41 20190723_sorted.merged.bam_aj -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:43 20190723_sorted.merged.bam_ak -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:46 20190723_sorted.merged.bam_al -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:48 20190723_sorted.merged.bam_am -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:50 20190723_sorted.merged.bam_an -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:53 20190723_sorted.merged.bam_ao -rw-rw-r-- 1 sam sam 2.8G Oct 31 13:54 20190723_sorted.merged.bam_ap
%%bash
# Rebuild the BAM by concatenating the split pieces. The glob expands in
# sorted order, which matches the order of split's generated suffixes.
cat "${original_bam}"_* > "${reassembled_bam}" || exit 1

# Let's see if MD5 checksums are the same.
# md5sum prints the file name next to the hash, so diffing its raw output
# for two differently named files always reports a difference (and a
# non-zero exit status) even when the hashes are identical. Hash stdin and
# keep only the first field so that just the checksums are compared.
original_md5=$(md5sum < "${original_bam}") || exit 1
reassembled_md5=$(md5sum < "${reassembled_bam}") || exit 1
original_md5=${original_md5%% *}
reassembled_md5=${reassembled_md5%% *}

printf '%s  %s\n' "${original_md5}" "${original_bam}"
printf '%s  %s\n' "${reassembled_md5}" "${reassembled_bam}"

# Fail the cell only when the checksums really differ.
if [[ "${original_md5}" != "${reassembled_md5}" ]]; then
  printf 'MD5 mismatch between %s and %s\n' \
    "${original_bam}" "${reassembled_bam}" >&2
  exit 1
fi
printf 'MD5 checksums match\n'
1c1 < e53c73db4145ba65522f771535d10a52 20190723_sorted.merged.bam --- > e53c73db4145ba65522f771535d10a52 20190723_sorted.merged.reassembled.bam
--------------------------------------------------------------------------- CalledProcessError Traceback (most recent call last) <ipython-input-21-3eac65acf363> in <module> ----> 1 get_ipython().run_cell_magic('bash', '', "cat ${original_bam}_* > ${reassembled_bam}\n\n# Let's see if MD5 checksums are the same..\ndiff <(md5sum ${original_bam}) <(md5sum ${reassembled_bam})\n") ~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell) 2350 with self.builtin_trap: 2351 args = (magic_arg_s, cell) -> 2352 result = fn(*args, **kwargs) 2353 return result 2354 ~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magics/script.py in named_script_magic(line, cell) 140 else: 141 line = script --> 142 return self.shebang(line, cell) 143 144 # write a basic docstring: </home/sam/programs/minicocnda3/lib/python3.6/site-packages/decorator.py:decorator-gen-110> in shebang(self, line, cell) ~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k) 185 # but it's overkill for just that one bit of state. 186 def magic_deco(arg): --> 187 call = lambda f, *a, **k: f(*a, **k) 188 189 if callable(arg): ~/programs/minicocnda3/lib/python3.6/site-packages/IPython/core/magics/script.py in shebang(self, line, cell) 243 sys.stderr.flush() 244 if args.raise_error and p.returncode!=0: --> 245 raise CalledProcessError(p.returncode, cell, output=out, stderr=err) 246 247 def _run_script(self, p, cell, to_close): CalledProcessError: Command 'b"cat ${original_bam}_* > ${reassembled_bam}\n\n# Let's see if MD5 checksums are the same..\ndiff <(md5sum ${original_bam}) <(md5sum ${reassembled_bam})\n"' returned non-zero exit status 1.
%%bash
# Remove the full-size BAMs, leaving only the split pieces.
# Quoted so the names are passed to rm intact; `--` ends option parsing;
# ${var:?} aborts the cell with an error if either variable is unset or
# empty rather than calling rm with an empty operand.
rm -- "${original_bam:?}" "${reassembled_bam:?}"

# Long listing, oldest first, with human-readable sizes.
ls -ltrh
total 73G -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:21 20190723_sorted.merged.bam_aa -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:23 20190723_sorted.merged.bam_ab -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:25 20190723_sorted.merged.bam_ac -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:27 20190723_sorted.merged.bam_ad -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:30 20190723_sorted.merged.bam_ae -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:32 20190723_sorted.merged.bam_af -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:34 20190723_sorted.merged.bam_ag -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:37 20190723_sorted.merged.bam_ah -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:39 20190723_sorted.merged.bam_ai -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:41 20190723_sorted.merged.bam_aj -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:43 20190723_sorted.merged.bam_ak -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:46 20190723_sorted.merged.bam_al -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:48 20190723_sorted.merged.bam_am -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:50 20190723_sorted.merged.bam_an -rw-rw-r-- 1 sam sam 4.7G Oct 31 13:53 20190723_sorted.merged.bam_ao -rw-rw-r-- 1 sam sam 2.8G Oct 31 13:54 20190723_sorted.merged.bam_ap