%%bash
# Record the run context for this notebook: current date, OS release,
# hostname, CPU details and memory, with a dashed rule between sections.

# Print the dashed section separator.
print_rule() { printf '%s\n' "------------"; }
# Print a single empty line.
print_blank() { printf '\n'; }

printf '%s\n' "TODAY'S DATE:"
date
print_rule
print_blank

# Operating system / distribution details
lsb_release -a
print_blank
print_rule

printf '%s\n' "HOSTNAME: "
hostname
print_blank
print_rule

printf '%s\n' "Computer Specs:"
print_blank
lscpu
print_blank
print_rule
print_blank

printf '%s\n' "Memory Specs"
print_blank
free -mh
TODAY'S DATE: Wed Dec 19 11:16:08 PST 2018 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.5 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: emu ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 16 On-line CPU(s) list: 0-15 Thread(s) per core: 2 Core(s) per socket: 4 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 26 Model name: Intel(R) Xeon(R) CPU E5520 @ 2.27GHz Stepping: 5 CPU MHz: 2394.000 CPU max MHz: 2394.0000 CPU min MHz: 1596.0000 BogoMIPS: 4521.80 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 8192K NUMA node0 CPU(s): 0-15 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts nopl xtopology nonstop_tsc aperfmperf pni dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm dca sse4_1 sse4_2 popcnt lahf_lm tpr_shadow vnmi flexpriority ept vpid dtherm ida ------------ Memory Specs total used free shared buff/cache available Mem: 47G 1.5G 43G 377M 2.2G 44G Swap: 11G 438M 11G
No LSB modules are available.
%%bash
# Create the analysis directory for this notebook.
# -p makes re-running the cell a no-op when the directory already exists
# (plain mkdir fails with "File exists") and creates any missing parents.
mkdir -p /home/sam/analyses/20181219_Pgenerosa_repeatmodeler
# Environment variables set with the IPython %env magic; the bash cells in
# this notebook read them as ${work_dir}, ${rptm} and ${Pgenerosa_v070_fasta}.
# work_dir: directory the BuildDatabase and RepeatModeler cells cd into and write their logs to.
%env work_dir = /home/sam/analyses/20181219_Pgenerosa_repeatmodeler
# rptm: directory containing the BuildDatabase and RepeatModeler Perl scripts.
%env rptm = /home/shared/RepeatModeler-open-1.0.11
# Pgenerosa_v070_fasta: FASTA file passed to BuildDatabase as its input.
%env Pgenerosa_v070_fasta = /home/sam/data/genomes/geoduck/Pgenerosa_v070.fa
env: work_dir=/home/sam/analyses/20181219_Pgenerosa_repeatmodeler env: rptm=/home/shared/RepeatModeler-open-1.0.11 env: Pgenerosa_v070_fasta=/home/sam/data/genomes/geoduck/Pgenerosa_v070.fa
%%bash
# Copy Pgenerosa_v070.fa from host "owl" into the local geoduck genomes
# directory (timing the transfer), then list the directory contents.

# Stop if the target directory is missing: without this check a failed cd
# would let rsync write the FASTA into whatever directory the cell started in.
cd /home/sam/data/genomes/geoduck/ || exit 1

time \
rsync \
--archive \
owl:/volume1/web/halfshell/genomic-databank/Pgenerosa_v070.fa \
.

ls -lh

# Uncomment following line(s) to download from web
# wget http://owl.fish.washington.edu/halfshell/genomic-databank/Pgenerosa_v070.fa
total 2.1G -rw-rw-rw- 1 sam sam 2.1G Aug 14 11:45 Pgenerosa_v070.fa
real 1m24.224s user 0m29.692s sys 0m12.028s
%%bash
# Run RepeatModeler's BuildDatabase on the genome FASTA to create a database
# named Pgenerosa_v070 (ncbi engine), capture its output in
# database_build_run.out, send a notification email, then print the log.
#
# Reads from the environment: work_dir, rptm, Pgenerosa_v070_fasta, EMAIL.

# Fail early with a clear message if a required variable is unset or empty.
# An empty work_dir would otherwise make `cd` silently switch to $HOME.
: "${work_dir:?work_dir is not set}"
: "${rptm:?rptm is not set}"
: "${Pgenerosa_v070_fasta:?Pgenerosa_v070_fasta is not set}"

# Do not run the build (or write its log) in the wrong directory.
cd "${work_dir}" || exit 1

# stdout and stderr of BuildDatabase both go to the log file; the timing
# report from `time` is not part of that redirection.
time \
perl "${rptm}/BuildDatabase" \
-name Pgenerosa_v070 \
-engine ncbi \
"${Pgenerosa_v070_fasta}" \
> database_build_run.out 2>&1

# Notification email: rewrites the Subject: line of the mail template and
# pipes it to msmtp. This runs whether or not BuildDatabase succeeded.
sed '/^Subject:/ s/ / repeatmodeler db JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"

echo "------------------------------------------------------------------------"
echo ""
echo "------------------------------------------------------------------------"

# Show what BuildDatabase reported.
cat "${work_dir}/database_build_run.out"
------------------------------------------------------------------------ ------------------------------------------------------------------------ Building database Pgenerosa_v070: Adding /home/sam/data/genomes/geoduck/Pgenerosa_v070.fa to database Number of sequences (bp) added to database: 313649 ( 2205688688 bp )
real 1m23.003s user 1m2.300s sys 0m5.360s
%%bash
# Run RepeatModeler against the Pgenerosa_v070 database in work_dir
# (ncbi engine, -pa 16), capture its output in run.out, then send a
# notification email.
#
# Reads from the environment: work_dir, rptm, EMAIL.

# Fail early with a clear message if a required variable is unset or empty.
# An empty work_dir would otherwise make `cd` silently switch to $HOME and
# point -database at /Pgenerosa_v070.
: "${work_dir:?work_dir is not set}"
: "${rptm:?rptm is not set}"

# Do not start this long-running job in the wrong directory.
cd "${work_dir}" || exit 1

# stdout and stderr of RepeatModeler both go to run.out; the timing report
# from `time` is not part of that redirection.
time \
perl "${rptm}/RepeatModeler" \
-database "${work_dir}/Pgenerosa_v070" \
-engine ncbi \
-pa 16 \
> run.out 2>&1

# Notification email: rewrites the Subject: line of the mail template and
# pipes it to msmtp. This runs whether or not RepeatModeler succeeded.
sed '/^Subject:/ s/ / repeatmodeler JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"
real 2056m18.020s user 20838m22.216s sys 407m24.308s
%%bash
# Show the last 50 lines of run.out in the analysis directory.
# Fail with a clear message if work_dir is unset or empty; otherwise the
# path would collapse to /run.out.
: "${work_dir:?work_dir is not set}"
tail -n 50 "${work_dir}/run.out"
- Saving elements to a file... - 16 elements found. Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time Refining family-1617 model... - numRounds = 7 - Consensus Length = 233 ( orig = 233 ) - Avg Kimura Divergence = 0.01 - Unaligned sequences = 0 ( orig = 0 ) Build Consensus: 0:0:1 Elapsed Time Refinement: 00:00:02 (hh:mm:ss) Elapsed Time Processing RECON family: 6568 - Saving elements to a file... - 16 elements found. Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time Refining family-6568 model... - numRounds = 6 - Consensus Length = 404 ( orig = 417 ) - Avg Kimura Divergence = 0.01 - Unaligned sequences = 3 ( orig = 3 ) Build Consensus: 0:0:2 Elapsed Time Refinement: 00:00:03 (hh:mm:ss) Elapsed Time Family Refinement: 00:23:28 (hh:mm:ss) Elapsed Time Round Time: 16:14:42 (hh:mm:ss) Elapsed Time Discovery complete: 2001 families found Classifying Repeats... RepeatClassifier Version open-1.0.11 =============================== Search Engine = ncbi - Looking for Simple and Low Complexity sequences.. - Looking for similarity to known repeat proteins.. - Looking for similarity to known repeat consensi.. Classification Time: 01:28:39 (hh:mm:ss) Elapsed Time Program Time: 34:16:10 (hh:mm:ss) Elapsed Time Working directory: /home/sam/analyses/20181219_Pgenerosa_repeatmodeler/RM_31027.WedDec191236402018 may be deleted unless there were problems with the run. The results have been saved to: /home/sam/analyses/20181219_Pgenerosa_repeatmodeler/Pgenerosa_v070-families.fa - Consensus sequences for each family identified. /home/sam/analyses/20181219_Pgenerosa_repeatmodeler/Pgenerosa_v070-families.stk - Seed alignments for each family identified. This version of RepeatModeler can upload families directly to the open repeat database - Dfam_consensus. 
Please consider uploading your final curated library using the RepeatModeler "util/dfamConsensusTool.pl" script ( details at http://www.repeatmasker.org/RepeatModeler/dfamConsensusTool ) or posting your raw (uncurated) RepeatModeler results to the TE Raw Dataset Repository ( http://www.repeatmasker.org/Dfam_consensus/#/public/repository ).
%%bash
# Copy the analysis directory to host "gannet", then send a notification
# email.

# The rsync source is relative (required for --relative to recreate only the
# 20181219_Pgenerosa_repeatmodeler path component on the destination), so
# stop if the cd fails rather than resolving it against another directory.
cd /home/sam/analyses/ || exit 1

rsync --archive --relative ./20181219_Pgenerosa_repeatmodeler gannet:/volume1/web/Atumefaciens

# Notification email: rewrites the Subject: line of the mail template and
# pipes it to msmtp. This runs whether or not rsync succeeded.
sed '/^Subject:/ s/ / rsync JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"