%%bash
# Print a snapshot of the machine running this notebook: current date,
# OS release, hostname, CPU details and memory usage.

# Print the dashed rule used between report sections.
rule() { printf '%s\n' "------------"; }
# Print a single empty line.
blank() { printf '\n'; }

printf '%s\n' "TODAY'S DATE:"
date
rule
blank

# Operating system release information
lsb_release -a
blank
rule

printf '%s\n' "HOSTNAME: "
hostname
blank
rule

printf '%s\n' "Computer Specs:"
blank
lscpu
blank
rule
blank

printf '%s\n' "Memory Specs"
blank
free -mh
TODAY'S DATE: Wed Jun 26 11:28:31 PDT 2019 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.6 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: emu ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 16 On-line CPU(s) list: 0-15 Thread(s) per core: 2 Core(s) per socket: 4 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 26 Model name: Intel(R) Xeon(R) CPU E5520 @ 2.27GHz Stepping: 5 CPU MHz: 2394.000 CPU max MHz: 2394.0000 CPU min MHz: 1596.0000 BogoMIPS: 4521.81 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 8192K NUMA node0 CPU(s): 0-15 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts nopl xtopology nonstop_tsc aperfmperf pni dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm dca sse4_1 sse4_2 popcnt lahf_lm tpr_shadow vnmi flexpriority ept vpid dtherm ida ------------ Memory Specs total used free shared buff/cache available Mem: 47G 1.4G 43G 130M 2.0G 45G Swap: 11G 0B 11G
No LSB modules are available.
# Set working directories.
# Each path is defined twice: once as a Python variable and once with the
# %env magic, which exports it as an environment variable so the %%bash
# cells can read it as "${data_dir}" / "${work_dir}".
# Keep both definitions of a path identical when editing; do not append a
# trailing comment to a %env line, as it would become part of the value.
# data_dir: directory the genome FASTA is downloaded into.
data_dir = "/home/sam/data/genomes"
%env data_dir = /home/sam/data/genomes
# work_dir: directory where BuildDatabase/RepeatModeler run and write their logs.
work_dir = "/home/sam/analyses/20190626_Pgenerosa_v074_repeatmodeler"
%env work_dir = /home/sam/analyses/20190626_Pgenerosa_v074_repeatmodeler
env: data_dir=/home/sam/data/genomes env: work_dir=/home/sam/analyses/20190626_Pgenerosa_v074_repeatmodeler
# Set file paths/names (exported with %env so the %%bash cells can read them)
# Pgenerosa_v074_fasta: genome FASTA given to BuildDatabase as its input file.
# It is the file the download cell places in data_dir.
%env Pgenerosa_v074_fasta = /home/sam/data/genomes/Pgenerosa_v074.fa
# rptm_db_name: database name used for BuildDatabase -name and, prefixed with
# work_dir, for RepeatModeler -database.
%env rptm_db_name = Pgenerosa_v074
env: Pgenerosa_v074_fasta=/home/sam/data/genomes/Pgenerosa_v074.fa env: rptm_db_name=Pgenerosa_v074
# Set program paths
# rptm: RepeatModeler install directory; the %%bash cells run
# ${rptm}/BuildDatabase and ${rptm}/RepeatModeler from it with perl.
%env rptm = /home/shared/RepeatModeler-open-1.0.11
env: rptm=/home/shared/RepeatModeler-open-1.0.11
%%bash
# Create the analysis directory (and any missing parents) that later cells cd into.
# The :? expansion aborts with a clear message when work_dir is unset or empty
# (i.e. the %env cell was not run) instead of calling mkdir with an empty path.
mkdir --parents "${work_dir:?work_dir is not set - run the %env cell first}"
%%bash
# Download the Pgenerosa_v074 genome FASTA into data_dir and list the directory.
# Reads from the environment: data_dir (set with %env).

# Stop if data_dir cannot be entered; otherwise rsync would write the genome
# into whatever directory the cell started in.
cd "${data_dir}" || exit 1

# Copy the assembly from owl (--archive preserves file attributes) and time the
# transfer. The status is captured so a failed transfer fails the cell after
# the directory listing instead of being hidden by it.
rsync_status=0
time \
rsync \
--archive \
owl:/volume1/web/halfshell/genomic-databank/Pgenerosa_v074.fa \
. \
|| rsync_status=$?

# Show what is now present in data_dir.
ls -lh

# Uncomment following line(s) to download from web
# wget https://owl.fish.washington.edu/halfshell/genomic-databank/Pgenerosa_v074.fa

exit "${rsync_status}"
total 914M -rw-rw-rw- 1 sam sam 914M Jun 26 08:49 Pgenerosa_v074.fa
real 2m52.522s user 0m14.052s sys 0m5.720s
%%bash
# Build the RepeatModeler sequence database from the genome FASTA.
# Reads from the environment (set with %env):
#   work_dir, rptm, rptm_db_name, Pgenerosa_v074_fasta
# BuildDatabase stdout and stderr are written to database_build_run.out in
# work_dir; the log is printed afterwards.

# Stop if work_dir cannot be entered so the database and log are not written
# to an unexpected location.
cd "${work_dir}" || exit 1

# Capture the BuildDatabase status: the log is still printed on failure, but
# the cell then exits non-zero instead of reporting the status of cat.
build_status=0
time \
perl "${rptm}/BuildDatabase" \
-name "${rptm_db_name}" \
-engine ncbi \
"${Pgenerosa_v074_fasta}" \
>& database_build_run.out \
|| build_status=$?

echo "------------------------------------------------------------------------"
echo ""
echo "------------------------------------------------------------------------"
cat "${work_dir}/database_build_run.out"

exit "${build_status}"
------------------------------------------------------------------------ ------------------------------------------------------------------------ Building database Pgenerosa_v074: Adding /home/sam/data/genomes/Pgenerosa_v074.fa to database Number of sequences (bp) added to database: 18 ( 942353201 bp )
real 0m25.107s user 0m22.716s sys 0m1.740s
%%bash
# Run RepeatModeler against the database built in the previous cell.
# Reads from the environment: work_dir, rptm, rptm_db_name (set with %env)
# and EMAIL (notification recipient).
# RepeatModeler stdout and stderr are written to run.out in work_dir.

# Stop if work_dir cannot be entered; RepeatModeler writes run.out (and, per
# its log, its RM_* working directory) relative to the current directory.
cd "${work_dir}" || exit 1

# -pa 16 requests 16 parallel jobs, matching the 16 CPUs lscpu reported for
# this machine. The status is captured so the notification below is still
# sent, while the cell itself reports a RepeatModeler failure.
rptm_status=0
time \
perl "${rptm}/RepeatModeler" \
-database "${work_dir}/${rptm_db_name}" \
-engine ncbi \
-pa 16 \
>& run.out \
|| rptm_status=$?

# Email a "job complete" notice: insert the job text after the first space on
# the Subject: line of the mail template and send it with msmtp.
# NOTE: this is sent whether or not RepeatModeler succeeded; check run.out.
sed '/^Subject:/ s/ / repeatmodeler JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"

exit "${rptm_status}"
real 1974m13.579s user 22777m27.440s sys 44m16.452s
%%bash
# Show the last 50 lines of the RepeatModeler log to see how the run ended.
# The path is quoted so a work_dir containing spaces or glob characters is
# passed to tail as a single argument.
tail -n 50 "${work_dir}/run.out"
- Saving elements to a file... - 16 elements found. Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time Refining family-2168 model... - numRounds = 5 - Consensus Length = 524 ( orig = 525 ) - Avg Kimura Divergence = 0.01 - Unaligned sequences = 2 ( orig = 2 ) Build Consensus: 0:0:1 Elapsed Time Refinement: 00:00:01 (hh:mm:ss) Elapsed Time Processing RECON family: 2536 - Saving elements to a file... - 16 elements found. Element Gathering: 00:00:00 (hh:mm:ss) Elapsed Time Refining family-2536 model... - numRounds = 5 - Consensus Length = 426 ( orig = 430 ) - Avg Kimura Divergence = 0.01 - Unaligned sequences = 1 ( orig = 3 ) Build Consensus: 0:0:1 Elapsed Time Refinement: 00:00:01 (hh:mm:ss) Elapsed Time Family Refinement: 00:22:39 (hh:mm:ss) Elapsed Time Round Time: 15:23:11 (hh:mm:ss) Elapsed Time Discovery complete: 2029 families found Classifying Repeats... RepeatClassifier Version open-1.0.11 =============================== Search Engine = ncbi - Looking for Simple and Low Complexity sequences.. - Looking for similarity to known repeat proteins.. - Looking for similarity to known repeat consensi.. Classification Time: 01:31:26 (hh:mm:ss) Elapsed Time Program Time: 32:54:13 (hh:mm:ss) Elapsed Time Working directory: /home/sam/analyses/20190626_Pgenerosa_v074_repeatmodeler/RM_8927.WedJun261133512019 may be deleted unless there were problems with the run. The results have been saved to: /home/sam/analyses/20190626_Pgenerosa_v074_repeatmodeler/Pgenerosa_v074-families.fa - Consensus sequences for each family identified. /home/sam/analyses/20190626_Pgenerosa_v074_repeatmodeler/Pgenerosa_v074-families.stk - Seed alignments for each family identified. This version of RepeatModeler can upload families directly to the open repeat database - Dfam_consensus. 
Please consider uploading your final curated library using the RepeatModeler "util/dfamConsensusTool.pl" script ( details at http://www.repeatmasker.org/RepeatModeler/dfamConsensusTool ) or posting your raw (uncurated) RepeatModeler results to the TE Raw Dataset Repository ( http://www.repeatmasker.org/Dfam_consensus/#/public/repository ).
%%bash
# Copy the finished analysis directory to gannet and send an email notice.
# Reads from the environment: EMAIL (notification recipient).

# Stop if the analyses directory is missing; the relative source path below
# only resolves correctly from there.
cd /home/sam/analyses/ || exit 1

# --relative keeps the ./20190626_... path component on the destination side.
# The status is captured so the notification is still sent, while the cell
# itself reports a failed transfer.
rsync_status=0
rsync --archive --relative ./20190626_Pgenerosa_v074_repeatmodeler gannet:/volume2/web/Atumefaciens \
|| rsync_status=$?

# Email a "job complete" notice using the shared mail template.
# NOTE: this is sent whether or not rsync succeeded.
sed '/^Subject:/ s/ / rsync JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"

exit "${rsync_status}"