%%bash
# Provenance header for this notebook run: prints the current date, the OS
# release, the hostname, CPU details and memory totals, each separated by a
# dashed rule.
rule() { echo "------------"; }
gap() { echo ""; }

echo "TODAY'S DATE:"
date
rule
gap
lsb_release -a # Display operating system info
gap
rule
echo "HOSTNAME: "
hostname
gap
rule
echo "Computer Specs:"
gap
lscpu
gap
rule
gap
echo "Memory Specs"
gap
free -mh
TODAY'S DATE: Tue May 29 12:23:57 PDT 2018 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.4 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: roadrunner ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 16 On-line CPU(s) list: 0-15 Thread(s) per core: 2 Core(s) per socket: 4 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 26 Model name: Intel(R) Xeon(R) CPU E5520 @ 2.27GHz Stepping: 5 CPU MHz: 1596.000 CPU max MHz: 2394.0000 CPU min MHz: 1596.0000 BogoMIPS: 4521.78 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 8192K NUMA node0 CPU(s): 0-15 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts nopl xtopology nonstop_tsc aperfmperf pni dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm dca sse4_1 sse4_2 popcnt lahf_lm kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida ------------ Memory Specs total used free shared buff/cache available Mem: 47G 472M 15G 274M 31G 45G Swap: 47G 0B 47G
No LSB modules are available.
%%bash
# Create the working directory for this RepeatMasker analysis.
# -p keeps the cell safe to re-run: an already-existing directory is not an
# error, and any missing parent directories are created as well.
mkdir -p -- /home/sam/analyses/20180529_virginica_repeatmasker
%%bash
# Download the NCBI C. virginica genome (gzipped FASTA) into the analysis
# directory and confirm the download is intact.
analysis_dir=/home/sam/analyses/20180529_virginica_repeatmasker
genome_gz=GCF_002022765.2_C_virginica-3.0_genomic.fna.gz

# Stop here if the directory is missing, so nothing is downloaded into
# whatever directory the notebook happens to be running from.
cd "${analysis_dir}" || exit 1

# wget's stdout and stderr are captured in files instead of the notebook.
wget "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/002/022/765/GCF_002022765.2_C_virginica-3.0/${genome_gz}" \
  1> wget_stout.txt \
  2> wget_stderr.txt \
  || { echo "wget failed; see ${analysis_dir}/wget_stderr.txt" >&2; exit 1; }

# The download is a plain gzip file, not a tar archive, so `tar -xzvf` cannot
# unpack it. Only verify integrity here; the .gz is left in place and is
# decompressed with gunzip in a separate step.
gzip -t "${genome_gz}" || exit 1
ls
GCF_002022765.2_C_virginica-3.0_genomic.fna.gz wget_stderr.txt wget_stout.txt
tar: This does not look like a tar archive tar: Skipping to next header tar: Exiting with failure status due to previous errors
%%bash
# Decompress the genome in place (the .fna.gz is replaced by the .fna), then
# list the analysis directory to confirm the result.
analysis_dir=/home/sam/analyses/20180529_virginica_repeatmasker
gunzip "${analysis_dir}/GCF_002022765.2_C_virginica-3.0_genomic.fna.gz"
# A bare `ls` would list the notebook's own working directory rather than the
# directory that was just modified, so the target is named explicitly.
ls "${analysis_dir}/"
20150313_LSU_Oil_Spill_IndexID_Comparisons.ipynb 20150316_LSU_OilSpill_Adapter_ID.ipynb 20150317_LSU_OilSpill_EpinextAdaptor1_ID.ipynb 20150408_Install_Bismark_bisulfite_mapper.ipynb 20150414_C_gigas_Larvae_OA_Trimmomatic_FASTQC.ipynb 20150414_C_virginica_LSU_Oil_Spill_Trimmomatic_FASTQC.ipynb 20150429_Gigas_larvae_OA_BLASTn.ipynb 20150501_Cgigas_larvae_OA_BLASTn_nt.ipynb 20150506_Cgigas_larvae_OA_trimmomatic_FASTQC.ipynb 20150521_Cgigas_larvae_OA_Trimmomatic_FASTQC.ipynb 20160114_wasted_space_synologies.ipynb 20160126_Olurida_BGI_data_handling.ipynb 20160126_Pgenerosa_BGI_data_handling.ipynb 20160203_Olurida_Zymo_Data_Handling.ipynb 20160308_find_rename_2bRAD_undetermined_fastqs.ipynb 20160314_Olurida_GBS_data_management.ipynb 20160406_Oly_GBS_STACKS.ipynb 20160406_STACKS_install.ipynb 20160411_Concatenate_Oly_MBDseq.ipynb 20160418_Oly_GBS_PE-Pyrad_populations.ipynb 20160418_pyrad_oly_PE-GBS.ipynb 20160427_Oly_GBS_data_management.ipynb 20160427_speed_comparison.ipynb 20160428_Oly_GBS_STACKS.ipynb 20160502_Oly_GBS_barcode_repair.ipynb 20160516_Oly_Small_Insert_Library_Genome_Read_Counts.ipynb 20160523_Oly_GBS_Stacks.ipynb 20160525_pyrad_oly_gbs_bgi.ipynb 20160609_pyrad_oly_gbs_bgi.ipynb 20160714_EC2_Oly_GBS_stacks_analysis.ipynb 20160715_ec2_oly_gbs_pyrad.ipynb 20160816_oly_gbs_fst_calcs.ipynb 20161025_Pgenerosa_Small_Library_Genome_Read_Counts.ipynb 20161117_docker_oly_genome_fastq_corruption.ipynb 20161117_docker_oly_vcf_analysis.ipynb 20161129_docker_R_magics_failure.ipynb 20161206_docker_BGI_genome_downloads.ipynb 20161214_docker_BGI_data_integrity_check.ipynb 20161214_docker_notebook_trimming.ipynb 20161229_docker_genewiz_geoduck_RRBS_data.ipynb 20161230_docker_geoduck_RRBS_md5_checks.ipynb 20170104_docker_oly_BGI_genome_corruption_solved.ipynb 20170227_docker_jay_ngs_data_retrieval.ipynb 20170301_docker_fastqc_nondemultiplexed_bgi_oly_gbs.ipynb 20170306_docker_fastqc_demultiplexed_bgi_oly_gbs.ipynb 
20170314_docker_Oly_BGI_GBS_demultiplexing_reproducibility.ipynb 20170320_docker_Oly_BGI_GBS_demultiplexing_reproducibility.ipynb 20170622_oly_pacbio_data_management.ipynb 20170907_docker_pacbio_oly_minimap2.ipynb 20170918_docker_pacbio_oly_miniasm0.2.ipynb 20170918_docker_pacbio_oly_racon0.5.0.ipynb 20171003_docker_oly_assembly_comparisons.ipynb 20171004_docker_oly_redundans.ipynb 20171005_docker_oly_redundans.ipynb 20171018_docker_oly_canu.ipynb 20171023_docker_oly_pacbio_canu_comparisons.ipynb 20171023_docker_oly_redundans.ipynb 20171023_restore_scaphapoda_data.ipynb 20171113_emu_pbjelly_22mer_plat.ipynb 20171114_emu_pbjelly_BGI_scaffold.ipynb 20171114_swoose_oly_assembly_comparisons_quast.ipynb 20171130_emu_pbjelly.ipynb 20180103_emu_pbjelly.ipynb 20180116_swoose_oly_assembly_comparisons_quast.ipynb 20180125_roadrunner_trimming_geoduck_novaseq.ipynb 20180205_roadrunner_meraculous_geoduck_novaseq.ipynb 20180301_roadrunner_assembly_meraculous_geoduck_novaseq_subset.ipynb 20180503_emu_oly_methylation_mapping.ipynb 20180507_roadrunner_geoduck_genome_mapbacks.ipynb 20180508_roadrunner_geoduck_bowtie2_genome_mapping.ipynb 20180514_roadrunner_geoduck_RRBS_trimming.ipynb 20180516_roadrunner_geoduck_EPI_fastqc.ipynb 20180516_roadrunner_geoduck_RRBS_trimming.ipynb 20180523_roadrunner_oly_TEs_repeatmasker.ipynb 20180529_roadrunner_virginica_TEs_repeatmasker.ipynb InstallingBLAST.ipynb multiqc_data multiqc_report.html PE-GBS_empirical.ipynb PhageNGS_ID.ipynb PhageNGS.ipynb README.md stdin_fastqc.zip template_linux.ipynb Untitled.ipynb
%%bash
# Show the contents of the analysis directory.
analysis_dir=/home/sam/analyses/20180529_virginica_repeatmasker/
ls "${analysis_dir}"
GCF_002022765.2_C_virginica-3.0_genomic.fna wget_stderr.txt wget_stout.txt
%%bash
# Rename the genome file from .fna to .fasta, then list the analysis directory.
# NOTE(review): the destination is a bare file name, so the renamed file lands
# in the current working directory instead of staying next to the source.
# The listing below therefore no longer shows the genome; a later step moves
# the .fasta file from the working directory back into the analysis directory.
fna_path=/home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fna
fasta_name=GCF_002022765.2_C_virginica-3.0_genomic.fasta
mv "${fna_path}" "${fasta_name}"
ls /home/sam/analyses/20180529_virginica_repeatmasker/
wget_stderr.txt wget_stout.txt
%%bash
# Move the renamed genome from the current working directory into the
# analysis directory, then list that directory to confirm it arrived.
analysis_dir=/home/sam/analyses/20180529_virginica_repeatmasker
mv GCF_002022765.2_C_virginica-3.0_genomic.fasta "${analysis_dir}"
ls "${analysis_dir}/"
GCF_002022765.2_C_virginica-3.0_genomic.fasta wget_stderr.txt wget_stout.txt
-species "crassostrea virginica"
: Sets species to Crassostrea virginica
-par 15
: Use 15 CPU threads
-gff
: Create GFF output file (in addition to default files)
-excln
: Adjusts output table calculations to exclude runs of >=20 Ns/Xs in the query sequence. Useful for draft genome assemblies.
1>
: Send stdout to file instead of printing to notebook.
2>
: Send stderr to file instead of printing to notebook.
%%bash
# Run RepeatMasker on the C. virginica genome and send a notification e-mail
# once the command returns.
analysis_dir=/home/sam/analyses/20180529_virginica_repeatmasker
genome_fasta="${analysis_dir}/GCF_002022765.2_C_virginica-3.0_genomic.fasta"
# Species, 15 parallel jobs, GFF output, and exclusion of N-runs from the
# summary table calculations.
rm_opts=(-species "crassostrea virginica" -par 15 -gff -excln)

cd "${analysis_dir}/"

# RepeatMasker's stdout/stderr go to files; the `time` report itself is not
# redirected and is printed to the notebook.
time /home/shared/RepeatMasker-4.0.7/RepeatMasker "${genome_fasta}" "${rm_opts[@]}" \
  1> "${analysis_dir}/rm_stdout.out" \
  2> "${analysis_dir}/rm_stderr.err"

# On the "Subject:" line of the mail template, replace the first space with
# " repeatmasker JOB COMPLETE", then pipe the message to msmtp for $EMAIL.
sed '/^Subject:/ s/ / repeatmasker JOB COMPLETE/' ~/.default-subject.mail | msmtp "$EMAIL"
real 85m42.321s user 907m8.920s sys 130m12.600s
%%bash
# Show the tail of RepeatMasker's captured stdout and stderr, followed by the
# full summary table, each under a dashed banner.
results_dir=/home/sam/analyses/20180529_virginica_repeatmasker

# Print a title framed by dashed rules.
banner() {
  echo "------------"
  echo "$1"
  echo "------------"
}

# Print the two blank lines that separate sections.
spacer() {
  echo ""
  echo ""
}

banner "STANDARD OUT"
tail "${results_dir}/rm_stdout.out"
spacer
banner "STANDARD ERROR"
tail "${results_dir}/rm_stderr.err"
spacer
banner "REPEATMASKER C.VIRGINICA TABLE"
cat "${results_dir}/GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl"
------------ STANDARD OUT ------------ cycle 4 ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. cycle 5 cycle 6 ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. cycle 7 ............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................. cycle 8 ........................................................................................................................................................................................................................................................................................................................................................................................................... 
cycle 9 ........................................................................................................................................................................................................................................................................................................................................................................................................... cycle 10 ............................................................................................................................................................................................................................................................................................................................................................................................................ Generating output... ......................................................................................................................................................................................................................................................................................................................................................................................................... 
masking done ------------ STANDARD ERROR ------------ ------------ REPEATMASKER C.VIRGINICA TABLE ------------ ================================================== file name: GCF_002022765.2_C_virginica-3.0_genomic.fasta sequences: 11 total length: 684741128 bp (684675328 bp excl N/X-runs) GC level: 34.83 % bases masked: 46637065 bp ( 6.81 %) ================================================== number of length percentage elements* occupied of sequence -------------------------------------------------- Retroelements 43139 8952068 bp 1.31 % SINEs: 43139 8952068 bp 1.31 % Penelope 0 0 bp 0.00 % LINEs: 0 0 bp 0.00 % CRE/SLACS 0 0 bp 0.00 % L2/CR1/Rex 0 0 bp 0.00 % R1/LOA/Jockey 0 0 bp 0.00 % R2/R4/NeSL 0 0 bp 0.00 % RTE/Bov-B 0 0 bp 0.00 % L1/CIN4 0 0 bp 0.00 % LTR elements: 0 0 bp 0.00 % BEL/Pao 0 0 bp 0.00 % Ty1/Copia 0 0 bp 0.00 % Gypsy/DIRS1 0 0 bp 0.00 % Retroviral 0 0 bp 0.00 % DNA transposons 3538 1564942 bp 0.23 % hobo-Activator 0 0 bp 0.00 % Tc1-IS630-Pogo 0 0 bp 0.00 % En-Spm 0 0 bp 0.00 % MuDR-IS905 0 0 bp 0.00 % PiggyBac 0 0 bp 0.00 % Tourist/Harbinger 0 0 bp 0.00 % Other (Mirage, 0 0 bp 0.00 % P-element, Transib) Rolling-circles 0 0 bp 0.00 % Unclassified: 65151 23982146 bp 3.50 % Total interspersed repeats: 34499156 bp 5.04 % Small RNA: 43353 8992879 bp 1.31 % Satellites: 1 222 bp 0.00 % Simple repeats: 232627 10544162 bp 1.54 % Low complexity: 29762 1561018 bp 0.23 % ================================================== * most repeats fragmented by insertions or deletions have been counted as one element Runs of >=20 X/Ns in query were excluded in % calcs The query species was assumed to be crassostrea virginica RepeatMasker Combined Database: Dfam_Consensus-20170127, RepBase-20170127 run with rmblastn version 2.6.0+
%%bash
# Display the RepeatMasker summary table. The path on its own would be
# treated as a command to execute (and fails with "Permission denied"), so it
# is passed to cat instead.
cat /home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl
bash: line 1: /home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl: Permission denied
%%bash
# Print the RepeatMasker summary table to the notebook.
summary_tbl=/home/sam/analyses/20180529_virginica_repeatmasker/GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl
cat "${summary_tbl}"
================================================== file name: GCF_002022765.2_C_virginica-3.0_genomic.fasta sequences: 11 total length: 684741128 bp (684675328 bp excl N/X-runs) GC level: 34.83 % bases masked: 46637065 bp ( 6.81 %) ================================================== number of length percentage elements* occupied of sequence -------------------------------------------------- Retroelements 43139 8952068 bp 1.31 % SINEs: 43139 8952068 bp 1.31 % Penelope 0 0 bp 0.00 % LINEs: 0 0 bp 0.00 % CRE/SLACS 0 0 bp 0.00 % L2/CR1/Rex 0 0 bp 0.00 % R1/LOA/Jockey 0 0 bp 0.00 % R2/R4/NeSL 0 0 bp 0.00 % RTE/Bov-B 0 0 bp 0.00 % L1/CIN4 0 0 bp 0.00 % LTR elements: 0 0 bp 0.00 % BEL/Pao 0 0 bp 0.00 % Ty1/Copia 0 0 bp 0.00 % Gypsy/DIRS1 0 0 bp 0.00 % Retroviral 0 0 bp 0.00 % DNA transposons 3538 1564942 bp 0.23 % hobo-Activator 0 0 bp 0.00 % Tc1-IS630-Pogo 0 0 bp 0.00 % En-Spm 0 0 bp 0.00 % MuDR-IS905 0 0 bp 0.00 % PiggyBac 0 0 bp 0.00 % Tourist/Harbinger 0 0 bp 0.00 % Other (Mirage, 0 0 bp 0.00 % P-element, Transib) Rolling-circles 0 0 bp 0.00 % Unclassified: 65151 23982146 bp 3.50 % Total interspersed repeats: 34499156 bp 5.04 % Small RNA: 43353 8992879 bp 1.31 % Satellites: 1 222 bp 0.00 % Simple repeats: 232627 10544162 bp 1.54 % Low complexity: 29762 1561018 bp 0.23 % ================================================== * most repeats fragmented by insertions or deletions have been counted as one element Runs of >=20 X/Ns in query were excluded in % calcs The query species was assumed to be crassostrea virginica RepeatMasker Combined Database: Dfam_Consensus-20170127, RepBase-20170127 run with rmblastn version 2.6.0+
sudo
%%bash
# Long-format listing of the analysis directory under /mnt/owl, sorted by
# modification time with the oldest entries first.
owl_dir=/mnt/owl/Athaliana/20180529_virginica_repeatmasker/
ls -l -t -r "${owl_dir}"
total 1530150 -rw-rw-r-- 1 sam users 0 May 29 12:28 wget_stout.txt -rw-rw-r-- 1 sam users 693301635 May 29 12:28 GCF_002022765.2_C_virginica-3.0_genomic.fasta -rw-rw-r-- 1 sam users 316589 May 29 12:28 wget_stderr.txt -rw-rw-r-- 1 sam users 0 May 29 12:33 rm_stderr.err -rw-rw-r-- 1 sam users 2593647 May 29 13:59 rm_stdout.out -rw-rw-r-- 1 sam users 52793192 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.out -rw-rw-r-- 1 sam users 2449 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.tbl -rw-rw-r-- 1 sam users 83461675 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.cat.gz -rw-rw-r-- 1 sam users 35963379 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.out.gff -rw-rw-r-- 1 sam users 698437192 May 29 13:59 GCF_002022765.2_C_virginica-3.0_genomic.fasta.masked -rw-rw-r-- 1 sam users 1984 May 29 14:46 readme.txt