%%bash
# Record when and on which machine this notebook was run: date, OS release,
# hostname, CPU details and memory usage, separated by a dashed rule.
rule="------------"

printf '%s\n' "TODAY'S DATE:"
date
printf '%s\n\n' "${rule}"

# Operating system release details
lsb_release -a
printf '\n%s\n' "${rule}"

printf '%s\n' "HOSTNAME: "
hostname
printf '\n%s\n' "${rule}"

printf '%s\n\n' "Computer Specs:"
lscpu
printf '\n%s\n\n' "${rule}"

printf '%s\n\n' "Memory Specs"
free -mh
TODAY'S DATE: Wed Oct 30 08:34:26 PDT 2019 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.6 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: swoose ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 24 On-line CPU(s) list: 0-23 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 44 Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz Stepping: 2 CPU MHz: 2925.971 BogoMIPS: 5851.97 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 12288K NUMA node0 CPU(s): 0-23 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb ssbd ibrs ibpb stibp kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat flush_l1d ------------ Memory Specs total used free shared buff/cache available Mem: 70G 29G 7.0G 688M 34G 39G Swap: 4.7G 15M 4.6G
No LSB modules are available.
`%env` variables are good for passing to bash cells.
# Set working directory
# `wd` is exported to the environment for the bash cells and is also assigned
# as a Python variable so IPython can substitute it into later magic lines.
%env wd=/home/sam/analyses/20191030_pgen_v074.a4_intron_intergenic_features
wd="/home/sam/analyses/20191030_pgen_v074.a4_intron_intergenic_features"

# Remote location of the input files (rsync source prefix)
%env rsync_owl=owl:/volume1/web/halfshell/genomic-databank/

# Input and derived file names
%env genome_fasta=Pgenerosa_v074.fa
%env chrome_sizes=Pgenerosa_v074.sizes.txt
%env gffs=Panopea-generosa-vv0.74.a4.[eg]*.gff
%env exon_gff=Panopea-generosa-vv0.74.a4.exon.gff3
%env exon_gff_sorted=Panopea-generosa-vv0.74.a4.exon.sorted.gff3
%env gene_gff=Panopea-generosa-vv0.74.a4.gene.gff3
%env gene_gff_sorted=Panopea-generosa-vv0.74.a4.gene.sorted.gff3

# wget argument strings.
# IPython substitutes the Python variable with brace syntax only; a dollar sign
# in front of the braces is kept literally and corrupts the directory prefix.
# NOTE(review): the single quotes around the --accept pattern are stored
# literally in the value; confirm the consuming command expects that.
%env wget_gff=--directory-prefix={wd} --recursive --quiet --no-directories --no-check-certificate --no-parent --accept 'Panopea-generosa-vv0.74.a4.[eg]*.gff' https://owl.fish.washington.edu/halfshell/genomic-databank/
# A shell-style variable reference inside an %env value is not expanded (and
# bash does not re-expand it later), so the FASTA file name is written out.
%env wget_fasta=--no-check-certificate https://owl.fish.washington.edu/halfshell/genomic-databank/Pgenerosa_v074.fa

# Output file names
%env exon_comp_bed=Panopea-generosa-vv0.74.a4.exon.sorted.comp.bed
%env intron_bed=Panopea-generosa-vv0.74.a4.introns.bed
%env intergenic_bed=Panopea-generosa-vv0.74.a4.intergenic.bed

# Programs
%env bedtools_dir=/home/sam/programs/bedtools-2.28.0/bin
%env samtools=/home/sam/programs/samtools-1.9/samtools
env: wd=/home/sam/analyses/20191030_pgen_v074.a4_intron_intergenic_features env: rsync_owl=owl:/volume1/web/halfshell/genomic-databank/ env: genome_fasta=Pgenerosa_v074.fa env: chrome_sizes=Pgenerosa_v074.sizes.txt env: gffs=Panopea-generosa-vv0.74.a4.[eg]*.gff env: exon_gff=Panopea-generosa-vv0.74.a4.exon.gff3 env: exon_gff_sorted=Panopea-generosa-vv0.74.a4.exon.sorted.gff3 env: gene_gff=Panopea-generosa-vv0.74.a4.gene.gff3 env: gene_gff_sorted=Panopea-generosa-vv0.74.a4.gene.sorted.gff3 env: wget_gff=--directory-prefix=$/home/sam/analyses/20191030_pgen_v074.a4_intron_intergenic_features --recursive --quiet --no-directories --no-check-certificate --no-parent --accept 'Panopea-generosa-vv0.74.a4.[eg]*.gff' https://owl.fish.washington.edu/halfshell/genomic-databank/ env: wget_fasta=--no-check-certificate https://owl.fish.washington.edu/halfshell/genomic-databank/"${genome_fasta}" env: exon_comp_bed=Panopea-generosa-vv0.74.a4.exon.sorted.comp.bed env: intron_bed=Panopea-generosa-vv0.74.a4.introns.bed env: intergenic_bed=Panopea-generosa-vv0.74.a4.intergenic.bed env: bedtools_dir=/home/sam/programs/bedtools-2.28.0/bin env: samtools=/home/sam/programs/samtools-1.9/samtools
%%bash
# Create the analysis directory together with any missing parent directories.
# The expansion is quoted so a path containing spaces or glob characters is
# passed as a single operand; `--` stops option parsing before the path.
mkdir --parents -- "${wd}"
# Change into the analysis directory.
# NOTE(review): the brace-only form used here is not a bash expansion.
# Presumably this line is its own IPython cell, run as the cd automagic with
# the Python variable wd substituted; confirm it is not meant to be part of
# the bash cell above.
cd {wd}
/home/sam/analyses/20191030_pgen_v074.a4_intron_intergenic_features
Info on GFFs is here: [https://github.com/RobertsLab/resources/wiki/Genomic-Resources#genome-feature-tracks-3](https://github.com/RobertsLab/resources/wiki/Genomic-Resources#genome-feature-tracks-3)
%%bash
# Copy the genome FASTA and the exon and gene GFFs from the remote rsync
# location into the current directory, one transfer per file, then list the
# directory contents.
for remote_name in "${genome_fasta}" "${exon_gff}" "${gene_gff}"; do
  rsync --archive --verbose "${rsync_owl}${remote_name}" .
done

# Two blank lines, a rule and another blank line separate the rsync output
# from the directory listing.
printf '\n\n%s\n\n' "----------------------------------------------------------"
ls -lh
receiving incremental file list Pgenerosa_v074.fa sent 30 bytes received 958,176,954 bytes 27,773,245.91 bytes/sec total size is 958,059,901 speedup is 1.00 receiving incremental file list Panopea-generosa-vv0.74.a4.exon.gff3 sent 30 bytes received 64,671,619 bytes 14,371,477.56 bytes/sec total size is 64,663,603 speedup is 1.00 receiving incremental file list Panopea-generosa-vv0.74.a4.gene.gff3 sent 30 bytes received 10,999,145 bytes 4,399,670.00 bytes/sec total size is 10,997,681 speedup is 1.00 ---------------------------------------------------------- total 986M -rwxr--r-- 1 sam users 62M Oct 14 10:13 Panopea-generosa-vv0.74.a4.exon.gff3 -rwxr--r-- 1 sam users 11M Oct 14 10:13 Panopea-generosa-vv0.74.a4.gene.gff3 -rw-rw-rw- 1 sam users 914M Jun 26 08:49 Pgenerosa_v074.fa
# %%bash
# time \
# wget "${wget_gff}"
# wget "${wget_fasta}"
# ls -lh ${wd}
%%bash
# Print each input GFF's file name followed by its first ten lines, with a
# dashed rule between the two previews.
# Expansions are quoted so the names are never word-split or glob-expanded,
# and `--` keeps a name starting with "-" from being read as an option.
printf '%s\n' "${exon_gff}"
head -- "${exon_gff}"
echo ""
echo "----------------------"
echo ""
printf '%s\n' "${gene_gff}"
head -- "${gene_gff}"
Panopea-generosa-vv0.74.a4.exon.gff3 ##gff-version 3 ##Generated using GenSAS, Monday 7th of October 2019 04:54:37 AM ##Project Name : Pgenerosa_v074 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish exon 2 125 . + . ID=PGEN_.00g000010.m01.exon01;Name=PGEN_.00g000010.m01.exon01;Parent=PGEN_.00g000010.m01;original_ID=21510-PGEN_.00g234140.m01.exon1;Alias=21510-PGEN_.00g234140.m01.exon1 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish exon 1995 2095 . + . ID=PGEN_.00g000010.m01.exon02;Name=PGEN_.00g000010.m01.exon02;Parent=PGEN_.00g000010.m01;original_ID=21510-PGEN_.00g234140.m01.exon2;Alias=21510-PGEN_.00g234140.m01.exon2 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish exon 3325 3495 . + . ID=PGEN_.00g000010.m01.exon03;Name=PGEN_.00g000010.m01.exon03;Parent=PGEN_.00g000010.m01;original_ID=21510-PGEN_.00g234140.m01.exon3;Alias=21510-PGEN_.00g234140.m01.exon3 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish exon 4651 4719 . + . ID=PGEN_.00g000010.m01.exon04;Name=PGEN_.00g000010.m01.exon04;Parent=PGEN_.00g000010.m01;original_ID=21510-PGEN_.00g234140.m01.exon4;Alias=21510-PGEN_.00g234140.m01.exon4 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish exon 19808 19943 . - . ID=PGEN_.00g000020.m01.exon01;Name=PGEN_.00g000020.m01.exon01;Parent=PGEN_.00g000020.m01;original_ID=21510-PGEN_.00g234150.m01.exon10;Alias=21510-PGEN_.00g234150.m01.exon10 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish exon 21133 21362 . - . ID=PGEN_.00g000020.m01.exon02;Name=PGEN_.00g000020.m01.exon02;Parent=PGEN_.00g000020.m01;original_ID=21510-PGEN_.00g234150.m01.exon9;Alias=21510-PGEN_.00g234150.m01.exon9 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish exon 22487 22613 . - . 
ID=PGEN_.00g000020.m01.exon03;Name=PGEN_.00g000020.m01.exon03;Parent=PGEN_.00g000020.m01;original_ID=21510-PGEN_.00g234150.m01.exon8;Alias=21510-PGEN_.00g234150.m01.exon8 ---------------------- Panopea-generosa-vv0.74.a4.gene.gff3 ##gff-version 3 ##Generated using GenSAS, Monday 7th of October 2019 04:54:37 AM ##Project Name : Pgenerosa_v074 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish gene 2 4719 . + . ID=PGEN_.00g000010;Name=PGEN_.00g000010;original_ID=21510-PGEN_.00g234140;Alias=21510-PGEN_.00g234140;original_name=21510-PGEN_.00g234140;Notes=sp|Q86IC9|CAMT1_DICDI [BLAST protein vs protein (blastp) 2.7.1],PF01596.12 [Pfam 1.6] PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish gene 19808 36739 . - . ID=PGEN_.00g000020;Name=PGEN_.00g000020;original_ID=21510-PGEN_.00g234150;Alias=21510-PGEN_.00g234150;original_name=21510-PGEN_.00g234150;Notes=sp|P04177|TY3H_RAT [BLAST protein vs protein (blastp) 2.7.1],sp|P04177|TY3H_RAT [DIAMOND Functional 0.9.22],IPR036951 [InterProScan 5.29-68.0],PF00351.16 [Pfam 1.6] PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish gene 49248 52578 . - . ID=PGEN_.00g000030;Name=PGEN_.00g000030;original_ID=21510-PGEN_.00g234160;Alias=21510-PGEN_.00g234160;original_name=21510-PGEN_.00g234160;Notes=PF08054.6 [Pfam 1.6] PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish gene 55792 67546 . + . ID=PGEN_.00g000040;Name=PGEN_.00g000040;original_ID=21510-PGEN_.00g234170;Alias=21510-PGEN_.00g234170;original_name=21510-PGEN_.00g234170 PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish gene 67586 69113 . - . 
ID=PGEN_.00g000050;Name=PGEN_.00g000050;original_ID=21510-PGEN_.00g234180;Alias=21510-PGEN_.00g234180;original_name=21510-PGEN_.00g234180;Notes=sp|Q8L840|RQL4A_ARATH [BLAST protein vs protein (blastp) 2.7.1],sp|Q8L840|RQL4A_ARATH [DIAMOND Functional 0.9.22],PF00270.24 [Pfam 1.6] PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish gene 70713 81099 . + . ID=PGEN_.00g000060;Name=PGEN_.00g000060;original_ID=21510-PGEN_.00g234190;Alias=21510-PGEN_.00g234190;original_name=21510-PGEN_.00g234190;Notes=sp|Q61043|NIN_MOUSE [DIAMOND Functional 0.9.22],PF04443.7 [Pfam 1.6] PGA_scaffold1__77_contigs__length_89643857 GenSAS_5d9637f372b5d-publish gene 183686 186073 . + . ID=PGEN_.00g000070;Name=PGEN_.00g000070;original_ID=21510-PGEN_.00g234200;Alias=21510-PGEN_.00g234200;original_name=21510-PGEN_.00g234200;Notes=PF15364.1 [Pfam 1.6]
%%bash
# Index the genome FASTA with samtools faidx, then print the resulting
# "<fasta>.fai" index (its first two columns are used later as sequence name
# and length).
# The program path and file names are quoted so they survive spaces or glob
# characters intact.
"${samtools}" faidx "${genome_fasta}"
cat -- "${genome_fasta}.fai"
PGA_scaffold1__77_contigs__length_89643857 89643857 44 60 61 PGA_scaffold2__36_contigs__length_69596280 69596280 91138010 60 61 PGA_scaffold3__111_contigs__length_57743597 57743597 161894273 60 61 PGA_scaffold4__129_contigs__length_65288255 65288255 220600309 60 61 PGA_scaffold5__109_contigs__length_67248332 67248332 286976747 60 61 PGA_scaffold6__104_contigs__length_61759565 61759565 355345930 60 61 PGA_scaffold7__69_contigs__length_43120122 43120122 418134866 60 61 PGA_scaffold8__63_contigs__length_61151155 61151155 461973701 60 61 PGA_scaffold9__45_contigs__length_38581958 38581958 524144086 60 61 PGA_scaffold10__49_contigs__length_53961475 53961475 563369122 60 61 PGA_scaffold11__79_contigs__length_51449921 51449921 618230000 60 61 PGA_scaffold12__71_contigs__length_50438331 50438331 670537465 60 61 PGA_scaffold13__52_contigs__length_44396874 44396874 721816480 60 61 PGA_scaffold14__91_contigs__length_45393038 45393038 766953347 60 61 PGA_scaffold15__101_contigs__length_47938513 47938513 813102982 60 61 PGA_scaffold16__33_contigs__length_31980953 31980953 861840516 60 61 PGA_scaffold17__51_contigs__length_34923512 34923512 894354530 60 61 PGA_scaffold18__69_contigs__length_27737463 27737463 929860146 60 61
%%bash
# Build the two-column "name<TAB>length" sizes file from the FASTA index and
# sort it in byte (C locale) order.
# LC_ALL=C is set on the sort command itself: LC_ALL takes precedence over
# LC_COLLATE, so exporting only LC_COLLATE is silently ignored whenever LC_ALL
# is already set in the environment.
cut -f1,2 -- "${genome_fasta}.fai" \
  | LC_ALL=C sort \
  > "${chrome_sizes}"

# Show the sorted sizes file
cat -- "${chrome_sizes}"
PGA_scaffold10__49_contigs__length_53961475 53961475 PGA_scaffold11__79_contigs__length_51449921 51449921 PGA_scaffold12__71_contigs__length_50438331 50438331 PGA_scaffold13__52_contigs__length_44396874 44396874 PGA_scaffold14__91_contigs__length_45393038 45393038 PGA_scaffold15__101_contigs__length_47938513 47938513 PGA_scaffold16__33_contigs__length_31980953 31980953 PGA_scaffold17__51_contigs__length_34923512 34923512 PGA_scaffold18__69_contigs__length_27737463 27737463 PGA_scaffold1__77_contigs__length_89643857 89643857 PGA_scaffold2__36_contigs__length_69596280 69596280 PGA_scaffold3__111_contigs__length_57743597 57743597 PGA_scaffold4__129_contigs__length_65288255 65288255 PGA_scaffold5__109_contigs__length_67248332 67248332 PGA_scaffold6__104_contigs__length_61759565 61759565 PGA_scaffold7__69_contigs__length_43120122 43120122 PGA_scaffold8__63_contigs__length_61151155 61151155 PGA_scaffold9__45_contigs__length_38581958 38581958
%%bash
# Sort the exon and gene GFFs with bedtools sort, keeping each file's first
# three lines (the header) at the top, then preview each sorted file and list
# the distinct values of its first column to confirm the sequence order.
#
# The two arrays are parallel: input i is written to output i.
gff_array=("${exon_gff}" "${gene_gff}")
sorted_gff_array=("${exon_gff_sorted}" "${gene_gff_sorted}")

for index in "${!gff_array[@]}"; do
  # Lines 1-3 are copied through unchanged; only lines 4 onward are sorted.
  # The output is truncated (>) rather than appended to (>>) so that
  # re-running this cell does not duplicate the header and features.
  {
    awk 'NR<4 {print}' "${gff_array[index]}"
    awk 'NR>3 {print}' "${gff_array[index]}" | "${bedtools_dir}"/bedtools sort -i -
  } > "${sorted_gff_array[index]}"

  # Check out sorted GFFs
  echo "Previewing ${sorted_gff_array[index]}:"
  echo ""
  head "${sorted_gff_array[index]}"
  echo ""

  # Collapsing adjacent duplicates in column 1 shows each sequence name once,
  # in file order (the header lines show up here as well).
  echo "Confirming sort order of ${sorted_gff_array[index]}:"
  echo ""
  cut -f1 "${sorted_gff_array[index]}" | uniq
  echo ""
done
Previewing Panopea-generosa-vv0.74.a4.exon.sorted.gff3: ##gff-version 3 ##Generated using GenSAS, Monday 7th of October 2019 04:54:37 AM ##Project Name : Pgenerosa_v074 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish exon 2 1145 . + . ID=PGEN_.00g204300.m01.exon01;Name=PGEN_.00g204300.m01.exon01;Parent=PGEN_.00g204300.m01;original_ID=21510-PGEN_.00g311420.m01.exon1;Alias=21510-PGEN_.00g311420.m01.exon1 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish exon 19540 19683 . - . ID=PGEN_.00g204310.m01.exon01;Name=PGEN_.00g204310.m01.exon01;Parent=PGEN_.00g204310.m01;original_ID=21510-PGEN_.00g311430.m01.exon9;Alias=21510-PGEN_.00g311430.m01.exon9 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish exon 20883 21134 . - . ID=PGEN_.00g204310.m01.exon02;Name=PGEN_.00g204310.m01.exon02;Parent=PGEN_.00g204310.m01;original_ID=21510-PGEN_.00g311430.m01.exon8;Alias=21510-PGEN_.00g311430.m01.exon8 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish exon 21900 21978 . - . ID=PGEN_.00g204310.m01.exon03;Name=PGEN_.00g204310.m01.exon03;Parent=PGEN_.00g204310.m01;original_ID=21510-PGEN_.00g311430.m01.exon7;Alias=21510-PGEN_.00g311430.m01.exon7 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish exon 23163 23251 . - . ID=PGEN_.00g204310.m01.exon04;Name=PGEN_.00g204310.m01.exon04;Parent=PGEN_.00g204310.m01;original_ID=21510-PGEN_.00g311430.m01.exon6;Alias=21510-PGEN_.00g311430.m01.exon6 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish exon 24166 24255 . - . ID=PGEN_.00g204310.m01.exon05;Name=PGEN_.00g204310.m01.exon05;Parent=PGEN_.00g204310.m01;original_ID=21510-PGEN_.00g311430.m01.exon5;Alias=21510-PGEN_.00g311430.m01.exon5 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish exon 26335 26460 . - . 
ID=PGEN_.00g204310.m01.exon06;Name=PGEN_.00g204310.m01.exon06;Parent=PGEN_.00g204310.m01;original_ID=21510-PGEN_.00g311430.m01.exon4;Alias=21510-PGEN_.00g311430.m01.exon4 Confirming sort order of Panopea-generosa-vv0.74.a4.exon.sorted.gff3: ##gff-version 3 ##Generated using GenSAS, Monday 7th of October 2019 04:54:37 AM ##Project Name : Pgenerosa_v074 PGA_scaffold10__49_contigs__length_53961475 PGA_scaffold11__79_contigs__length_51449921 PGA_scaffold12__71_contigs__length_50438331 PGA_scaffold13__52_contigs__length_44396874 PGA_scaffold14__91_contigs__length_45393038 PGA_scaffold15__101_contigs__length_47938513 PGA_scaffold16__33_contigs__length_31980953 PGA_scaffold17__51_contigs__length_34923512 PGA_scaffold18__69_contigs__length_27737463 PGA_scaffold1__77_contigs__length_89643857 PGA_scaffold2__36_contigs__length_69596280 PGA_scaffold3__111_contigs__length_57743597 PGA_scaffold4__129_contigs__length_65288255 PGA_scaffold5__109_contigs__length_67248332 PGA_scaffold6__104_contigs__length_61759565 PGA_scaffold7__69_contigs__length_43120122 PGA_scaffold8__63_contigs__length_61151155 PGA_scaffold9__45_contigs__length_38581958 Previewing Panopea-generosa-vv0.74.a4.gene.sorted.gff3: ##gff-version 3 ##Generated using GenSAS, Monday 7th of October 2019 04:54:37 AM ##Project Name : Pgenerosa_v074 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish gene 2 1145 . + . ID=PGEN_.00g204300;Name=PGEN_.00g204300;original_ID=21510-PGEN_.00g311420;Alias=21510-PGEN_.00g311420;original_name=21510-PGEN_.00g311420 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish gene 19540 36845 . - . 
ID=PGEN_.00g204310;Name=PGEN_.00g204310;original_ID=21510-PGEN_.00g311430;Alias=21510-PGEN_.00g311430;original_name=21510-PGEN_.00g311430;Notes=sp|F1MNN4|FBXW7_BOVIN [BLAST protein vs protein (blastp) 2.7.1],sp|A1DHW6|SCONB_NEOFI [DIAMOND Functional 0.9.22],IPR001680 [InterProScan 5.29-68.0],PF04041.8 [Pfam 1.6] PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish gene 36984 37535 . - . ID=PGEN_.00g204320;Name=PGEN_.00g204320;original_ID=21510-PGEN_.00g311440;Alias=21510-PGEN_.00g311440;original_name=21510-PGEN_.00g311440;Notes=IPR036322 [InterProScan 5.29-68.0],PF11163.3 [Pfam 1.6] PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish gene 41157 41960 . - . ID=PGEN_.00g204330;Name=PGEN_.00g204330;original_ID=21510-PGEN_.00g311450;Alias=21510-PGEN_.00g311450;original_name=21510-PGEN_.00g311450 PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish gene 61914 82099 . + . ID=PGEN_.00g204340;Name=PGEN_.00g204340;original_ID=21510-PGEN_.00g311460;Alias=21510-PGEN_.00g311460;original_name=21510-PGEN_.00g311460;Notes=sp|Q3KRG3|TSR2_DANRE [BLAST protein vs protein (blastp) 2.7.1],sp|Q3KRG3|TSR2_DANRE [DIAMOND Functional 0.9.22],IPR019398 [InterProScan 5.29-68.0],PF10273.4 [Pfam 1.6] PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish gene 90961 109412 . - . ID=PGEN_.00g204350;Name=PGEN_.00g204350;original_ID=21510-PGEN_.00g311470;Alias=21510-PGEN_.00g311470;original_name=21510-PGEN_.00g311470;Notes=PF07495.8 [Pfam 1.6] PGA_scaffold10__49_contigs__length_53961475 GenSAS_5d9637f372b5d-publish gene 135015 215664 . - . 
ID=PGEN_.00g204360;Name=PGEN_.00g204360;original_ID=21510-PGEN_.00g311480;Alias=21510-PGEN_.00g311480;original_name=21510-PGEN_.00g311480;Notes=sp|Q7TMY8|HUWE1_MOUSE [BLAST protein vs protein (blastp) 2.7.1],sp|Q7Z6Z7|HUWE1_HUMAN [DIAMOND Functional 0.9.22],PF06012.7 [Pfam 1.6] Confirming sort order of Panopea-generosa-vv0.74.a4.gene.sorted.gff3: ##gff-version 3 ##Generated using GenSAS, Monday 7th of October 2019 04:54:37 AM ##Project Name : Pgenerosa_v074 PGA_scaffold10__49_contigs__length_53961475 PGA_scaffold11__79_contigs__length_51449921 PGA_scaffold12__71_contigs__length_50438331 PGA_scaffold13__52_contigs__length_44396874 PGA_scaffold14__91_contigs__length_45393038 PGA_scaffold15__101_contigs__length_47938513 PGA_scaffold16__33_contigs__length_31980953 PGA_scaffold17__51_contigs__length_34923512 PGA_scaffold18__69_contigs__length_27737463 PGA_scaffold1__77_contigs__length_89643857 PGA_scaffold2__36_contigs__length_69596280 PGA_scaffold3__111_contigs__length_57743597 PGA_scaffold4__129_contigs__length_65288255 PGA_scaffold5__109_contigs__length_67248332 PGA_scaffold6__104_contigs__length_61759565 PGA_scaffold7__69_contigs__length_43120122 PGA_scaffold8__63_contigs__length_61151155 PGA_scaffold9__45_contigs__length_38581958
%%bash
# Intergenic regions: everything in the genome (per the sizes file) that is
# not covered by a gene feature. The result is written as a BED file and its
# first ten lines are shown.
complement_tool="${bedtools_dir}/complementBed"

"${complement_tool}" -i "${gene_gff_sorted}" -g "${chrome_sizes}" > "${intergenic_bed}"

printf '%s\n\n' "Previewing ${intergenic_bed}:"
head "${intergenic_bed}"
Previewing Panopea-generosa-vv0.74.a4.intergenic.bed: PGA_scaffold10__49_contigs__length_53961475 0 1 PGA_scaffold10__49_contigs__length_53961475 1145 19539 PGA_scaffold10__49_contigs__length_53961475 36845 36983 PGA_scaffold10__49_contigs__length_53961475 37535 41156 PGA_scaffold10__49_contigs__length_53961475 41960 61913 PGA_scaffold10__49_contigs__length_53961475 82099 90960 PGA_scaffold10__49_contigs__length_53961475 109412 135014 PGA_scaffold10__49_contigs__length_53961475 215664 218086 PGA_scaffold10__49_contigs__length_53961475 219013 225102 PGA_scaffold10__49_contigs__length_53961475 230053 232785
%%bash
# Introns are derived in two steps:
#   1. Complement the exons against the genome to get every non-exon interval.
#   2. Intersect the genes with those intervals, keeping only the non-exon
#      stretches that fall inside a gene.

# Step 1: non-exon intervals (BED output)
"${bedtools_dir}"/complementBed \
  -i "${exon_gff_sorted}" \
  -g "${chrome_sizes}" \
  > "${exon_comp_bed}"

echo "Previewing ${exon_comp_bed}:"
echo ""
head "${exon_comp_bed}"
echo ""
echo "---------------------"

# Step 2: the -a input is a GFF, so the intersect output lines are GFF records
# whose start (column 4) is 1-based. BED starts are 0-based, so 1 is
# subtracted from the start to keep this file consistent with the complement
# and intergenic BED files; the end (column 5) is already correct for BED.
"${bedtools_dir}"/intersectBed \
  -a "${gene_gff_sorted}" \
  -b "${exon_comp_bed}" \
  | awk -v OFS='\t' '{print $1, $4 - 1, $5}' \
  > "${intron_bed}"

echo ""
echo "Previewing ${intron_bed}:"
echo ""
head "${intron_bed}"
Previewing Panopea-generosa-vv0.74.a4.exon.sorted.comp.bed: PGA_scaffold10__49_contigs__length_53961475 0 1 PGA_scaffold10__49_contigs__length_53961475 1145 19539 PGA_scaffold10__49_contigs__length_53961475 19683 20882 PGA_scaffold10__49_contigs__length_53961475 21134 21899 PGA_scaffold10__49_contigs__length_53961475 21978 23162 PGA_scaffold10__49_contigs__length_53961475 23251 24165 PGA_scaffold10__49_contigs__length_53961475 24255 26334 PGA_scaffold10__49_contigs__length_53961475 26460 28467 PGA_scaffold10__49_contigs__length_53961475 28623 35328 PGA_scaffold10__49_contigs__length_53961475 35935 36093 --------------------- Previewing Panopea-generosa-vv0.74.a4.introns.bed: PGA_scaffold10__49_contigs__length_53961475 19684 20882 PGA_scaffold10__49_contigs__length_53961475 21135 21899 PGA_scaffold10__49_contigs__length_53961475 21979 23162 PGA_scaffold10__49_contigs__length_53961475 23252 24165 PGA_scaffold10__49_contigs__length_53961475 24256 26334 PGA_scaffold10__49_contigs__length_53961475 26461 28467 PGA_scaffold10__49_contigs__length_53961475 35936 36093 PGA_scaffold10__49_contigs__length_53961475 28624 35328 PGA_scaffold10__49_contigs__length_53961475 61983 65309 PGA_scaffold10__49_contigs__length_53961475 65860 81002
%%bash
# Remove the downloaded inputs and intermediate files from the current
# directory, leaving only the final BED outputs, then list what remains.
#
# - ${genome_fasta:?} aborts the cell if the variable is unset or empty;
#   without the guard the first operand would collapse to a bare "*" and
#   delete every file in the directory.
# - -f makes the cell safe to re-run: files that are already gone (and globs
#   that match nothing) no longer produce errors.
# - -- stops option parsing before the file names.
rm -f -- \
  "${genome_fasta:?genome_fasta must be set}"* \
  "${exon_comp_bed}" \
  *.gff3 \
  *.txt

ls -ltrh
total 12M -rw-rw-r-- 1 sam sam 2.1M Oct 30 08:41 Panopea-generosa-vv0.74.a4.intergenic.bed -rw-rw-r-- 1 sam sam 9.1M Oct 30 08:41 Panopea-generosa-vv0.74.a4.introns.bed
rm: cannot remove 'Pgenerosa_v074.fa*': No such file or directory rm: cannot remove 'Panopea-generosa-vv0.74.a4.exon.sorted.comp.bed': No such file or directory