%%bash
echo "TODAY'S DATE:"
date
echo "------------"
echo ""
#Display operating system info
lsb_release -a
echo ""
echo "------------"
echo "HOSTNAME: "; hostname
echo ""
echo "------------"
echo "Computer Specs:"
echo ""
lscpu
echo ""
echo "------------"
echo ""
echo "Memory Specs"
echo ""
free -mh
TODAY'S DATE: Mon Feb 25 14:55:58 PST 2019 ------------ Distributor ID: Ubuntu Description: Ubuntu 16.04.5 LTS Release: 16.04 Codename: xenial ------------ HOSTNAME: swoose ------------ Computer Specs: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 24 On-line CPU(s) list: 0-23 Thread(s) per core: 2 Core(s) per socket: 6 Socket(s): 2 NUMA node(s): 1 Vendor ID: GenuineIntel CPU family: 6 Model: 44 Model name: Intel(R) Xeon(R) CPU X5670 @ 2.93GHz Stepping: 2 CPU MHz: 2925.907 BogoMIPS: 5851.96 Virtualization: VT-x L1d cache: 32K L1i cache: 32K L2 cache: 256K L3 cache: 12288K NUMA node0 CPU(s): 0-23 Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 popcnt aes lahf_lm epb kaiser tpr_shadow vnmi flexpriority ept vpid dtherm ida arat ------------ Memory Specs total used free shared buff/cache available Mem: 70G 6.9G 1.6G 1.1G 62G 62G Swap: 4.7G 825M 3.8G
No LSB modules are available.
%env seqkit=/home/sam/programs/seqkit-v0.9.3
%env data_dir=/home/sam/data
%env analyses_dir=/home/sam/analyses/20190225_cpg_oe
env: seqkit=/home/sam/programs/seqkit-v0.9.3 env: data_dir=/home/sam/data env: analyses_dir=/home/sam/analyses/20190225_cpg_oe
%%bash
cd ${data_dir}
time \
wget \
--recursive \
--no-directories \
--no-parent \
--quiet \
--accept "*GENE.fa" \
"http://gannet.fish.washington.edu/seashell/bu-serine-wd/19-01-08/"
ls -lhtr *.fa
-rw-rw-r-- 1 sam sam 409M Jan 9 13:06 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:07 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:07 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:07 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:07 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:08 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:08 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:08 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:09 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:09 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:09 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:10 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:10 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:10 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:11 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:11 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:11 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:11 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_7_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:12 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:12 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:12 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:13 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:13 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:13 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:14 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:14 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:14 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:15 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:15 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:15 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_7_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:15 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:16 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:16 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:16 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:17 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:17 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:17 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG0F2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:18 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG2F1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:18 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG2M5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:18 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:18 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:19 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:19 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:19 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:20 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:20 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_1_pool_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:20 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:21 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:21 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_7_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:21 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_8_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:22 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:22 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:22 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:22 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:23 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:23 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:23 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:24 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:24 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:24 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:25 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:25 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:25 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH0H4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:26 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2F6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:26 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2F8_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:26 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2M1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:27 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:27 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:27 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:27 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:28 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:28 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:28 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:29 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:29 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:29 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:30 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:30 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_6_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:30 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_10_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:31 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_11_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:31 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_12_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:31 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_7_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:32 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_8_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:32 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_9_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:32 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_1_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:32 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_2_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:33 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_3_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:33 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_4_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:33 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_5_GENE.fa -rw-rw-r-- 1 sam sam 409M Jan 9 13:34 Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_6_GENE.fa
real 6m32.190s user 0m22.332s sys 2m0.584s
Prepared at request of this GitHub Issue:
https://github.com/RobertsLab/resources/issues/593
Code is modified from the following link in order to loop through a large number of files:
http://htmlpreview.github.io/?https://github.com/hputnam/EastOyEpi/blob/master/02-Cpg-test.html
%%bash
fa_array=(${data_dir}/*GENE.fa)
for fa in ${fa_array[@]}
do
fn=$(basename ${fa} .fa)
mkdir ${fn}_analysis
cd ${fn}_analysis
fx2tab \
--length \
${fa} \
> ${fa}_tab
awk '{ print $2 }' ${fa}_tab > ${fa}_tab2
awk -F\[Cc][Gg] '{print NF-1}' ${fa}_tab_2 > CG
awk -F\[Cc] '{print NF-1}' ${fa}_tab_2 > C
awk -F\[Gg] '{print NF-1}' ${fa}_tab_2 > G
paste ${fa} \
CG \
C \
G \
> comb
awk '{print $1, "\t", (($4)/($5*$6))*(($3^2)/($3-1))}' comb \
> ID_CpG
done
Process is interrupted.
%%bash
mkdir ${analyses_dir}
fa_array=(${data_dir}/*GENE.fa)
for fa in "${fa_array[@]}"
do
cd ${analyses_dir}
fn=$(basename ${fa} .fa)
mkdir ${fn}_analysis
cd ${fn}_analysis
${seqkit} fx2tab \
--length \
${fa} \
> ${analyses_dir}/${fa}_tab
awk '{ print $2 }' ${fa}_tab > ${fa}_tab2
awk -F\[Cc][Gg] '{print NF-1}' ${fa}_tab_2 > CG
awk -F\[Cc] '{print NF-1}' ${fa}_tab_2 > C
awk -F\[Gg] '{print NF-1}' ${fa}_tab_2 > G
paste ${fa} \
CG \
C \
G \
> comb
awk '{print $1, "\t", (($4)/($5*$6))*(($3^2)/($3-1))}' comb \
> ID_CpG
done
Process is interrupted.
%%bash
mkdir ${analyses_dir}
fa_array=(${data_dir}/*GENE.fa)
for fa in "${fa_array[@]}"
do
cd ${analyses_dir}
fn=$(basename ${fa} .fa)
mkdir ${fn}_analysis
cd ${fn}_analysis
${seqkit} fx2tab \
--length \
${fa} \
> ${fn}_analysis/${fn}_tab
awk '{ print $2 }' ${fn}_tab > ${fn}_tab2
awk -F\[Cc][Gg] '{print NF-1}' ${fn}_tab_2 > CG
awk -F\[Cc] '{print NF-1}' ${fn}_tab_2 > C
awk -F\[Gg] '{print NF-1}' ${fn}_tab_2 > G
paste ${fa} \
CG \
C \
G \
> comb
awk '{print $1, "\t", (($4)/($5*$6))*(($3^2)/($3-1))}' comb \
> ID_CpG
done
Process is terminated.
%%bash
${seqkit}
SeqKit -- a cross-platform and ultrafast toolkit for FASTA/Q file manipulation Version: 0.9.3 Author: Wei Shen <shenwei356@gmail.com> Documents : http://bioinf.shenwei.me/seqkit Source code: https://github.com/shenwei356/seqkit Please cite: https://doi.org/10.1371/journal.pone.0163962 Usage: seqkit [command] Available Commands: common find common sequences of multiple files by id/name/sequence concat concatenate sequences with same ID from multiple files convert convert FASTQ quality encoding between Sanger, Solexa and Illumina duplicate duplicate sequences N times faidx create FASTA index file and extract subsequence fq2fa convert FASTQ to FASTA fx2tab convert FASTA/Q to tabular format (with length/GC content/GC skew) genautocomplete generate shell autocompletion script grep search sequences by ID/name/sequence/sequence motifs, mismatch allowed head print first N FASTA/Q records help Help about any command locate locate subsequences/motifs, mismatch allowed range print FASTA/Q records in a range (start:end) rename rename duplicated IDs replace replace name/sequence by regular expression restart reset start position for circular genome rmdup remove duplicated sequences by id/name/sequence sample sample sequences by number or proportion seq transform sequences (revserse, complement, extract ID...) shuffle shuffle sequences sliding sliding sequences, circular genome supported sort sort sequences by id/name/sequence/length split split sequences into files by id/seq region/size/parts (mainly for FASTA) split2 split sequences into files by size/parts (FASTA, PE/SE FASTQ) stats simple statistics of FASTA/Q files subseq get subsequences by region/gtf/bed, including flanking sequences tab2fx convert tabular format to FASTA/Q format translate translate DNA/RNA to protein sequence version print version information and check for update Flags: --alphabet-guess-seq-length int length of sequence prefix of the first FASTA record based on which seqkit guesses the sequence type (0 for whole seq) (default 10000) -h, --help help for seqkit --id-ncbi FASTA head is NCBI-style, e.g. >gi|110645304|ref|NC_002516.2| Pseud... --id-regexp string regular expression for parsing ID (default "^([^\\s]+)\\s?") -w, --line-width int line width when outputing FASTA format (0 for no wrap) (default 60) -o, --out-file string out file ("-" for stdout, suffix .gz for gzipped out) (default "-") --quiet be quiet and do not show extra information -t, --seq-type string sequence type (dna|rna|protein|unlimit|auto) (for auto, it automatically detect by the first sequence) (default "auto") -j, --threads int number of CPUs. (default value: 1 for single-CPU PC, 2 for others) (default 2) Use "seqkit [command] --help" for more information about a command.
%%bash
mkdir ${analyses_dir}
fa_array=(${data_dir}/*GENE.fa)
for fa in "${fa_array[@]}"
do
cd ${analyses_dir}
fn=$(basename ${fa} .fa)
mkdir ${fn}_analysis
cd ${fn}_analysis
${seqkit} fx2tab \
--length \
${fa} \
> ${fn}_tab
awk '{ print $2 }' ${fn}_tab > ${fn}_tab2
awk -F\[Cc][Gg] '{print NF-1}' ${fn}_tab2 > CG
awk -F\[Cc] '{print NF-1}' ${fn}_tab2 > C
awk -F\[Gg] '{print NF-1}' ${fn}_tab2 > G
paste ${fn}_tab \
CG \
C \
G \
> comb
awk '{print $1, "\t", (($4)/($5*$6))*(($3^2)/($3-1))}' comb \
> ID_CpG
done
awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CL_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CLP_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_7_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.CS_7_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.DEBY_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_7_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_7_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HC_VA_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG0F2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG0F2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG2F1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG2F1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG2M5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HG_HG2M5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.HI_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_1_pool_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_1_pool_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_7_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_7_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_8_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LM_8_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.LOLA_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NEH_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH0H4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH0H4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2F6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2F6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2F8_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2F8_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2M1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.NG_NH2M1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.OBOYS2_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SL_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_10_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_10_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_11_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_11_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_12_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_12_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_7_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_7_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_8_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_8_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_9_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.SM_9_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_1_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_2_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_3_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_4_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_5_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_6_GENE_tab2" FNR=314 NR=314 awk: program limit exceeded: maximum number of fields size=32767 FILENAME="Combined.SNP.TRSdp5g95FnDNAmaf05.sorted.ANACfill.UMFS_6_GENE_tab2" FNR=314 NR=314
gawk
.¶Installed gawk
and restarted notebook.
%%bash
mkdir ${analyses_dir}
# Create arrays of all FastA files
fa_array=(${data_dir}/*GENE.fa)
time \
for fa in "${fa_array[@]}"
do
# Change to proper directory
cd ${analyses_dir}
# Remove file path and extension from the FastA and save as variable
fn=$(basename ${fa} .fa)
# Make subdirectory using filename
mkdir ${fn}_analysis
cd ${fn}_analysis
# Use seqkit to convert FastA to tab-delimited and print sequence length
${seqkit} fx2tab \
--length \
${fa} \
> ${fn}_tab
# Print only sequences to new file
gawk '{ print $2 }' ${fn}_tab > ${fn}_tab2
# Delimit sequences on CGs and print the number of fields minus 1 to get the number of CGs present.
gawk -F\[Cc][Gg] '{print NF-1}' ${fn}_tab2 > CG
# Delimit sequences on CGs and print the number of fields minus 1 to get the number of Cs present.
gawk -F\[Cc] '{print NF-1}' ${fn}_tab2 > C
# Delimit sequences on CGs and print the number of fields minus 1 to get the number of Gs present.
gawk -F\[Gg] '{print NF-1}' ${fn}_tab2 > G
# Paste these together to have file with the following fields:
# - FastA header
# - Sequence
# - Sequence length
# - Number of CGs
# - Number of Cs
# - Number of Gs
paste ${fn}_tab \
CG \
C \
G \
> comb
# Do some math to calculate CpG O/E ratio (observed vs expected)
gawk '{print $1, "\t", (($4)/($5*$6))*(($3^2)/($3-1))}' comb \
> ID_CpG
done
real 65m31.145s user 53m47.100s sys 2m53.228s
%%bash
cd /home/sam/analyses/
rsync \
--archive \
--relative \
./20190225_cpg_oe \
gannet:/volume1/web/Atumefaciens