In this Jupyter notebook, I'll use BS-Snper to call SNP variants from Pacific oyster (Crassostrea gigas) WGBS data. Adult oysters were exposed to either low or ambient pH conditions, then DNA was extracted from gonad tissue for WGBS. Reads were aligned to the Roslin C. gigas genome.
I downloaded the ZIP file from https://github.com/hellbelly/BS-Snper; the archive was unzipped automatically after download.
#cd /Users/Shared/Apps/BS-Snper-master/
/Users/Shared/Apps/BS-Snper-master
#Install BS-SNPer
#!sh BS-Snper.sh
x samtools-0.1.19/ x samtools-0.1.19/.gitignore x samtools-0.1.19/AUTHORS x samtools-0.1.19/COPYING x samtools-0.1.19/ChangeLog.old x samtools-0.1.19/INSTALL x samtools-0.1.19/Makefile x samtools-0.1.19/Makefile.mingw x samtools-0.1.19/NEWS x samtools-0.1.19/bam.c x samtools-0.1.19/bam.h x samtools-0.1.19/bam2bcf.c x samtools-0.1.19/bam2bcf.h x samtools-0.1.19/bam2bcf_indel.c x samtools-0.1.19/bam2depth.c x samtools-0.1.19/bam_aux.c x samtools-0.1.19/bam_cat.c x samtools-0.1.19/bam_color.c x samtools-0.1.19/bam_endian.h x samtools-0.1.19/bam_import.c x samtools-0.1.19/bam_index.c x samtools-0.1.19/bam_lpileup.c x samtools-0.1.19/bam_mate.c x samtools-0.1.19/bam_md.c x samtools-0.1.19/bam_pileup.c x samtools-0.1.19/bam_plcmd.c x samtools-0.1.19/bam_reheader.c x samtools-0.1.19/bam_rmdup.c x samtools-0.1.19/bam_rmdupse.c x samtools-0.1.19/bam_sort.c x samtools-0.1.19/bam_stat.c x samtools-0.1.19/bam_tview.c x samtools-0.1.19/bam_tview.h x samtools-0.1.19/bam_tview_curses.c x samtools-0.1.19/bam_tview_html.c x samtools-0.1.19/bamshuf.c x samtools-0.1.19/bamtk.c x samtools-0.1.19/bcftools/ x samtools-0.1.19/bcftools/Makefile x samtools-0.1.19/bcftools/README x samtools-0.1.19/bcftools/bcf.c x samtools-0.1.19/bcftools/bcf.h x samtools-0.1.19/bcftools/bcf.tex x samtools-0.1.19/bcftools/bcf2qcall.c x samtools-0.1.19/bcftools/bcfutils.c x samtools-0.1.19/bcftools/call1.c x samtools-0.1.19/bcftools/em.c x samtools-0.1.19/bcftools/fet.c x samtools-0.1.19/bcftools/index.c x samtools-0.1.19/bcftools/kfunc.c x samtools-0.1.19/bcftools/kmin.c x samtools-0.1.19/bcftools/kmin.h x samtools-0.1.19/bcftools/main.c x samtools-0.1.19/bcftools/mut.c x samtools-0.1.19/bcftools/prob1.c x samtools-0.1.19/bcftools/prob1.h x samtools-0.1.19/bcftools/vcf.c x samtools-0.1.19/bcftools/vcfutils.pl x samtools-0.1.19/bcftools/bcf.h~ x samtools-0.1.19/bedcov.c x samtools-0.1.19/bedidx.c x samtools-0.1.19/bgzf.c x samtools-0.1.19/bgzf.h x samtools-0.1.19/bgzip.c x samtools-0.1.19/cut_target.c x 
samtools-0.1.19/errmod.c x samtools-0.1.19/errmod.h x samtools-0.1.19/examples/ x samtools-0.1.19/examples/00README.txt x samtools-0.1.19/examples/Makefile x samtools-0.1.19/examples/bam2bed.c x samtools-0.1.19/examples/calDepth.c x samtools-0.1.19/examples/chk_indel.c x samtools-0.1.19/examples/ex1.fa x samtools-0.1.19/examples/ex1.sam.gz x samtools-0.1.19/examples/toy.fa x samtools-0.1.19/examples/toy.sam x samtools-0.1.19/faidx.c x samtools-0.1.19/faidx.h x samtools-0.1.19/kaln.c x samtools-0.1.19/kaln.h x samtools-0.1.19/khash.h x samtools-0.1.19/klist.h x samtools-0.1.19/knetfile.c x samtools-0.1.19/knetfile.h x samtools-0.1.19/kprobaln.c x samtools-0.1.19/kprobaln.h x samtools-0.1.19/kseq.h x samtools-0.1.19/ksort.h x samtools-0.1.19/kstring.c x samtools-0.1.19/kstring.h x samtools-0.1.19/misc/ x samtools-0.1.19/misc/HmmGlocal.java x samtools-0.1.19/misc/Makefile x samtools-0.1.19/misc/ace2sam.c x samtools-0.1.19/misc/bamcheck.c x samtools-0.1.19/misc/blast2sam.pl x samtools-0.1.19/misc/bowtie2sam.pl x samtools-0.1.19/misc/export2sam.pl x samtools-0.1.19/misc/interpolate_sam.pl x samtools-0.1.19/misc/maq2sam.c x samtools-0.1.19/misc/md5.c x samtools-0.1.19/misc/md5.h x samtools-0.1.19/misc/md5fa.c x samtools-0.1.19/misc/novo2sam.pl x samtools-0.1.19/misc/plot-bamcheck x samtools-0.1.19/misc/psl2sam.pl x samtools-0.1.19/misc/r2plot.lua x samtools-0.1.19/misc/sam2vcf.pl x samtools-0.1.19/misc/samtools.pl x samtools-0.1.19/misc/soap2sam.pl x samtools-0.1.19/misc/varfilter.py x samtools-0.1.19/misc/vcfutils.lua x samtools-0.1.19/misc/wgsim.c x samtools-0.1.19/misc/wgsim_eval.pl x samtools-0.1.19/misc/zoom2sam.pl x samtools-0.1.19/padding.c x samtools-0.1.19/phase.c x samtools-0.1.19/razf.c x samtools-0.1.19/razf.h x samtools-0.1.19/razip.c x samtools-0.1.19/sam.c x samtools-0.1.19/sam.h x samtools-0.1.19/sam_header.c x samtools-0.1.19/sam_header.h x samtools-0.1.19/sam_view.c x samtools-0.1.19/sample.c x samtools-0.1.19/sample.h x samtools-0.1.19/samtools.1 x 
samtools-0.1.19/win32/ x samtools-0.1.19/win32/libcurses.a x samtools-0.1.19/win32/libz.a x samtools-0.1.19/win32/xcurses.h x samtools-0.1.19/win32/zconf.h x samtools-0.1.19/win32/zlib.h x samtools-0.1.19/bam.h~ make[2]: Nothing to be done for `lib'. make[2]: Nothing to be done for `lib'. make[2]: Nothing to be done for `lib'. gcc -g -Wall -O2 -o samtools bam_tview.o bam_plcmd.o sam_view.o bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o cut_target.o phase.o bam2depth.o padding.o bedcov.o bamshuf.o bam_tview_curses.o bam_tview_html.o libbam.a -Lbcftools -lbcf -lcurses -lm -lz -lpthread gcc -g -Wall -O2 -o bcftools call1.o main.o ../kstring.o ../bgzf.o ../knetfile.o ../bedidx.o -L. -lbcf -lm -lz -lpthread make[1]: Nothing to be done for `all'. g++ -O2 -o rrbsSnp main.o sam_funcs.o hash_funcs.o chrome_funcs.o -m64 -I./samtools-0.1.19/ -L./samtools-0.1.19/ -lbam -lz -lpthread
#Check that rrbsSnp was generated and look at other output
#!ls
BS-Snper.pl chrome_funcs.o rrbsSnp BS-Snper.sh hash_funcs.c sam_funcs.c Makefile hash_funcs.h sam_funcs.h README.txt hash_funcs.o sam_funcs.o chrome_funcs.c main.c samtools-0.1.19 chrome_funcs.h main.o samtools-0.1.19.tar.bz2
#Move to the project output directory
cd /Users/yaamini/Documents/project-gigas-oa-meth/output/
/Users/yaamini/Documents/project-gigas-oa-meth/output
#Create the directory for this notebook's output
#-p makes the command a no-op when the directory already exists, so re-running the cell no longer fails with "File exists"
!mkdir -p 07_BS-SNPer
mkdir: 07_BS-SNPer: File exists
#Work inside the BS-SNPer output directory created above
cd 07_BS-SNPer/
/Users/yaamini/Documents/project-gigas-oa-meth/output/07_BS-SNPer
#Directory containing the bedtools binaries; interpolated as {bedtoolsDirectory} in later shell commands
bedtoolsDirectory = "/Users/Shared/bioinformatics/bedtools2/bin/"
I will identify variants in individual files, as well as SNPs across all samples.
To identify SNPs across all samples, I need to merge my samples into a single BAM file, then use the merged file as the input for BS-Snper.
#Print the usage/options for samtools merge
#For this subcommand -h is not a help flag: it expects a header FILE ("-h FILE Copy the header in FILE to <out.bam>"),
#so "-h" on its own errors with "option requires an argument". Running the subcommand with no arguments prints the usage text.
!samtools merge
samtools: option requires an argument -- h Usage: samtools merge [-nurlf] [-h inh.sam] [-b <bamlist.fofn>] <out.bam> <in1.bam> [<in2.bam> ... <inN.bam>] Options: -n Input files are sorted by read name -t TAG Input files are sorted by TAG value -r Attach RG tag (inferred from file names) -u Uncompressed BAM output -f Overwrite the output BAM if exist -1 Compress level 1 -l INT Compression level, from 0 to 9 [-1] -R STR Merge file in the specified region STR [all] -h FILE Copy the header in FILE to <out.bam> [in1.bam] -c Combine @RG headers with colliding IDs [alter IDs to be distinct] -p Combine @PG headers with colliding IDs [alter IDs to be distinct] -s VALUE Override random seed -b FILE List of input BAM filenames, one per line [null] -X Use customized index files -L FILE Specify a BED file for multiple region filtering [null] --no-PG do not add a PG line --input-fmt-option OPT[=VAL] Specify a single input file format option in the form of OPTION or OPTION=VALUE -O, --output-fmt FORMAT[,OPT[=VAL]]... Specify output format (SAM, BAM, CRAM) --output-fmt-option OPT[=VAL] Specify a single output file format option in the form of OPTION or OPTION=VALUE --reference FILE Reference sequence FASTA FILE [null] -@, --threads INT Number of additional threads to use [0] --write-index Automatically index the output files [off] --verbosity INT Set level of verbosity
%%bash
# Merge the eight per-sample deduplicated, sorted Bismark alignments (zr3616_1 ... zr3616_8)
# into a single BAM. samtools merge takes the output BAM first, followed by the input BAMs.
bismark_dir=/Volumes/web/spartina/project-gigas-oa-meth/output/bismark-roslin
merged_bam=/Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/merged-sorted-deduplicated.bam

# Build the input list in sample order 1-8
sample_bams=()
for sample in 1 2 3 4 5 6 7 8; do
  sample_bams+=("${bismark_dir}/zr3616_${sample}_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam")
done

samtools merge "${merged_bam}" "${sample_bams[@]}"
#View the first 10 alignment records of the merged BAM
#(samtools view without -h/-H prints alignment records only, not the @-prefixed header lines)
#head closes the pipe after 10 lines, so the "Broken pipe" message samtools prints afterwards is expected
!samtools view /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/merged-sorted-deduplicated.bam \
| head
A00564:290:HLTHCDSXY:2:1437:25039:22748_1:N:0:GGTAACTC+TCACCAAC 83 NC_047559.1 20 2 81M = 35 88 AAAAACAAATATTTCTATAAAAAAATTACATCCCTAAAAAAAAACCAATATTCTTCATTTTAATATTAAAATAAAAATATA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:10 MD:Z:2G0G17G13G0G3G23G8G0G0G5 XM:Z:..hh.................h.............xh...h.......................h........hhh..... XR:Z:CT XG:Z:GA A00564:290:HLTHCDSXY:2:1437:25039:22748_1:N:0:GGTAACTC+TCACCAAC 163 NC_047559.1 35 2 66M7I7M = 20 -88 TATAAAAAAATTACATCCCTAAAAAAAAACCAATATTCTTCATTTTAATATTAAAATAAAAATATACAATCACGTACAAA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:17 MD:Z:6G13G0G3G23G8G0G0G7G0T3 XM:Z:......h.............xh...h.......................h........hhh............U.h.... XR:Z:GA XG:Z:GA A00564:290:HLTHCDSXY:2:1416:25247:3192_1:N:0:TATGTAGT+CATTAGTG 83 NC_047559.1 116 0 102M = 131 115 AAAACCCCCTAAAACTTTTAAAATCAATAATAACACCCATATTTCTCCTTTAATACATTAATACAAAAAATACAAAAAATACATTAACATTAATTTCAAATA :F:FFFFFFFF::FFF:FFFFFFFFFF:F:FFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFF NM:i:16 MD:Z:3G15G0G0G16G9G2G15G1G0A5G2A0T5G5G5G3 XM:Z:...h...............hhh................z............h...............h.h......h.........h.....h.....x... XR:Z:CT XG:Z:GA A00564:290:HLTHCDSXY:2:1416:25247:3192_1:N:0:TATGTAGT+CATTAGTG 163 NC_047559.1 131 0 93M1I7M = 116 -115 TTTTAAAATCAATAATAACACCCATATTTCTCCTTTAATACATTAATACAAAAAATACAAAAAATACATTAACATTAATTTCAAATAAAATTCTTTTATAA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFF NM:i:16 MD:Z:4G0G0G16G9G2G15G1G0A5G2A0T5G5G5G16 XM:Z:....hhh................z............h...............h.h......h.........h.....h.....x................. 
XR:Z:GA XG:Z:GA A00564:290:HLTHCDSXY:2:2449:9109:9377_1:N:0:CCACCAGG+ATTCCATA 83 NC_047559.1 246 6 40M = 257 -54 TAAAAAAACTCAAAATATTCTATTTATCATTAAAAAAAAT FFFFFFFFFFFFFFF:F:FFFFFFFFFFFFFFFFFFFFFF NM:i:5 MD:Z:1G1G17G4G1G11 XM:Z:.x.h.................x......z........... XR:Z:CT XG:Z:GA A00564:290:HLTHCDSXY:2:2449:9109:9377_1:N:0:CCACCAGG+ATTCCATA 163 NC_047559.1 257 6 43M = 246 54 AAAATATTCTATTTATCATTAAAAAAAATCAATAAACTTAATT FFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF,FFFF:FFFF NM:i:5 MD:Z:10G4G1G13G2G8 XM:Z:..........x......z.............x..h........ XR:Z:GA XG:Z:GA A00564:290:HLTHCDSXY:2:2212:29857:9643_1:N:0:CAATCGGC+TTCCTACA 83 NC_047559.1 398 6 15M1D34M = 414 -64 AAAAATCACTTTATTAAAATAACATCCTCAAAAAAAAACCCATATTCTT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:6 MD:Z:15^A5G9G0G2G0G13 XM:Z:....................h.........xh..hh............. XR:Z:CT XG:Z:GA A00564:290:HLTHCDSXY:2:2212:29857:9643_1:N:0:CAATCGGC+TTCCTACA 163 NC_047559.1 414 6 48M = 398 64 AAAATAACATCCTCAAAAAAAAACCCATATTCTTCATTTTAACGTTCA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:5 MD:Z:5G9G0G2G0G27 XM:Z:.....h.........xh..hh......................Z.... XR:Z:GA XG:Z:GA A00564:290:HLTHCDSXY:2:1678:11550:29450_1:N:0:GCCGCACT+CGAGGTCG 83 NC_047559.1 490 6 112M = 505 -125 AACCCCCAAAAACTTTTAAAATTAAAACTTACACCATCAATTCTCCTTTAATACATTAATACAAAAAAAATAATCAACTTTAATTTTAAAAAAAATCCTTTTATTAAAAAAT FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:15 MD:Z:0G16G0G0G5G24G1G6A5G10G5G5G0G15G1G4 XM:Z:h................hhh.....h........................h.h............h..........x.....h.....hh...............h.h.... XR:Z:CT XG:Z:GA A00564:290:HLTHCDSXY:2:2632:5746:2080_1:N:0:CCACCAGG+ATTCCATA 83 NC_047559.1 490 6 58M = 505 75 AACCCCCAAAAACTTTTAAAATTAAAACTTACACCATCAATTCTCCTTTAATACATTA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF NM:i:7 MD:Z:0G16G0G0G5G24G1G5 XM:Z:h................hhh.....h........................h.h..... 
XR:Z:CT XG:Z:GA samtools view: writing to standard output failed: Broken pipe samtools view: error closing standard output: -1
#Create index file for IGV (-b requests a BAI-format index)
!samtools index -b /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/merged-sorted-deduplicated.bam
#Confirm the .bam.bai index now exists next to the merged BAM
!find /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/merged-sorted-deduplicated.bam.bai
/Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/merged-sorted-deduplicated.bam.bai
Options for the script are described in the BS-Snper README (https://github.com/hellbelly/BS-Snper) and summarized below. Input and output files should be given as absolute paths.
--fa: Reference genome file in fasta format
--input: Input bam file (I'm using deduplicated sorted bams)
--output: Temporary file storing SNP candidates
--methcg: CpG methylation information
--methchg: CHG methylation information
--methchh: CHH methylation information
--minhetfreq: Threshold of frequency for calling heterozygous SNP
--minhomfreq: Threshold of frequency for calling homozygous SNP
--minquali: Threshold of base quality
--mincover: Threshold of minimum depth of covered reads
--maxcover: Threshold of maximum depth of covered reads
--minread2: Minimum mutation reads number
--errorate: Minimum mutation rate
--mapvalue: Minimum read mapping value
SNP.out: Final SNP result file
ERR.log: Log file
#Call SNPs on the merged BAM with BS-Snper
#Defaults: --minhetfreq 0.1 --minhomfreq 0.85 --minquali 15 --maxcover 1000 --minread2 2 --errorate 0.02 --mapvalue 20
#Only --mincover (5) is set explicitly below; every other threshold is left at the default listed above
#Saving output to gannet
#stdout (the SNP calls, in VCF format) is redirected to SNP-results.vcf; stderr is redirected to merged.ERR.log
!perl /Users/Shared/Apps/BS-Snper-master/BS-Snper.pl \
--fa /Volumes/web/spartina/project-oyster-oa/data/Cg-roslin/cgigas_uk_roslin_v1_genomic-mito.fa \
--input /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/merged-sorted-deduplicated.bam \
--output /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/SNP-candidates.txt \
--methcg /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CpG-meth-info.tab \
--methchg /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CHG-meth-info.tab \
--methchh /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CHH-meth-info.tab \
--mincover 5 \
> /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/SNP-results.vcf 2> /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/merged.ERR.log
#Preview the first 10 lines of the merged-sample VCF (these are ## header lines)
!head /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/SNP-results.vcf
##fileformat=VCFv4.3 ##fileDate= 20210407 ##bssnperVersion=1.1 ##bssnperCommand=--fa /Volumes/web/spartina/project-oyster-oa/data/Cg-roslin/cgigas_uk_roslin_v1_genomic-mito.fa --input /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/merged-sorted-deduplicated.bam --output /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/SNP-candidates.txt --methcg /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CpG-meth-info.tab --methchg /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CHG-meth-info.tab --methchh /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CHH-meth-info.tab --minhetfreq 0.1 --minhomfreq 0.85 --minquali 15 --mincover 5 --maxcover 1000 --minread2 2 --errorate 0.02 --mapvalue 20 ##reference=file:///Volumes/web/spartina/project-oyster-oa/data/Cg-roslin/cgigas_uk_roslin_v1_genomic-mito.fa ##Bisulfite=directional> ##contig=<ID=NC_047559.1,length=55785328> ##contig=<ID=NC_047560.1,length=73222313> ##contig=<ID=NC_047561.1,length=58319100> ##contig=<ID=NC_047562.1,length=53127865>
#Count lines in the merged-sample VCF
#The total includes the ## header lines, so it is slightly larger than the number of SNP records
!wc -l /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/SNP-results.vcf
5519308 /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/SNP-results.vcf
I'll take the VCF output from BS-Snper and use it with `bedtools intersect` to determine which SNPs are present at CG sites.
#Intersect VCF with SNP locations and CG motif track
#-u writes each SNP record once if it overlaps any feature in the CG motif track
#BEDtools output has C/T and non-C/T SNPs
#Use grep to isolate C/T SNPs
#The CG motif track is given as an absolute path (the same file that ../../genome-feature-files/ resolves to from output/07_BS-SNPer),
#so this cell still works if it is re-run after later cells change the working directory
#NOTE(review): the separator inside the grep pattern must match the VCF column delimiter (a tab in standard VCF) -- confirm the character between C and T
!{bedtoolsDirectory}intersectBed \
-u \
-a /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/SNP-results.vcf \
-b /Users/yaamini/Documents/project-gigas-oa-meth/genome-feature-files/cgigas_uk_roslin_v1_fuzznuc_CGmotif.gff \
| grep "C T" \
> /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CT-SNPs.tab
#Preview the first 10 C/T SNPs that overlap CG motifs, then count them (one SNP per line; this file has no header)
!head /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CT-SNPs.tab
!wc -l /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CT-SNPs.tab
NC_047559.1 22443 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 34836 . C T 1000 PASS DP=39;ADF=0,0;ADR=0,39;AD=0,39; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:39:0,0:0,39:0,39:0,0,78,0,0,39,0,0:0,0,37,0,0,36,0,0:0.000,1.000 NC_047559.1 36674 . C T 1000 PASS DP=43;ADF=0,0;ADR=21,22;AD=21,22; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:43:0,0:21,22:21,22:0,0,123,0,0,22,21,0:0,0,37,0,0,37,36,0:0.488,0.512 NC_047559.1 38038 . C T 1000 PASS DP=43;ADF=0,0;ADR=30,13;AD=30,13; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:43:0,0:30,13:30,13:0,0,31,0,0,13,30,0:0,0,36,0,0,36,37,0:0.698,0.302 NC_047559.1 44211 . C T 29 PASS DP=5;ADF=0,0;ADR=1,4;AD=1,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:1,4:1,4:0,0,0,0,0,4,1,0:0,0,0,0,0,37,37,0:0.200,0.800 NC_047559.1 48352 . C T 104 PASS DP=5;ADF=0,0;ADR=0,5;AD=0,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:0,5:0,5:0,0,19,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 49472 . C T 1000 PASS DP=10;ADF=0,0;ADR=1,9;AD=1,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:10:0,0:1,9:1,9:0,0,16,0,0,9,1,0:0,0,37,0,0,37,37,0:0.100,0.900 NC_047559.1 82690 . C T 1000 PASS DP=53;ADF=0,0;ADR=2,51;AD=2,51; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:53:0,0:2,51:2,51:0,0,31,0,0,51,2,0:0,0,37,0,0,37,37,0:0.038,0.962 NC_047559.1 83012 . C T 132 PASS DP=34;ADF=0,0;ADR=0,34;AD=0,34; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:34:0,0:0,34:0,34:0,0,6,0,0,34,0,0:0,0,35,0,0,37,0,0:0.000,1.000 NC_047559.1 87321 . C T 47 PASS DP=11;ADF=0,0;ADR=0,11;AD=0,11; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:0,11:0,11:0,0,2,0,0,11,0,0:0,0,37,0,0,37,0,0:0.000,1.000 122306 /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/CT-SNPs.tab
#Move to the directory holding the per-sample Bismark alignments
cd /Volumes/web/spartina/project-gigas-oa-meth/output/bismark-roslin/
/Volumes/web/spartina/project-gigas-oa-meth/output/bismark-roslin
#Count the deduplicated, sorted BAM files in this directory (find prints one path per line)
!find *deduplicated.sorted.bam | wc -l
8
%%bash
# Call SNPs separately for every deduplicated, sorted Bismark BAM in the current directory.
# For each sample, BS-Snper's stdout (the SNP calls) is saved to <sample>.SNP-results.vcf and
# its stderr to <sample>.ERR.log; the candidate and methylation tables are written next to them.
# Only --mincover is set explicitly; all other thresholds keep their defaults.

# Expand the glob directly instead of parsing `ls`; nullglob drops the pattern when nothing matches.
shopt -s nullglob
bam_files=(*deduplicated.sorted.bam)
if [ "${#bam_files[@]}" -eq 0 ]; then
  echo "No *deduplicated.sorted.bam files found in ${PWD}" >&2
fi

# Print all inputs on one space-separated line
echo "${bam_files[*]}"

for file in "${bam_files[@]}"
do
  # Sample prefix = everything before the first "." in the file name
  NAME="${file%%.*}"
  echo "${NAME}"
  # Pass the matched file itself as --input rather than rebuilding its name from the prefix
  perl /Users/Shared/Apps/BS-Snper-master/BS-Snper.pl \
  --fa /Volumes/web/spartina/project-oyster-oa/data/Cg-roslin/cgigas_uk_roslin_v1_genomic-mito.fa \
  --input "${file}" \
  --output "${NAME}.SNP-candidates.txt" \
  --methcg "${NAME}.CpG-meth-info.tab" \
  --methchg "${NAME}.CHG-meth-info.tab" \
  --methchh "${NAME}.CHH-meth-info.tab" \
  --mincover 5 \
  > "${NAME}.SNP-results.vcf" 2> "${NAME}.ERR.log"
done
zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe
# Move files to BS-Snper directory
# The globs are relative to the current directory (bismark-roslin), where the per-sample loop wrote its outputs
!mv *SNP-candidates.txt *meth-info.tab *SNP-results.vcf *ERR.log ../BS-Snper/
#Switch to the BS-Snper output directory, where the per-sample files were just moved
cd /Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper/
/Volumes/web/spartina/project-gigas-oa-meth/output/BS-Snper
#Count lines in each per-sample VCF, plus a grand total
#NOTE(review): counts presumably include VCF header lines as well as SNP records -- confirm before reporting SNP numbers
!wc -l zr3616*vcf
3083382 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf 3105080 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf 3202988 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf 3232583 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf 3007518 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf 3204395 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf 3083706 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf 3100801 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf 25020453 total
%%bash
# For every per-sample VCF: keep SNPs that overlap a CG motif (-u reports each SNP once),
# isolate the C/T SNPs with grep, save them to <vcf file name>_CT-SNPs.tab,
# then preview the first 10 lines and print the line count of that file.
# NOTE(review): the separator inside the grep pattern must match the VCF column delimiter
# (a tab in standard VCF) -- confirm the character between C and T.
for f in zr3616*vcf
do
  # If no VCF matches, the glob stays a literal pattern; skip it instead of passing it to bedtools
  [ -e "${f}" ] || continue
  /Users/Shared/bioinformatics/bedtools2/bin/intersectBed \
  -u \
  -a "${f}" \
  -b /Users/yaamini/Documents/project-gigas-oa-meth/genome-feature-files/cgigas_uk_roslin_v1_fuzznuc_CGmotif.gff \
  | grep "C T" \
  > "${f}_CT-SNPs.tab"
  head "${f}_CT-SNPs.tab"
  wc -l "${f}_CT-SNPs.tab"
done
NC_047559.1 22443 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 34836 . C T 142 PASS DP=6;ADF=0,0;ADR=0,6;AD=0,6; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:6:0,0:0,6:0,6:0,0,17,0,0,6,0,0:0,0,36,0,0,37,0,0:0.000,1.000 NC_047559.1 36674 . C T 29 PASS DP=5;ADF=0,0;ADR=0,5;AD=0,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:0,5:0,5:0,0,44,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 38038 . C T 1000 PASS DP=12;ADF=0,0;ADR=5,7;AD=5,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:12:0,0:5,7:5,7:0,0,13,0,0,7,5,0:0,0,37,0,0,35,35,0:0.417,0.583 NC_047559.1 42517 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 48352 . C T 72 Low DP=3;ADF=0,0;ADR=0,3;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:3:0,0:0,3:0,3:0,0,4,0,0,3,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 49472 . C T 68 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,2,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 108375 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 197299 . C T 38 PASS DP=11;ADF=0,0;ADR=7,4;AD=7,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:7,4:7,4:0,0,21,0,0,4,7,0:0,0,37,0,0,37,37,0:0.636,0.364 NC_047559.1 205322 . C T 115 PASS DP=28;ADF=0,0;ADR=22,6;AD=22,6; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:28:0,0:22,6:22,6:0,0,6,0,0,6,22,0:0,0,37,0,0,35,36,0:0.786,0.214 69845 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab NC_047559.1 82690 . C T 1000 PASS DP=7;ADF=0,0;ADR=0,7;AD=0,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:7:0,0:0,7:0,7:0,0,12,0,0,7,0,0:0,0,37,0,0,35,0,0:0.000,1.000 NC_047559.1 83012 . C T 50 PASS DP=10;ADF=0,0;ADR=0,10;AD=0,10; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:10:0,0:0,10:0,10:0,0,2,0,0,10,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 83513 . 
C T 9 Low DP=3;ADF=0,0;ADR=0,3;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:3:0,0:0,3:0,3:0,0,0,0,0,3,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 100873 . C T 123 PASS DP=10;ADF=0,0;ADR=0,10;AD=0,10; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:10:0,0:0,10:0,10:0,0,4,0,0,10,0,0:0,0,37,0,0,36,0,0:0.000,1.000 NC_047559.1 125852 . C T 141 PASS DP=24;ADF=0,0;ADR=17,7;AD=17,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:24:0,0:17,7:17,7:0,0,13,0,0,7,17,0:0,0,35,0,0,37,36,0:0.708,0.292 NC_047559.1 183845 . C T 1000 PASS DP=7;ADF=0,0;ADR=0,7;AD=0,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:7:0,0:0,7:0,7:0,0,12,0,0,7,0,0:0,0,37,0,0,35,0,0:0.000,1.000 NC_047559.1 191463 . C T 107 PASS DP=11;ADF=0,0;ADR=6,5;AD=6,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:6,5:6,5:0,0,12,0,0,5,6,0:0,0,37,0,0,37,37,0:0.545,0.455 NC_047559.1 211764 . C T 1000 PASS DP=17;ADF=0,0;ADR=0,17;AD=0,17; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:17:0,0:0,17:0,17:0,0,8,0,0,17,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 219023 . C T 29 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,1,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 219665 . C T 1000 PASS DP=40;ADF=0,0;ADR=21,19;AD=21,19; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:40:0,0:21,19:21,19:0,0,11,0,0,19,21,0:0,0,37,0,0,36,37,0:0.525,0.475 70244 zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab NC_047559.1 34836 . C T 1000 PASS DP=9;ADF=0,0;ADR=0,9;AD=0,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:0,9:0,9:0,0,25,0,0,9,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 36674 . C T 1000 PASS DP=13;ADF=0,0;ADR=0,13;AD=0,13; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:13:0,0:0,13:0,13:0,0,36,0,0,13,0,0:0,0,36,0,0,37,0,0:0.000,1.000 NC_047559.1 49472 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 100045 . 
C T 105 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,3,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 100873 . C T 100 PASS DP=6;ADF=0,0;ADR=0,6;AD=0,6; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:6:0,0:0,6:0,6:0,0,3,0,0,6,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 125852 . C T 80 PASS DP=16;ADF=0,0;ADR=12,4;AD=12,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:16:0,0:12,4:12,4:0,0,2,0,0,4,12,0:0,0,37,0,0,37,36,0:0.750,0.250 NC_047559.1 149250 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 199691 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 211764 . C T 1000 PASS DP=21;ADF=0,0;ADR=0,21;AD=0,21; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:21:0,0:0,21:0,21:0,0,6,0,0,21,0,0:0,0,35,0,0,37,0,0:0.000,1.000 NC_047559.1 219244 . C T 26 PASS DP=5;ADF=0,0;ADR=0,5;AD=0,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:0,5:0,5:0,0,1,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000 72064 zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab NC_047559.1 77086 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 211764 . C T 1000 PASS DP=34;ADF=0,0;ADR=0,34;AD=0,34; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:34:0,0:0,34:0,34:0,0,8,0,0,34,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 222020 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 353501 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,31,0,0:0.000,1.000 NC_047559.1 357397 . C T 53 PASS DP=9;ADF=0,0;ADR=0,9;AD=0,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:0,9:0,9:0,0,2,0,0,9,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 357948 . 
C T 1000 PASS DP=13;ADF=0,0;ADR=0,13;AD=0,13; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:13:0,0:0,13:0,13:0,0,6,0,0,13,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 378466 . C T 1000 PASS DP=36;ADF=0,0;ADR=0,36;AD=0,36; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:36:0,0:0,36:0,36:0,0,14,0,0,36,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 428033 . C T 68 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,2,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 433155 . C T 77 PASS DP=9;ADF=0,0;ADR=5,4;AD=5,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:5,4:5,4:0,0,10,0,0,4,5,0:0,0,37,0,0,37,37,0:0.556,0.444 NC_047559.1 469275 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 73664 zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab NC_047559.1 34836 . C T 1000 PASS DP=9;ADF=0,0;ADR=0,9;AD=0,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:0,9:0,9:0,0,16,0,0,9,0,0:0,0,37,0,0,36,0,0:0.000,1.000 NC_047559.1 49333 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 82690 . C T 1000 PASS DP=31;ADF=0,0;ADR=2,29;AD=2,29; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:31:0,0:2,29:2,29:0,0,15,0,0,29,2,0:0,0,37,0,0,37,37,0:0.065,0.935 NC_047559.1 83012 . C T 130 PASS DP=9;ADF=0,0;ADR=0,9;AD=0,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:0,9:0,9:0,0,4,0,0,9,0,0:0,0,34,0,0,37,0,0:0.000,1.000 NC_047559.1 87321 . C T 70 Low DP=3;ADF=0,0;ADR=0,3;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:3:0,0:0,3:0,3:0,0,2,0,0,3,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 210243 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 211764 . C T 1000 PASS DP=22;ADF=0,0;ADR=2,20;AD=2,20; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:22:0,0:2,20:2,20:0,0,15,0,0,20,2,0:0,0,37,0,0,36,37,0:0.091,0.909 NC_047559.1 223627 . 
C T 95 PASS DP=10;ADF=0,0;ADR=6,4;AD=6,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:10:0,0:6,4:6,4:0,0,3,0,0,4,6,0:0,0,33,0,0,37,37,0:0.600,0.400 NC_047559.1 224536 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,25,0,0:0.000,1.000 NC_047559.1 236983 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 66965 zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab NC_047559.1 211764 . C T 1000 PASS DP=17;ADF=0,0;ADR=0,17;AD=0,17; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:17:0,0:0,17:0,17:0,0,13,0,0,17,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 222409 . C T 12 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:4:0,0:0,4:0,4:0,0,0,0,0,4,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 245710 . C T 20 PASS DP=7;ADF=0,0;ADR=0,7;AD=0,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:7:0,0:0,7:0,7:0,0,1,0,0,7,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 251061 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 252351 . C T 36 PASS DP=13;ADF=0,0;ADR=10,3;AD=10,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:13:0,0:10,3:10,3:0,0,6,0,0,3,10,0:0,0,37,0,0,37,37,0:0.769,0.231 NC_047559.1 261933 . C T 1000 PASS DP=8;ADF=0,0;ADR=1,7;AD=1,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:8:0,0:1,7:1,7:0,0,6,0,0,7,1,0:0,0,37,0,0,37,37,0:0.125,0.875 NC_047559.1 267998 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,25,0,0:0.000,1.000 NC_047559.1 268110 . C T 1000 PASS DP=8;ADF=0,0;ADR=0,8;AD=0,8; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:8:0,0:0,8:0,8:0,0,14,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 268671 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 287651 . 
C T 24 Low DP=3;ADF=0,0;ADR=0,3;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:3:0,0:0,3:0,3:0,0,20,0,0,3,0,0:0,0,37,0,0,33,0,0:0.000,1.000 72123 zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab NC_047559.1 22959 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,25,0,0:0.000,1.000 NC_047559.1 34836 . C T 1000 PASS DP=13;ADF=0,0;ADR=0,13;AD=0,13; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:13:0,0:0,13:0,13:0,0,10,0,0,13,0,0:0,0,37,0,0,35,0,0:0.000,1.000 NC_047559.1 38038 . C T 92 PASS DP=9;ADF=0,0;ADR=5,4;AD=5,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:5,4:5,4:0,0,5,0,0,4,5,0:0,0,37,0,0,37,37,0:0.556,0.444 NC_047559.1 211764 . C T 102 PASS DP=17;ADF=0,0;ADR=0,17;AD=0,17; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:17:0,0:0,17:0,17:0,0,4,0,0,17,0,0:0,0,34,0,0,36,0,0:0.000,1.000 NC_047559.1 224774 . C T 105 PASS DP=6;ADF=0,0;ADR=2,4;AD=2,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:6:0,0:2,4:2,4:0,0,1,0,0,4,2,0:0,0,37,0,0,37,37,0:0.333,0.667 NC_047559.1 249908 . C T 1000 PASS DP=8;ADF=0,0;ADR=0,8;AD=0,8; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:8:0,0:0,8:0,8:0,0,6,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 250602 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 251061 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 356833 . C T 141 PASS DP=5;ADF=0,0;ADR=0,5;AD=0,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:0,5:0,5:0,0,4,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 357397 . C T 9 PASS DP=11;ADF=0,0;ADR=0,11;AD=0,11; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:0,11:0,11:0,0,1,0,0,11,0,0:0,0,37,0,0,37,0,0:0.000,1.000 68856 zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab NC_047559.1 36674 . 
C T 59 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,21,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 42691 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 44211 . C T 12 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:4:0,0:0,4:0,4:0,0,0,0,0,4,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 82690 . C T 112 PASS DP=15;ADF=0,0;ADR=0,15;AD=0,15; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:15:0,0:0,15:0,15:0,0,4,0,0,15,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 83012 . C T 45 PASS DP=15;ADF=0,0;ADR=0,15;AD=0,15; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:15:0,0:0,15:0,15:0,0,0,0,0,15,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 87321 . C T 24 PASS DP=8;ADF=0,0;ADR=0,8;AD=0,8; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:8:0,0:0,8:0,8:0,0,0,0,0,8,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 121206 . C T 18 PASS DP=6;ADF=0,0;ADR=0,6;AD=0,6; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:6:0,0:0,6:0,6:0,0,0,0,0,6,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 188398 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 224356 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 252976 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 69695 zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab
#List the per-sample C/T SNP tables (the shell expands the glob; find prints each matching name)
!find zr3616*CT-SNPs.tab
zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab
I'll now rename the output files so they are shorter.
%%bash
# Shorten each per-sample C/T SNP table name by deleting the aligner/caller
# infix, e.g.
#   zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf_CT-SNPs.tab
#   -> zr3616_1_CT-SNPs.tab
infix="_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf"
for f in zr3616*CT-SNPs.tab
do
    # An unmatched glob stays as the literal pattern; skip it (and anything
    # that is not a regular file). Quoted so names with whitespace still pass.
    [ -f "${f}" ] || continue
    short="${f//"${infix}"/}"
    # Already short (e.g. the cell was re-run): mv onto itself would error.
    [ "${short}" != "${f}" ] || continue
    mv -- "${f}" "${short}"
done
#List the C/T SNP tables again to check the file names after the rename step
!find zr3616*CT-SNPs.tab
zr3616_1_CT-SNPs.tab zr3616_2_CT-SNPs.tab zr3616_3_CT-SNPs.tab zr3616_4_CT-SNPs.tab zr3616_5_CT-SNPs.tab zr3616_6_CT-SNPs.tab zr3616_7_CT-SNPs.tab zr3616_8_CT-SNPs.tab
#Preview the top of each per-sample C/T SNP table (head prints a "==> name <==" banner per file)
!head zr3616*CT-SNPs.tab
#Count the lines in each table; wc also prints a grand total
!wc -l zr3616*CT-SNPs.tab
==> zr3616_1_CT-SNPs.tab <== NC_047559.1 22443 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 34836 . C T 142 PASS DP=6;ADF=0,0;ADR=0,6;AD=0,6; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:6:0,0:0,6:0,6:0,0,17,0,0,6,0,0:0,0,36,0,0,37,0,0:0.000,1.000 NC_047559.1 36674 . C T 29 PASS DP=5;ADF=0,0;ADR=0,5;AD=0,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:0,5:0,5:0,0,44,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 38038 . C T 1000 PASS DP=12;ADF=0,0;ADR=5,7;AD=5,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:12:0,0:5,7:5,7:0,0,13,0,0,7,5,0:0,0,37,0,0,35,35,0:0.417,0.583 NC_047559.1 42517 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 48352 . C T 72 Low DP=3;ADF=0,0;ADR=0,3;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:3:0,0:0,3:0,3:0,0,4,0,0,3,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 49472 . C T 68 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,2,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 108375 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 197299 . C T 38 PASS DP=11;ADF=0,0;ADR=7,4;AD=7,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:7,4:7,4:0,0,21,0,0,4,7,0:0,0,37,0,0,37,37,0:0.636,0.364 NC_047559.1 205322 . C T 115 PASS DP=28;ADF=0,0;ADR=22,6;AD=22,6; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:28:0,0:22,6:22,6:0,0,6,0,0,6,22,0:0,0,37,0,0,35,36,0:0.786,0.214 ==> zr3616_2_CT-SNPs.tab <== NC_047559.1 82690 . C T 1000 PASS DP=7;ADF=0,0;ADR=0,7;AD=0,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:7:0,0:0,7:0,7:0,0,12,0,0,7,0,0:0,0,37,0,0,35,0,0:0.000,1.000 NC_047559.1 83012 . C T 50 PASS DP=10;ADF=0,0;ADR=0,10;AD=0,10; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:10:0,0:0,10:0,10:0,0,2,0,0,10,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 83513 . 
C T 9 Low DP=3;ADF=0,0;ADR=0,3;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:3:0,0:0,3:0,3:0,0,0,0,0,3,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 100873 . C T 123 PASS DP=10;ADF=0,0;ADR=0,10;AD=0,10; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:10:0,0:0,10:0,10:0,0,4,0,0,10,0,0:0,0,37,0,0,36,0,0:0.000,1.000 NC_047559.1 125852 . C T 141 PASS DP=24;ADF=0,0;ADR=17,7;AD=17,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:24:0,0:17,7:17,7:0,0,13,0,0,7,17,0:0,0,35,0,0,37,36,0:0.708,0.292 NC_047559.1 183845 . C T 1000 PASS DP=7;ADF=0,0;ADR=0,7;AD=0,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:7:0,0:0,7:0,7:0,0,12,0,0,7,0,0:0,0,37,0,0,35,0,0:0.000,1.000 NC_047559.1 191463 . C T 107 PASS DP=11;ADF=0,0;ADR=6,5;AD=6,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:6,5:6,5:0,0,12,0,0,5,6,0:0,0,37,0,0,37,37,0:0.545,0.455 NC_047559.1 211764 . C T 1000 PASS DP=17;ADF=0,0;ADR=0,17;AD=0,17; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:17:0,0:0,17:0,17:0,0,8,0,0,17,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 219023 . C T 29 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,1,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 219665 . C T 1000 PASS DP=40;ADF=0,0;ADR=21,19;AD=21,19; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:40:0,0:21,19:21,19:0,0,11,0,0,19,21,0:0,0,37,0,0,36,37,0:0.525,0.475 ==> zr3616_3_CT-SNPs.tab <== NC_047559.1 34836 . C T 1000 PASS DP=9;ADF=0,0;ADR=0,9;AD=0,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:0,9:0,9:0,0,25,0,0,9,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 36674 . C T 1000 PASS DP=13;ADF=0,0;ADR=0,13;AD=0,13; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:13:0,0:0,13:0,13:0,0,36,0,0,13,0,0:0,0,36,0,0,37,0,0:0.000,1.000 NC_047559.1 49472 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 100045 . C T 105 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,3,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 100873 . 
C T 100 PASS DP=6;ADF=0,0;ADR=0,6;AD=0,6; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:6:0,0:0,6:0,6:0,0,3,0,0,6,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 125852 . C T 80 PASS DP=16;ADF=0,0;ADR=12,4;AD=12,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:16:0,0:12,4:12,4:0,0,2,0,0,4,12,0:0,0,37,0,0,37,36,0:0.750,0.250 NC_047559.1 149250 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 199691 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 211764 . C T 1000 PASS DP=21;ADF=0,0;ADR=0,21;AD=0,21; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:21:0,0:0,21:0,21:0,0,6,0,0,21,0,0:0,0,35,0,0,37,0,0:0.000,1.000 NC_047559.1 219244 . C T 26 PASS DP=5;ADF=0,0;ADR=0,5;AD=0,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:0,5:0,5:0,0,1,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000 ==> zr3616_4_CT-SNPs.tab <== NC_047559.1 77086 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 211764 . C T 1000 PASS DP=34;ADF=0,0;ADR=0,34;AD=0,34; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:34:0,0:0,34:0,34:0,0,8,0,0,34,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 222020 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 353501 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,31,0,0:0.000,1.000 NC_047559.1 357397 . C T 53 PASS DP=9;ADF=0,0;ADR=0,9;AD=0,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:0,9:0,9:0,0,2,0,0,9,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 357948 . C T 1000 PASS DP=13;ADF=0,0;ADR=0,13;AD=0,13; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:13:0,0:0,13:0,13:0,0,6,0,0,13,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 378466 . 
C T 1000 PASS DP=36;ADF=0,0;ADR=0,36;AD=0,36; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:36:0,0:0,36:0,36:0,0,14,0,0,36,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 428033 . C T 68 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,2,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 433155 . C T 77 PASS DP=9;ADF=0,0;ADR=5,4;AD=5,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:5,4:5,4:0,0,10,0,0,4,5,0:0,0,37,0,0,37,37,0:0.556,0.444 NC_047559.1 469275 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 ==> zr3616_5_CT-SNPs.tab <== NC_047559.1 34836 . C T 1000 PASS DP=9;ADF=0,0;ADR=0,9;AD=0,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:0,9:0,9:0,0,16,0,0,9,0,0:0,0,37,0,0,36,0,0:0.000,1.000 NC_047559.1 49333 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 82690 . C T 1000 PASS DP=31;ADF=0,0;ADR=2,29;AD=2,29; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:31:0,0:2,29:2,29:0,0,15,0,0,29,2,0:0,0,37,0,0,37,37,0:0.065,0.935 NC_047559.1 83012 . C T 130 PASS DP=9;ADF=0,0;ADR=0,9;AD=0,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:0,9:0,9:0,0,4,0,0,9,0,0:0,0,34,0,0,37,0,0:0.000,1.000 NC_047559.1 87321 . C T 70 Low DP=3;ADF=0,0;ADR=0,3;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:3:0,0:0,3:0,3:0,0,2,0,0,3,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 210243 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 211764 . C T 1000 PASS DP=22;ADF=0,0;ADR=2,20;AD=2,20; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:22:0,0:2,20:2,20:0,0,15,0,0,20,2,0:0,0,37,0,0,36,37,0:0.091,0.909 NC_047559.1 223627 . C T 95 PASS DP=10;ADF=0,0;ADR=6,4;AD=6,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:10:0,0:6,4:6,4:0,0,3,0,0,4,6,0:0,0,33,0,0,37,37,0:0.600,0.400 NC_047559.1 224536 . 
C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,25,0,0:0.000,1.000 NC_047559.1 236983 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 ==> zr3616_6_CT-SNPs.tab <== NC_047559.1 211764 . C T 1000 PASS DP=17;ADF=0,0;ADR=0,17;AD=0,17; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:17:0,0:0,17:0,17:0,0,13,0,0,17,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 222409 . C T 12 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:4:0,0:0,4:0,4:0,0,0,0,0,4,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 245710 . C T 20 PASS DP=7;ADF=0,0;ADR=0,7;AD=0,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:7:0,0:0,7:0,7:0,0,1,0,0,7,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 251061 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 252351 . C T 36 PASS DP=13;ADF=0,0;ADR=10,3;AD=10,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:13:0,0:10,3:10,3:0,0,6,0,0,3,10,0:0,0,37,0,0,37,37,0:0.769,0.231 NC_047559.1 261933 . C T 1000 PASS DP=8;ADF=0,0;ADR=1,7;AD=1,7; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:8:0,0:1,7:1,7:0,0,6,0,0,7,1,0:0,0,37,0,0,37,37,0:0.125,0.875 NC_047559.1 267998 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,25,0,0:0.000,1.000 NC_047559.1 268110 . C T 1000 PASS DP=8;ADF=0,0;ADR=0,8;AD=0,8; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:8:0,0:0,8:0,8:0,0,14,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 268671 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 287651 . C T 24 Low DP=3;ADF=0,0;ADR=0,3;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:3:0,0:0,3:0,3:0,0,20,0,0,3,0,0:0,0,37,0,0,33,0,0:0.000,1.000 ==> zr3616_7_CT-SNPs.tab <== NC_047559.1 22959 . 
C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,25,0,0:0.000,1.000 NC_047559.1 34836 . C T 1000 PASS DP=13;ADF=0,0;ADR=0,13;AD=0,13; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:13:0,0:0,13:0,13:0,0,10,0,0,13,0,0:0,0,37,0,0,35,0,0:0.000,1.000 NC_047559.1 38038 . C T 92 PASS DP=9;ADF=0,0;ADR=5,4;AD=5,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:9:0,0:5,4:5,4:0,0,5,0,0,4,5,0:0,0,37,0,0,37,37,0:0.556,0.444 NC_047559.1 211764 . C T 102 PASS DP=17;ADF=0,0;ADR=0,17;AD=0,17; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:17:0,0:0,17:0,17:0,0,4,0,0,17,0,0:0,0,34,0,0,36,0,0:0.000,1.000 NC_047559.1 224774 . C T 105 PASS DP=6;ADF=0,0;ADR=2,4;AD=2,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:6:0,0:2,4:2,4:0,0,1,0,0,4,2,0:0,0,37,0,0,37,37,0:0.333,0.667 NC_047559.1 249908 . C T 1000 PASS DP=8;ADF=0,0;ADR=0,8;AD=0,8; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:8:0,0:0,8:0,8:0,0,6,0,0,8,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 250602 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 251061 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 356833 . C T 141 PASS DP=5;ADF=0,0;ADR=0,5;AD=0,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:0,5:0,5:0,0,4,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 357397 . C T 9 PASS DP=11;ADF=0,0;ADR=0,11;AD=0,11; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:0,11:0,11:0,0,1,0,0,11,0,0:0,0,37,0,0,37,0,0:0.000,1.000 ==> zr3616_8_CT-SNPs.tab <== NC_047559.1 36674 . C T 59 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:4:0,0:0,4:0,4:0,0,21,0,0,4,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 42691 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 44211 . 
C T 12 Low DP=4;ADF=0,0;ADR=0,4;AD=0,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:4:0,0:0,4:0,4:0,0,0,0,0,4,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 82690 . C T 112 PASS DP=15;ADF=0,0;ADR=0,15;AD=0,15; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:15:0,0:0,15:0,15:0,0,4,0,0,15,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 83012 . C T 45 PASS DP=15;ADF=0,0;ADR=0,15;AD=0,15; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:15:0,0:0,15:0,15:0,0,0,0,0,15,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 87321 . C T 24 PASS DP=8;ADF=0,0;ADR=0,8;AD=0,8; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:8:0,0:0,8:0,8:0,0,0,0,0,8,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 121206 . C T 18 PASS DP=6;ADF=0,0;ADR=0,6;AD=0,6; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:6:0,0:0,6:0,6:0,0,0,0,0,6,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 188398 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 224356 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 252976 . C T 6 Low DP=2;ADF=0,0;ADR=0,2;AD=0,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:2:0,0:0,2:0,2:0,0,0,0,0,2,0,0:0,0,0,0,0,37,0,0:0.000,1.000 69845 zr3616_1_CT-SNPs.tab 70244 zr3616_2_CT-SNPs.tab 72064 zr3616_3_CT-SNPs.tab 73664 zr3616_4_CT-SNPs.tab 66965 zr3616_5_CT-SNPs.tab 72123 zr3616_6_CT-SNPs.tab 68856 zr3616_7_CT-SNPs.tab 69695 zr3616_8_CT-SNPs.tab 563456 total
#List every file in the working directory whose name ends in .vcf
!find *.vcf
SNP-results.vcf zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf
#The VCF header/additional information comprises 268 lines (checked with multiple files)
#Print those first 268 lines of one sample's VCF to inspect the header
!head -n268 zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf
##fileformat=VCFv4.3 ##fileDate= 20210407 ##bssnperVersion=1.1 ##bssnperCommand=--fa /Volumes/web/spartina/project-oyster-oa/data/Cg-roslin/cgigas_uk_roslin_v1_genomic-mito.fa --input zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam --output zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-candidates.txt --methcg zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.CpG-meth-info.tab --methchg zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.CHG-meth-info.tab --methchh zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.CHH-meth-info.tab --minhetfreq 0.1 --minhomfreq 0.85 --minquali 15 --mincover 5 --maxcover 1000 --minread2 2 --errorate 0.02 --mapvalue 20 ##reference=file:///Volumes/web/spartina/project-oyster-oa/data/Cg-roslin/cgigas_uk_roslin_v1_genomic-mito.fa ##Bisulfite=directional> ##contig=<ID=NC_047559.1,length=55785328> ##contig=<ID=NC_047560.1,length=73222313> ##contig=<ID=NC_047561.1,length=58319100> ##contig=<ID=NC_047562.1,length=53127865> ##contig=<ID=NC_047564.1,length=60151564> ##contig=<ID=NC_047565.1,length=62107823> ##contig=<ID=NC_047566.1,length=58462999> ##contig=<ID=NC_047567.1,length=37089910> ##contig=<ID=NC_047568.1,length=57541580> ##contig=<ID=NW_022994773.1,length=129571> ##contig=<ID=NW_022994774.1,length=1185781> ##contig=<ID=NW_022994775.1,length=128451> ##contig=<ID=NW_022994776.1,length=124569> ##contig=<ID=NW_022994777.1,length=123505> ##contig=<ID=NW_022994778.1,length=761972> ##contig=<ID=NW_022994779.1,length=111600> ##contig=<ID=NW_022994780.1,length=109880> ##contig=<ID=NW_022994781.1,length=109004> ##contig=<ID=NW_022994782.1,length=105193> ##contig=<ID=NW_022994783.1,length=103107> ##contig=<ID=NW_022994784.1,length=1334230> ##contig=<ID=NW_022994785.1,length=99559> ##contig=<ID=NW_022994786.1,length=93417> ##contig=<ID=NW_022994787.1,length=85616> ##contig=<ID=NW_022994788.1,length=84435> ##contig=<ID=NW_022994789.1,length=725421> ##contig=<ID=NW_022994790.1,length=78344> ##contig=<ID=NW_022994791.1,length=77172> 
##contig=<ID=NW_022994792.1,length=66789> ##contig=<ID=NW_022994793.1,length=66084> ##contig=<ID=NW_022994794.1,length=699037> ##contig=<ID=NW_022994795.1,length=63125> ##contig=<ID=NW_022994796.1,length=62774> ##contig=<ID=NW_022994797.1,length=60186> ##contig=<ID=NW_022994798.1,length=59881> ##contig=<ID=NW_022994799.1,length=680695> ##contig=<ID=NW_022994800.1,length=57426> ##contig=<ID=NW_022994801.1,length=56398> ##contig=<ID=NW_022994802.1,length=52255> ##contig=<ID=NW_022994803.1,length=47562> ##contig=<ID=NW_022994804.1,length=45312> ##contig=<ID=NW_022994805.1,length=654836> ##contig=<ID=NW_022994806.1,length=34204> ##contig=<ID=NW_022994807.1,length=33063> ##contig=<ID=NW_022994808.1,length=32339> ##contig=<ID=NW_022994809.1,length=621026> ##contig=<ID=NW_022994810.1,length=30983> ##contig=<ID=NW_022994811.1,length=28060> ##contig=<ID=NW_022994812.1,length=26205> ##contig=<ID=NW_022994813.1,length=609159> ##contig=<ID=NW_022994814.1,length=18830> ##contig=<ID=NW_022994815.1,length=549086> ##contig=<ID=NW_022994816.1,length=985078> ##contig=<ID=NW_022994817.1,length=2254> ##contig=<ID=NW_022994818.1,length=1361> ##contig=<ID=NW_022994819.1,length=734241> ##contig=<ID=NW_022994820.1,length=658371> ##contig=<ID=NW_022994821.1,length=546250> ##contig=<ID=NW_022994822.1,length=614137> ##contig=<ID=NW_022994823.1,length=196267> ##contig=<ID=NW_022994824.1,length=461872> ##contig=<ID=NW_022994825.1,length=345626> ##contig=<ID=NW_022994826.1,length=153136> ##contig=<ID=NW_022994827.1,length=85676> ##contig=<ID=NW_022994828.1,length=316174> ##contig=<ID=NW_022994829.1,length=476514> ##contig=<ID=NW_022994830.1,length=534456> ##contig=<ID=NW_022994831.1,length=527944> ##contig=<ID=NW_022994832.1,length=513312> ##contig=<ID=NW_022994833.1,length=507370> ##contig=<ID=NW_022994834.1,length=496827> ##contig=<ID=NW_022994835.1,length=482992> ##contig=<ID=NW_022994836.1,length=471557> ##contig=<ID=NW_022994837.1,length=686402> ##contig=<ID=NW_022994838.1,length=445523> 
##contig=<ID=NW_022994839.1,length=505355> ##contig=<ID=NW_022994840.1,length=408464> ##contig=<ID=NW_022994841.1,length=392916> ##contig=<ID=NW_022994842.1,length=346530> ##contig=<ID=NW_022994843.1,length=346498> ##contig=<ID=NW_022994844.1,length=984115> ##contig=<ID=NW_022994845.1,length=334217> ##contig=<ID=NW_022994846.1,length=330898> ##contig=<ID=NW_022994847.1,length=310459> ##contig=<ID=NW_022994848.1,length=975996> ##contig=<ID=NW_022994849.1,length=299473> ##contig=<ID=NW_022994850.1,length=724080> ##contig=<ID=NW_022994851.1,length=284019> ##contig=<ID=NW_022994852.1,length=380988> ##contig=<ID=NW_022994853.1,length=271986> ##contig=<ID=NW_022994854.1,length=271335> ##contig=<ID=NW_022994855.1,length=270660> ##contig=<ID=NW_022994856.1,length=269814> ##contig=<ID=NW_022994857.1,length=255287> ##contig=<ID=NW_022994858.1,length=250265> ##contig=<ID=NW_022994859.1,length=249553> ##contig=<ID=NW_022994860.1,length=243901> ##contig=<ID=NW_022994861.1,length=243737> ##contig=<ID=NW_022994862.1,length=230837> ##contig=<ID=NW_022994863.1,length=896916> ##contig=<ID=NW_022994864.1,length=230075> ##contig=<ID=NW_022994865.1,length=3851109> ##contig=<ID=NW_022994866.1,length=222500> ##contig=<ID=NW_022994867.1,length=221461> ##contig=<ID=NW_022994868.1,length=214429> ##contig=<ID=NW_022994869.1,length=208222> ##contig=<ID=NW_022994870.1,length=984156> ##contig=<ID=NW_022994871.1,length=303113> ##contig=<ID=NW_022994872.1,length=195767> ##contig=<ID=NW_022994873.1,length=192086> ##contig=<ID=NW_022994874.1,length=189815> ##contig=<ID=NW_022994875.1,length=180076> ##contig=<ID=NW_022994876.1,length=178302> ##contig=<ID=NW_022994877.1,length=371905> ##contig=<ID=NW_022994878.1,length=173440> ##contig=<ID=NW_022994879.1,length=171659> ##contig=<ID=NW_022994880.1,length=169219> ##contig=<ID=NW_022994881.1,length=520634> ##contig=<ID=NW_022994882.1,length=167015> ##contig=<ID=NW_022994883.1,length=166538> ##contig=<ID=NW_022994884.1,length=165345> 
##contig=<ID=NW_022994885.1,length=161001> ##contig=<ID=NW_022994886.1,length=803096> ##contig=<ID=NW_022994887.1,length=153941> ##contig=<ID=NW_022994888.1,length=153236> ##contig=<ID=NW_022994889.1,length=151090> ##contig=<ID=NW_022994890.1,length=226113> ##contig=<ID=NW_022994891.1,length=142569> ##contig=<ID=NW_022994892.1,length=139519> ##contig=<ID=NW_022994893.1,length=137056> ##contig=<ID=NW_022994894.1,length=135750> ##contig=<ID=NW_022994895.1,length=800838> ##contig=<ID=NW_022994896.1,length=123475> ##contig=<ID=NW_022994897.1,length=543823> ##contig=<ID=NW_022994898.1,length=63957> ##contig=<ID=NW_022994899.1,length=135656> ##contig=<ID=NW_022994900.1,length=158606> ##contig=<ID=NW_022994901.1,length=138885> ##contig=<ID=NW_022994902.1,length=213710> ##contig=<ID=NW_022994903.1,length=31917> ##contig=<ID=NW_022994904.1,length=192304> ##contig=<ID=NW_022994905.1,length=122499> ##contig=<ID=NW_022994906.1,length=24192> ##contig=<ID=NW_022994907.1,length=80452> ##contig=<ID=NW_022994908.1,length=34710> ##contig=<ID=NW_022994909.1,length=80340> ##contig=<ID=NW_022994910.1,length=59023> ##contig=<ID=NW_022994911.1,length=90223> ##contig=<ID=NW_022994912.1,length=80828> ##contig=<ID=NW_022994913.1,length=18545> ##contig=<ID=NW_022994914.1,length=79317> ##contig=<ID=NW_022994915.1,length=76490> ##contig=<ID=NW_022994916.1,length=77779> ##contig=<ID=NW_022994917.1,length=39112> ##contig=<ID=NW_022994918.1,length=404750> ##contig=<ID=NW_022994919.1,length=196227> ##contig=<ID=NW_022994920.1,length=39174> ##contig=<ID=NW_022994921.1,length=323512> ##contig=<ID=NW_022994922.1,length=83186> ##contig=<ID=NW_022994923.1,length=61906> ##contig=<ID=NW_022994924.1,length=127844> ##contig=<ID=NW_022994925.1,length=61549> ##contig=<ID=NW_022994926.1,length=81940> ##contig=<ID=NW_022994927.1,length=162546> ##contig=<ID=NW_022994928.1,length=69385> ##contig=<ID=NW_022994929.1,length=46878> ##contig=<ID=NW_022994930.1,length=145484> ##contig=<ID=NW_022994931.1,length=118323> 
##contig=<ID=NW_022994932.1,length=123881> ##contig=<ID=NW_022994933.1,length=29995> ##contig=<ID=NW_022994934.1,length=161585> ##contig=<ID=NW_022994935.1,length=43805> ##contig=<ID=NW_022994936.1,length=297039> ##contig=<ID=NW_022994937.1,length=490709> ##contig=<ID=NW_022994938.1,length=203969> ##contig=<ID=NW_022994939.1,length=324655> ##contig=<ID=NW_022994940.1,length=218172> ##contig=<ID=NW_022994941.1,length=19044> ##contig=<ID=NW_022994942.1,length=61061> ##contig=<ID=NW_022994943.1,length=289459> ##contig=<ID=NW_022994944.1,length=261312> ##contig=<ID=NW_022994945.1,length=625727> ##contig=<ID=NW_022994946.1,length=333635> ##contig=<ID=NW_022994947.1,length=135775> ##contig=<ID=NW_022994948.1,length=858184> ##contig=<ID=NW_022994949.1,length=121447> ##contig=<ID=NW_022994950.1,length=33519> ##contig=<ID=NW_022994951.1,length=187731> ##contig=<ID=NW_022994952.1,length=219001> ##contig=<ID=NW_022994953.1,length=45830> ##contig=<ID=NW_022994954.1,length=129811> ##contig=<ID=NW_022994955.1,length=365466> ##contig=<ID=NW_022994956.1,length=433943> ##contig=<ID=NW_022994957.1,length=201146> ##contig=<ID=NW_022994958.1,length=172809> ##contig=<ID=NW_022994959.1,length=64928> ##contig=<ID=NW_022994960.1,length=137501> ##contig=<ID=NW_022994961.1,length=155459> ##contig=<ID=NW_022994962.1,length=175997> ##contig=<ID=NW_022994963.1,length=286963> ##contig=<ID=NW_022994964.1,length=245988> ##contig=<ID=NW_022994965.1,length=30682> ##contig=<ID=NW_022994966.1,length=514743> ##contig=<ID=NW_022994967.1,length=56505> ##contig=<ID=NW_022994968.1,length=91994> ##contig=<ID=NW_022994969.1,length=256332> ##contig=<ID=NW_022994970.1,length=36429> ##contig=<ID=NW_022994971.1,length=30384> ##contig=<ID=NW_022994972.1,length=189740> ##contig=<ID=NW_022994973.1,length=66576> ##contig=<ID=NW_022994974.1,length=36592> ##contig=<ID=NW_022994975.1,length=74180> ##contig=<ID=NW_022994976.1,length=30109> ##contig=<ID=NW_022994977.1,length=141708> 
##contig=<ID=NW_022994978.1,length=113692> ##contig=<ID=NW_022994979.1,length=83790> ##contig=<ID=NW_022994980.1,length=6557> ##contig=<ID=NW_022994981.1,length=62922> ##contig=<ID=NW_022994982.1,length=153818> ##contig=<ID=NW_022994983.1,length=79855> ##contig=<ID=NW_022994984.1,length=42847> ##contig=<ID=NW_022994985.1,length=190097> ##contig=<ID=NW_022994986.1,length=135148> ##contig=<ID=NW_022994987.1,length=316463> ##contig=<ID=NW_022994988.1,length=28442> ##contig=<ID=NW_022994989.1,length=147472> ##contig=<ID=NW_022994990.1,length=30511> ##contig=<ID=NW_022994991.1,length=237136> ##contig=<ID=NW_022994992.1,length=107548> ##contig=<ID=NW_022994993.1,length=225226> ##contig=<ID=NW_022994994.1,length=24264> ##contig=<ID=NW_022994995.1,length=63071> ##contig=<ID=NW_022994996.1,length=98197> ##contig=<ID=NW_022994997.1,length=297219> ##contig=<ID=NW_022994998.1,length=55042> ##contig=<ID=NC_001276.1,length=18224> ##ALT=<ID=*,Description="Represents allele(s) other than observed."> ##INFO=<ID=INDEL,Number=0,Type=Flag,Description="Indicates that the variant is an INDEL."> ##INFO=<ID=IDV,Number=1,Type=Integer,Description="Maximum number of reads supporting an indel"> ##INFO=<ID=IMF,Number=1,Type=Float,Description="Maximum fraction of reads supporting an indel"> ##INFO=<ID=DP,Number=1,Type=Integer,Description="Raw read depth"> ##INFO=<ID=AD,Number=R,Type=Integer,Description="Total allelic depths"> ##INFO=<ID=ADF,Number=R,Type=Integer,Description="Total allelic depths on the forward strand"> ##INFO=<ID=ADR,Number=R,Type=Integer,Description="Total allelic depths on the reverse strand"> ##INFO=<ID=MQ0F,Number=1,Type=Float,Description="Fraction of MQ0 reads (smaller is better)"> ##FILTER=<ID=Low,Description="Low Quality"> ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> ##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes for each ALT allele, in the same order as listed"> ##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of 
alleles in called genotypes"> ##INFO=<ID=DP4,Number=4,Type=Integer,Description="Number of high-quality ref-forward , ref-reverse, alt-forward and alt-reverse bases"> ##INFO=<ID=MQ,Number=1,Type=Integer,Description="Average mapping quality"> ##FORMAT=<ID=PL,Number=G,Type=Integer,Description="List of Phred-scaled genotype likelihoods"> ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Number of high-quality bases"> ##FORMAT=<ID=SP,Number=1,Type=Integer,Description="Phred-scaled strand bias P-value"> ##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths"> ##FORMAT=<ID=ADF,Number=R,Type=Integer,Description="Allelic depths on the forward strand"> ##FORMAT=<ID=ADR,Number=R,Type=Integer,Description="Allelic depths on the reverse strand"> ##FORMAT=<ID=BSD,Number=8,Type=Integer,Description="Depth, ATCG in watson strand and crick strand"> ##FORMAT=<ID=BSQ,Number=8,Type=Integer,Description="Avarage Base Quality, ATCG in watson strand and crick strand"> ##FORMAT=<ID=ALFR,Number=R,Type=Float,Description="Allele frequency"> #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.deduplicated.sorted.bam
%%bash
# For every VCF in the working directory, write its variant records (header
# removed) to <name>.tab next to it, e.g. sample.vcf -> sample.vcf.tab.
for f in *vcf
do
    # An unmatched glob stays as the literal pattern; without this guard the
    # redirect below would create a stray file named '*vcf.tab'.
    [ -f "${f}" ] || continue
    # Drop header/meta lines by content (they all start with '#') rather than
    # by a fixed line count, so a VCF with a different number of header lines
    # is still stripped correctly. For a 268-line header this yields the same
    # records as skipping the first 268 lines.
    # NOTE(review): assumes no record's CHROM field starts with '#'.
    sed '/^#/d' -- "${f}" > "${f}.tab"
done
#List the header-stripped tables (files ending in .vcf.tab)
!find *.vcf.tab
SNP-results.vcf.tab zr3616_1_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf.tab zr3616_2_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf.tab zr3616_3_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf.tab zr3616_4_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf.tab zr3616_5_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf.tab zr3616_6_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf.tab zr3616_7_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf.tab zr3616_8_R1_val_1_val_1_val_1_bismark_bt2_pe.SNP-results.vcf.tab
#Combine the records from every *.vcf.tab table into one file
#Overwrite (>) rather than append (>>) so re-running this cell cannot duplicate records
!cat *.vcf.tab > all-SNPs.tab
#Preview the combined table and count its lines
!head all-SNPs.tab
!wc -l all-SNPs.tab
NC_047559.1 2313 . G T 12 PASS DP=11;ADF=0,0;ADR=9,2;AD=9,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:9,2:9,2:0,0,0,0,0,2,0,9:0,0,0,0,0,37,0,37:0.818,0.182 NC_047559.1 4001 . G C 1000 Low DP=66;ADF=2,0;ADR=44,20;AD=46,20; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:66:2,0:44,20:46,20:0,0,0,2,0,0,20,44:0,0,0,37,0,0,36,36:0.697,0.303 NC_047559.1 9833 . G T 9 Low DP=3;ADF=0,2;ADR=0,1;AD=0,3; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:3:0,2:0,1:0,3:0,2,0,0,0,1,0,0:0,37,0,0,0,37,0,0:0.000,1.000 NC_047559.1 10022 . C G 4 Low DP=1;ADF=0,1;ADR=0,0;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,1:0,0:0,1:0,0,0,1,0,0,0,0:0,0,0,37,0,0,0,0:0.000,1.000 NC_047559.1 15494 . G T 16 PASS DP=788;ADF=0,740;ADR=3,45;AD=3,785; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:788:0,740:3,45:3,785:0,740,0,0,0,45,0,3:0,37,0,0,0,37,0,37:0.004,0.996 NC_047559.1 15790 . T A 1000 PASS DP=196;ADF=5,186;ADR=0,5;AD=5,191; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:196:5,186:0,5:5,191:186,5,0,0,5,0,0,0:37,37,0,0,35,0,0,0:0.026,0.974 NC_047559.1 16384 . T G 5 PASS DP=15;ADF=7,2;ADR=6,0;AD=13,2; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:15:7,2:6,0:13,2:0,7,0,2,0,6,0,0:0,37,0,37,0,35,0,0:0.867,0.133 NC_047559.1 18106 . A T 1000 PASS DP=27;ADF=0,0;ADR=19,8;AD=19,8; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:27:0,0:19,8:19,8:0,0,0,0,19,8,0,0:0,0,0,0,37,37,0,0:0.704,0.296 NC_047559.1 20990 . C A 4 Low DP=1;ADF=0,1;ADR=0,0;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,1:0,0:0,1:1,0,0,0,0,0,0,0:37,0,0,0,0,0,0,0:0.000,1.000 NC_047559.1 22443 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 30537349 all-SNPs.tab
#Reduce each record to its first five whitespace-separated fields, re-joined with tabs
#Sort the reduced records and collapse identical lines so each one appears once
!awk -v OFS='\t' '{print $1, $2, $3, $4, $5}' all-SNPs.tab \
| sort -u \
> unique-SNPs.tab
#Preview the de-duplicated table and count its lines
!head unique-SNPs.tab
!wc -l unique-SNPs.tab
NC_001276.1 10299 . A G NC_001276.1 10412 . G A NC_001276.1 10452 . G C NC_001276.1 10656 . T C NC_001276.1 10761 . G A NC_001276.1 11236 . A G NC_001276.1 11789 . T C NC_001276.1 11837 . C T NC_001276.1 11843 . G A NC_001276.1 11852 . T C 13234183 unique-SNPs.tab
#List every file ending in CT-SNPs.tab (this glob is broader than zr3616*CT-SNPs.tab)
!find *CT-SNPs.tab
CT-SNPs.tab zr3616_1_CT-SNPs.tab zr3616_2_CT-SNPs.tab zr3616_3_CT-SNPs.tab zr3616_4_CT-SNPs.tab zr3616_5_CT-SNPs.tab zr3616_6_CT-SNPs.tab zr3616_7_CT-SNPs.tab zr3616_8_CT-SNPs.tab
#Combine C/T SNPs into one file
#Inputs are named explicitly instead of using *CT-SNPs.tab: that pattern also
#matches all-CT-SNPs.tab (this cell's own output) and unique-CT-SNPs.tab once
#they exist, which would feed earlier results back into the combined file
#Overwrite (>) instead of append (>>) so re-running the cell does not duplicate rows
#NOTE(review): CT-SNPs.tab (no sample prefix) is kept because the original pattern
#picked it up; confirm it is a real input and not a leftover file
!cat CT-SNPs.tab zr3616_*_CT-SNPs.tab > all-CT-SNPs.tab
#Preview the first rows, then count the combined C/T SNP records
!head all-CT-SNPs.tab
!wc -l all-CT-SNPs.tab
NC_047559.1 22443 . C T 4 Low DP=1;ADF=0,0;ADR=0,1;AD=0,1; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 1/1:1:0,0:0,1:0,1:0,0,0,0,0,1,0,0:0,0,0,0,0,37,0,0:0.000,1.000 NC_047559.1 34836 . C T 1000 PASS DP=39;ADF=0,0;ADR=0,39;AD=0,39; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:39:0,0:0,39:0,39:0,0,78,0,0,39,0,0:0,0,37,0,0,36,0,0:0.000,1.000 NC_047559.1 36674 . C T 1000 PASS DP=43;ADF=0,0;ADR=21,22;AD=21,22; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:43:0,0:21,22:21,22:0,0,123,0,0,22,21,0:0,0,37,0,0,37,36,0:0.488,0.512 NC_047559.1 38038 . C T 1000 PASS DP=43;ADF=0,0;ADR=30,13;AD=30,13; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:43:0,0:30,13:30,13:0,0,31,0,0,13,30,0:0,0,36,0,0,36,37,0:0.698,0.302 NC_047559.1 44211 . C T 29 PASS DP=5;ADF=0,0;ADR=1,4;AD=1,4; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:1,4:1,4:0,0,0,0,0,4,1,0:0,0,0,0,0,37,37,0:0.200,0.800 NC_047559.1 48352 . C T 104 PASS DP=5;ADF=0,0;ADR=0,5;AD=0,5; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:5:0,0:0,5:0,5:0,0,19,0,0,5,0,0:0,0,37,0,0,37,0,0:0.000,1.000 NC_047559.1 49472 . C T 1000 PASS DP=10;ADF=0,0;ADR=1,9;AD=1,9; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:10:0,0:1,9:1,9:0,0,16,0,0,9,1,0:0,0,37,0,0,37,37,0:0.100,0.900 NC_047559.1 82690 . C T 1000 PASS DP=53;ADF=0,0;ADR=2,51;AD=2,51; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:53:0,0:2,51:2,51:0,0,31,0,0,51,2,0:0,0,37,0,0,37,37,0:0.038,0.962 NC_047559.1 83012 . C T 132 PASS DP=34;ADF=0,0;ADR=0,34;AD=0,34; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:34:0,0:0,34:0,34:0,0,6,0,0,34,0,0:0,0,35,0,0,37,0,0:0.000,1.000 NC_047559.1 87321 . C T 47 PASS DP=11;ADF=0,0;ADR=0,11;AD=0,11; GT:DP:ADF:ADR:AD:BSD:BSQ:ALFR 0/1:11:0,0:0,11:0,11:0,0,2,0,0,11,0,0:0,0,37,0,0,37,0,0:0.000,1.000 685762 all-CT-SNPs.tab
#Keep only columns 1-5 of the combined C/T SNP table, joined by tabs (awk OFS)
#Sort the rows and drop repeated ones in a single step (sort -u)
#Write the distinct C/T SNPs to unique-CT-SNPs.tab
!awk 'BEGIN { OFS = "\t" } { print $1, $2, $3, $4, $5 }' all-CT-SNPs.tab \
| sort -u \
> unique-CT-SNPs.tab
#Preview the first 10 rows, then count the unique C/T SNPs
!head -n 10 unique-CT-SNPs.tab
!wc -l unique-CT-SNPs.tab
NC_001276.1 14669 . C T NC_047559.1 1000025 . C T NC_047559.1 10001065 . C T NC_047559.1 10001128 . C T NC_047559.1 10001236 . C T NC_047559.1 10003470 . C T NC_047559.1 10003475 . C T NC_047559.1 10004318 . C T NC_047559.1 100045 . C T NC_047559.1 10004558 . C T 300278 unique-CT-SNPs.tab