!date
Mon Apr 18 07:41:44 PDT 2016
%%bash
system_profiler SPSoftwareDataType
Software: System Software Overview: System Version: OS X 10.9.5 (13F34) Kernel Version: Darwin 13.4.0 Boot Volume: Hummingbird Boot Mode: Normal Computer Name: hummingbird User Name: Sam (Sam) Secure Virtual Memory: Enabled Time since boot: 5 days 23:15
%%bash
#Excludes serial number and hardware UUID
system_profiler SPHardwareDataType | grep -v [SH][ea]
Model Name: Xserve Model Identifier: Xserve3,1 Processor Name: Quad-Core Intel Xeon Processor Speed: 2.26 GHz Number of Processors: 2 Total Number of Cores: 8 L2 Cache (per Core): 256 KB L3 Cache (per Processor): 8 MB Memory: 24 GB Processor Interconnect Speed: 5.86 GT/s Boot ROM Version: XS31.0081.B06 SMC Version (system): 1.43f4 LOM Revision: 1.1.8
cd /usr/local/bioinformatics/
/usr/local/bioinformatics
%%bash
mkdir pyrad
mkdir: pyrad: Permission denied
cd pyrad
/usr/local/bioinformatics/pyrad
%%bash
wget https://github.com/dereneaton/pyrad/archive/3.0.66.tar.gz
--2016-04-18 07:50:10-- https://github.com/dereneaton/pyrad/archive/3.0.66.tar.gz Resolving github.com... 192.30.252.128 Connecting to github.com|192.30.252.128|:443... connected. HTTP request sent, awaiting response... 302 Found Location: https://codeload.github.com/dereneaton/pyrad/tar.gz/3.0.66 [following] --2016-04-18 07:50:11-- https://codeload.github.com/dereneaton/pyrad/tar.gz/3.0.66 Resolving codeload.github.com... 192.30.252.163 Connecting to codeload.github.com|192.30.252.163|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 76100 (74K) [application/x-gzip] 3.0.66.tar.gz: Permission denied Cannot write to '3.0.66.tar.gz' (Success).
ls
3.0.66.tar.gz
%%bash
tar -zxvf 3.0.66.tar.gz
x pyrad-3.0.66/: Can't create 'pyrad-3.0.66' x pyrad-3.0.66/.gitignore: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/.gitignore' x pyrad-3.0.66/LICENSE: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/LICENSE' x pyrad-3.0.66/README.rst: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/README.rst' x pyrad-3.0.66/pyrad/: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad' x pyrad-3.0.66/pyrad/Dtest.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/Dtest.py' x pyrad-3.0.66/pyrad/Dtest_5.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/Dtest_5.py' x pyrad-3.0.66/pyrad/Dtest_foil.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/Dtest_foil.py' x pyrad-3.0.66/pyrad/H_err_dp.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/H_err_dp.py' x pyrad-3.0.66/pyrad/__init__.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/__init__.py' x pyrad-3.0.66/pyrad/alignable.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/alignable.py' x pyrad-3.0.66/pyrad/cluster7dp.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/cluster7dp.py' x pyrad-3.0.66/pyrad/cluster_cons7_shuf.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/cluster_cons7_shuf.py' x pyrad-3.0.66/pyrad/consens_pairs.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/consens_pairs.py' x pyrad-3.0.66/pyrad/consensdp.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/consensdp.py' x pyrad-3.0.66/pyrad/createfile.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/createfile.py' x pyrad-3.0.66/pyrad/editraw_merges.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/editraw_merges.py' x pyrad-3.0.66/pyrad/editraw_pairs.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/editraw_pairs.py' x pyrad-3.0.66/pyrad/editraw_rads.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/editraw_rads.py' x pyrad-3.0.66/pyrad/loci2SNP.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2SNP.py' x pyrad-3.0.66/pyrad/loci2gphocs.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2gphocs.py' x pyrad-3.0.66/pyrad/loci2mig.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2mig.py' x pyrad-3.0.66/pyrad/loci2phynex.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2phynex.py' x pyrad-3.0.66/pyrad/loci2treemix.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2treemix.py' x pyrad-3.0.66/pyrad/loci2vcf.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/loci2vcf.py' x pyrad-3.0.66/pyrad/overlapcheck.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/overlapcheck.py' x pyrad-3.0.66/pyrad/potpour.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/potpour.py' x pyrad-3.0.66/pyrad/pyRAD.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/pyRAD.py' x pyrad-3.0.66/pyrad/sortandcheck2.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/sortandcheck2.py' x pyrad-3.0.66/pyrad/tier2clust.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/pyrad/tier2clust.py' x pyrad-3.0.66/setup.py: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/setup.py' x pyrad-3.0.66/tox.ini: Failed to create dir 'pyrad-3.0.66'Can't create 'pyrad-3.0.66/tox.ini' tar: Error exit delayed from previous errors.
cd /usr/local/bioinformatics/pyrad-3.0.66/
/usr/local/bioinformatics/pyrad-3.0.66
ls
LICENSE README.rst pyrad/ setup.py tox.ini
setup.py install
File "<ipython-input-8-0964efb811bd>", line 1 setup.py install ^ SyntaxError: invalid syntax
ls
LICENSE README.rst build/ dist/ pyrad/ pyrad.egg-info/ setup.py tox.ini
cd ..
/usr/local/bioinformatics
%%bash
wget http://www.drive5.com/muscle/downloads3.8.31/muscle3.8.31_i86darwin64.tar.gz
--2016-04-18 08:04:19-- http://www.drive5.com/muscle/downloads3.8.31/muscle3.8.31_i86darwin64.tar.gz Resolving www.drive5.com... 205.196.220.130 Connecting to www.drive5.com|205.196.220.130|:80... connected. HTTP request sent, awaiting response... 200 OK Length: 166130 (162K) [application/x-tar] muscle3.8.31_i86darwin64.tar.gz: Permission denied Cannot write to 'muscle3.8.31_i86darwin64.tar.gz' (Success).
%%bash
tar -zxvf muscle3.8.31_i86darwin64.tar.gz
x muscle3.8.31_i86darwin64: Can't create 'muscle3.8.31_i86darwin64' tar: Error exit delayed from previous errors.
ls
FastQC/ bowtie@ hmmer@ ngsplot-2.08/ stacks-1.13/ Trimmomatic@ bowtie-1.0.0/ hmmer-3.1b1/ pyrad-3.0.66/ stacks-1.37/ Trimmomatic-0.30/ bowtie2@ libgtextutils-0.6.1/ rsem@ stacks-1.37.tar.gz Trinotate@ bowtie2-2.1.0/ muscle3.8.31_i86darwin64* rsem-1.2.10/ tophat-2.0.13.OSX_x86_64/ Trinotate_r20140708/ cufflinks-2.2.1.OSX_x86_64/ muscle3.8.31_i86darwin64.tar.gz samtools-0.1.19/ trinity@ anaconda/ dbs/ ncbi-blast@ signalp-4.1/ trinityrnaseq_r20131110/ bedtools-2.22.1/ fastx_toolkit@ ncbi-blast-2.2.29+/ source.sh bismark_v0.14.2/ fastx_toolkit-0.0.13.2/ ngsplot@ stacks@
cd muscle3.8.31_i86darwin64
[Errno 20] Not a directory: 'muscle3.8.31_i86darwin64' /usr/local/bioinformatics
cat source.sh
# Variables to use the Bioinformatics directory export BIO=/usr/local/bioinformatics # Anaconda export PATH=${BIO}/anaconda/bin:${PATH} # PYTHONPATH for using R with iPython export PYTHONPATH=/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages # BLAST export PATH=${BIO}/ncbi-blast/bin:${PATH} export BLASTDB=/Volumes/Data/blast_db # Trinity export PATH=${BIO}/trinity:${PATH} # HMMER export PATH=${BIO}/hmmer:${PATH} # Bowtie1 export PATH=${BIO}/bowtie:${PATH} # Bowtie2 export PATH=${BIO}/bowtie2:${PATH} # RSEM export PATH=${BIO}/rsem:${PATH} # Khmer tools export PATH=/opt/local/Library/Frameworks/Python.framework/Versions/2.7/bin:${PATH} # FASTX toolkit export PATH=${BIO}/fastx_toolkit/bin:${PATH} # Samtools (via RSEM) export PATH=${BIO}/rsem/sam:${PATH} # stacks export PATH=${BIO}/stacks/bin:${PATH} # Trimmomatic alias trimmomatic="java -jar ${BIO}/Trimmomatic/trimmomatic-0.30.jar" # ngsplot export PATH=${BIO}/ngsplot/bin:${PATH} export NGSPLOT=${BIO}/ngsplot # Trinotate export PATH=${BIO}/Trinotate:${PATH} # signalp export PATH=${BIO}/signalp-4.1:${PATH} # Tophat export PATH=${BIO}/tophat-2.0.13.OSX_x86_64:${PATH} # FastQC export PATH=${BIO}/FastQC:${PATH} # Cufflinks export PATH=${BIO}/cufflinks-2.2.1.OSX_x86_64:${PATH} # bedtools export PATH=${BIO}/bedtools-2.22.1/bin:${PATH} # bismark export PATH=${BIO}/bismark_v0.14.2:${PATH} # muscle export PATH=${BIO}/muscle3.8.31_i86darwin64:${PATH}
%%bash
wget https://github.com/torognes/vsearch/releases/download/v1.11.1/vsearch-1.11.1-osx-x86_64.tar.gz
--2016-04-18 08:15:13-- https://github.com/torognes/vsearch/releases/download/v1.11.1/vsearch-1.11.1-osx-x86_64.tar.gz Resolving github.com... 192.30.252.131 Connecting to github.com|192.30.252.131|:443... connected. HTTP request sent, awaiting response... 302 Found Location: https://github-cloud.s3.amazonaws.com/releases/19848353/5a974bd0-018b-11e6-810c-daa981b4e335.gz?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAISTNZFOVBIJMK3TQ%2F20160418%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20160418T151513Z&X-Amz-Expires=300&X-Amz-Signature=e3f2e38959bdb1a13e4a665eae70c0e1326e67e05c899107eb91e0aff543e5e6&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dvsearch-1.11.1-osx-x86_64.tar.gz&response-content-type=application%2Foctet-stream [following] --2016-04-18 08:15:13-- https://github-cloud.s3.amazonaws.com/releases/19848353/5a974bd0-018b-11e6-810c-daa981b4e335.gz?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAISTNZFOVBIJMK3TQ%2F20160418%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20160418T151513Z&X-Amz-Expires=300&X-Amz-Signature=e3f2e38959bdb1a13e4a665eae70c0e1326e67e05c899107eb91e0aff543e5e6&X-Amz-SignedHeaders=host&actor_id=0&response-content-disposition=attachment%3B%20filename%3Dvsearch-1.11.1-osx-x86_64.tar.gz&response-content-type=application%2Foctet-stream Resolving github-cloud.s3.amazonaws.com... 54.231.49.88 Connecting to github-cloud.s3.amazonaws.com|54.231.49.88|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 274585 (268K) [application/octet-stream] vsearch-1.11.1-osx-x86_64.tar.gz: Permission denied Cannot write to 'vsearch-1.11.1-osx-x86_64.tar.gz' (Success).
%%bash
tar -xzf vsearch-1.11.1-osx-x86_64.tar.gz
./._vsearch-1.11.1-osx-x86_64.rzY4Q7: Can't create '._vsearch-1.11.1-osx-x86_64.rzY4Q7' vsearch-1.11.1-osx-x86_64/: Can't update time for vsearch-1.11.1-osx-x86_64 vsearch-1.11.1-osx-x86_64/._bin.D23mUv: Can't create 'vsearch-1.11.1-osx-x86_64/._bin.D23mUv' vsearch-1.11.1-osx-x86_64/bin/: Can't create 'vsearch-1.11.1-osx-x86_64/bin' vsearch-1.11.1-osx-x86_64/._doc.HzJsUU: Can't create 'vsearch-1.11.1-osx-x86_64/._doc.HzJsUU' vsearch-1.11.1-osx-x86_64/doc/: Can't create 'vsearch-1.11.1-osx-x86_64/doc' vsearch-1.11.1-osx-x86_64/._LICENSE.txt.eqYDGZ: Can't create 'vsearch-1.11.1-osx-x86_64/._LICENSE.txt.eqYDGZ' vsearch-1.11.1-osx-x86_64/LICENSE.txt: Can't create 'vsearch-1.11.1-osx-x86_64/LICENSE.txt' vsearch-1.11.1-osx-x86_64/._LICENSE_GNU_GPL3.txt.M9efN0: Can't create 'vsearch-1.11.1-osx-x86_64/._LICENSE_GNU_GPL3.txt.M9efN0' vsearch-1.11.1-osx-x86_64/LICENSE_GNU_GPL3.txt: Can't create 'vsearch-1.11.1-osx-x86_64/LICENSE_GNU_GPL3.txt' vsearch-1.11.1-osx-x86_64/._man.ilEZyS: Can't create 'vsearch-1.11.1-osx-x86_64/._man.ilEZyS' vsearch-1.11.1-osx-x86_64/man/: Can't create 'vsearch-1.11.1-osx-x86_64/man' vsearch-1.11.1-osx-x86_64/._README.md.QPpbcs: Can't create 'vsearch-1.11.1-osx-x86_64/._README.md.QPpbcs' vsearch-1.11.1-osx-x86_64/README.md: Can't create 'vsearch-1.11.1-osx-x86_64/README.md' vsearch-1.11.1-osx-x86_64/man/._vsearch.1.fEowYE: Can't create 'vsearch-1.11.1-osx-x86_64/man/._vsearch.1.fEowYE' vsearch-1.11.1-osx-x86_64/man/vsearch.1: Can't create 'vsearch-1.11.1-osx-x86_64/man/vsearch.1' vsearch-1.11.1-osx-x86_64/doc/._vsearch_manual.pdf.hQbbyY: Can't create 'vsearch-1.11.1-osx-x86_64/doc/._vsearch_manual.pdf.hQbbyY' vsearch-1.11.1-osx-x86_64/doc/vsearch_manual.pdf: Can't create 'vsearch-1.11.1-osx-x86_64/doc/vsearch_manual.pdf' vsearch-1.11.1-osx-x86_64/bin/._vsearch.q3PzBD: Can't create 'vsearch-1.11.1-osx-x86_64/bin/._vsearch.q3PzBD' vsearch-1.11.1-osx-x86_64/bin/vsearch: Can't create 'vsearch-1.11.1-osx-x86_64/bin/vsearch' tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/bin/vsearch) failed: Permission denied tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/doc/vsearch_manual.pdf) failed: Permission denied tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/man/vsearch.1) failed: Permission denied tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/README.md) failed: Permission denied tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/man) failed: Permission denied tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/LICENSE_GNU_GPL3.txt) failed: Permission denied tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/LICENSE.txt) failed: Permission denied tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/doc) failed: Permission denied tar: copyfile unpack (vsearch-1.11.1-osx-x86_64/bin) failed: Permission denied tar: copyfile unpack (./vsearch-1.11.1-osx-x86_64) failed: No such file or directory tar: Error exit delayed from previous errors.
ls
FastQC/ bowtie2@ ncbi-blast@ stacks@ Trimmomatic@ bowtie2-2.1.0/ ncbi-blast-2.2.29+/ stacks-1.13/ Trimmomatic-0.30/ cufflinks-2.2.1.OSX_x86_64/ ngsplot@ stacks-1.37/ Trinotate@ dbs/ ngsplot-2.08/ stacks-1.37.tar.gz Trinotate_r20140708/ fastx_toolkit@ pyrad-3.0.66/ tophat-2.0.13.OSX_x86_64/ anaconda/ fastx_toolkit-0.0.13.2/ rsem@ trinity@ bedtools-2.22.1/ hmmer@ rsem-1.2.10/ trinityrnaseq_r20131110/ bismark_v0.14.2/ hmmer-3.1b1/ samtools-0.1.19/ vsearch-1.11.1-osx-x86_64/ bowtie@ libgtextutils-0.6.1/ signalp-4.1/ vsearch-1.11.1-osx-x86_64.tar.gz bowtie-1.0.0/ muscle3.8.31_i86darwin64* source.sh
cat source.sh
# Variables to use the Bioinformatics directory export BIO=/usr/local/bioinformatics # Anaconda export PATH=${BIO}/anaconda/bin:${PATH} # PYTHONPATH for using R with iPython export PYTHONPATH=/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages # BLAST export PATH=${BIO}/ncbi-blast/bin:${PATH} export BLASTDB=/Volumes/Data/blast_db # Trinity export PATH=${BIO}/trinity:${PATH} # HMMER export PATH=${BIO}/hmmer:${PATH} # Bowtie1 export PATH=${BIO}/bowtie:${PATH} # Bowtie2 export PATH=${BIO}/bowtie2:${PATH} # RSEM export PATH=${BIO}/rsem:${PATH} # Khmer tools export PATH=/opt/local/Library/Frameworks/Python.framework/Versions/2.7/bin:${PATH} # FASTX toolkit export PATH=${BIO}/fastx_toolkit/bin:${PATH} # Samtools (via RSEM) export PATH=${BIO}/rsem/sam:${PATH} # stacks export PATH=${BIO}/stacks/bin:${PATH} # Trimmomatic alias trimmomatic="java -jar ${BIO}/Trimmomatic/trimmomatic-0.30.jar" # ngsplot export PATH=${BIO}/ngsplot/bin:${PATH} export NGSPLOT=${BIO}/ngsplot # Trinotate export PATH=${BIO}/Trinotate:${PATH} # signalp export PATH=${BIO}/signalp-4.1:${PATH} # Tophat export PATH=${BIO}/tophat-2.0.13.OSX_x86_64:${PATH} # FastQC export PATH=${BIO}/FastQC:${PATH} # Cufflinks export PATH=${BIO}/cufflinks-2.2.1.OSX_x86_64:${PATH} # bedtools export PATH=${BIO}/bedtools-2.22.1/bin:${PATH} # bismark export PATH=${BIO}/bismark_v0.14.2:${PATH} # muscle export PATH=${BIO}/muscle3.8.31_i86darwin64:${PATH} # vsearch export PATH=${BIO}/vsearch-1.11.1-osx-x86_64/bin:${PATH}
Paired-end GBS (or similarly, paired Ez-RAD) data may commonly contain first and second reads that overlap to some degree and therefore should be merged. This notebook will outline how to analyze such data in pyRAD (v.3.03 or newer). I will demonstrate this example using an empirical data set of mine. To begin, we will create an empty directory within our current directory in which to perform the assembly.
cd /Volumes/owl/temp/
/Volumes/owl/temp
%%bash
mkdir oly_gbs_pyrad
cd /Volumes/owl/temp/oly_gbs_pyrad/
/Volumes/owl/temp/oly_gbs_pyrad
%%bash
mkdir -p analysis_pyrad
ls
analysis_pyrad/
%%bash
time cp /Volumes/owl_web/nightingales/O_lurida/20160223_gbs/*_{1..10}A_*.fq.gz .
real 6m26.803s user 0m0.028s sys 0m42.577s
ls
analysis_pyrad/ params.txt
Next we create an empty params file by calling pyRAD with the -n option.
%%bash
pyrad -n
new params.txt file created
Then we enter some arguments into the params file, which can be done using any text editor. I use the script below to automate it here. Some important arguments for dealing with PE-GBS data in particular include the following:
Set the correct datatype. In the case of this analysis, we will use 'pairgbs' for de-multiplexing in step 1, but then switch it to 'merged' for steps 2-7 after we merge the paired reads.
The filter setting (param 21) is not needed in this case because we will be using an external read merging program that applies this filtering.
We will, however, still probably want to trim the overhanging edges of reads (param 29), which helps to remove barcodes or adapters that were missed.
It can also be helpful with this data type to use a low mindepth setting in conjunction with majority rule consensus base calls (param 31). This will tell pyrad to make statistical base calls for all sites with depth >mindepth, but to make majority rule base calls at sites with depth lower than mindepth, but higher than the majority rule minimum depth. I first run the analysis with the option turned off, and then at the end of the tutorial show the difference with it turned on.
cat params.txt
==** parameter inputs for pyRAD version 3.0.66 **======================== affected step == ./ ## 1. Working directory (all) ## 2. Loc. of non-demultiplexed files (if not line 18) (s1) ## 3. Loc. of barcode file (if not line 18) (s1) /usr/local/bioinformatics/vsearch-1.11.1-osx-x86_64/bin/vsearch ## 4. command (or path) to call vsearch (or usearch) (s3,s6) /usr/local/bioinformatics/muscle3.8.31_i86darwin64 ## 5. command (or path) to call muscle (s3,s7) CWGC ## 6. Restriction overhang (e.g., C|TGCAG -> TGCAG) (s1,s2) 16 ## 7. N processors (parallel) (all) 6 ## 8. Mindepth: min coverage for a cluster (s4,s5) 4 ## 9. NQual: max # sites with qual < 20 (or see line 20)(s2) .88 ## 10. Wclust: clustering threshold as a decimal (s3,s6) pairgbs ## 11. Datatype: rad,gbs,pairgbs,pairddrad,(others:see docs)(all) 4 ## 12. MinCov: min samples in a final locus (s7) 3 ## 13. MaxSH: max inds with shared hetero site (s7) oly_gbs_pyrad ## 14. Prefix name for final output (no spaces) (s7) ==== optional params below this line =================================== affected step == ## 15.opt.: select subset (prefix* only selector) (s2-s7) ## 16.opt.: add-on (outgroup) taxa (list or prefix*) (s6,s7) ## 17.opt.: exclude taxa (list or prefix*) (s7) /Volumes/nightingales/O_lurida/20160223_gbs/*.fq.gz ## 18.opt.: loc. of de-multiplexed data (s2) ## 19.opt.: maxM: N mismatches in barcodes (def= 1) (s1) ## 20.opt.: phred Qscore offset (def= 33) (s2) 2 ## 21.opt.: filter: def=0=NQual 1=NQual+adapters. 2=strict (s2) ## 22.opt.: a priori E,H (def= 0.001,0.01, if not estimated) (s5) 4 ## 23.opt.: maxN: max Ns in a cons seq (def=5) (s5) 8 ## 24.opt.: maxH: max heterozyg. sites in cons seq (def=5) (s5) ## 25.opt.: ploidy: max alleles in cons seq (def=2;see docs) (s4,s5) ## 26.opt.: maxSNPs: (def=100). Paired (def=100,100) (s7) ## 27.opt.: maxIndels: within-clust,across-clust (def. 3,99) (s3,s7) ## 28.opt.: random number seed (def. 112233) (s3,s6,s7) ## 29.opt.: trim overhang left,right on final loci, def(0,0) (s7) * ## 30.opt.: output formats: p,n,a,s,v,u,t,m,k,g,* (see docs) (s7) ## 31.opt.: maj. base call at depth>x<mindepth (def.x=mindepth) (s5) 50 ## 32.opt.: keep trimmed reads (def=0). Enter min length. (s2) ## 33.opt.: max stack size (int), def= max(500,mean+2*SD) (s3) ## 34.opt.: minDerep: exclude dereps with <= N copies, def=1 (s3) ## 35.opt.: use hierarchical clustering (def.=0, 1=yes) (s6) ## 36.opt.: repeat masking (def.=1='dust' method, 0=no) (s3,s6) 16 ## 37.opt.: vsearch max threads per job (def.=6; see docs) (s3,s6) ==== optional: list group/clade assignments below this line (see docs) ==================
%%bash
time pyrad -p params.txt -s 1
skipping step 1: line 18 of input file shows seqs already sorted
------------------------------------------------------------ pyRAD : RADseq for phylogenetics & introgression analyses ------------------------------------------------------------ real 0m0.500s user 0m0.389s sys 0m0.104s
It looks like most reads from each file were matched to a barcode.
%%bash
head -n 100 analysis_pyrad/stats/s1.sorting.txt
head: analysis_pyrad/stats/s1.sorting.txt: No such file or directory
For this we will use the program PEAR. This step is very important. I can pretty much assure you that you will have many paired end reads in your library that overlap, and if you do not merge these reads they can really mess up your analysis. In this case the vast majority of paired reads are overlapping (~80%), and in fact, the merged reads are the only portion of the data that we will be using for analysis.
The first thing we have to do is unzip the reads if they are gzipped. If you demultiplexed your data in pyrad they will be in the fastq/ directory.
%%bash
mv *.gz /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/
ls /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/
1HL_10A_1.fq.gz* 1HL_3A_2.fq.gz* 1HL_7A_1.fq.gz* 1NF_10A_2.fq.gz* 1NF_5A_1.fq.gz* 1NF_8A_2.fq.gz* 1SN_2A_1.fq.gz* 1SN_5A_2.fq.gz* 1SN_9A_1.fq.gz* 1HL_10A_2.fq.gz* 1HL_4A_1.fq.gz* 1HL_7A_2.fq.gz* 1NF_1A_1.fq.gz* 1NF_5A_2.fq.gz* 1NF_9A_1.fq.gz* 1SN_2A_2.fq.gz* 1SN_6A_1.fq.gz* 1SN_9A_2.fq.gz* 1HL_1A_1.fq.gz* 1HL_4A_2.fq.gz* 1HL_8A_1.fq.gz* 1NF_1A_2.fq.gz* 1NF_6A_1.fq.gz* 1NF_9A_2.fq.gz* 1SN_3A_1.fq.gz* 1SN_6A_2.fq.gz* 1HL_1A_2.fq.gz* 1HL_5A_1.fq.gz* 1HL_8A_2.fq.gz* 1NF_2A_1.fq.gz* 1NF_6A_2.fq.gz* 1SN_10A_1.fq.gz* 1SN_3A_2.fq.gz* 1SN_7A_1.fq.gz* 1HL_2A_1.fq.gz* 1HL_5A_2.fq.gz* 1HL_9A_1.fq.gz* 1NF_2A_2.fq.gz* 1NF_7A_1.fq.gz* 1SN_10A_2.fq.gz* 1SN_4A_1.fq.gz* 1SN_7A_2.fq.gz* 1HL_2A_2.fq.gz* 1HL_6A_1.fq.gz* 1HL_9A_2.fq.gz* 1NF_4A_1.fq.gz* 1NF_7A_2.fq.gz* 1SN_1A_1.fq.gz* 1SN_4A_2.fq.gz* 1SN_8A_1.fq.gz* 1HL_3A_1.fq.gz* 1HL_6A_2.fq.gz* 1NF_10A_1.fq.gz* 1NF_4A_2.fq.gz* 1NF_8A_1.fq.gz* 1SN_1A_2.fq.gz* 1SN_5A_1.fq.gz* 1SN_8A_2.fq.gz*
mkdir /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq
%%bash
mv /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/*.gz /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq
ls /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq
1HL_10A_1.fq.gz* 1HL_3A_2.fq.gz* 1HL_7A_1.fq.gz* 1NF_10A_2.fq.gz* 1NF_5A_1.fq.gz* 1NF_8A_2.fq.gz* 1SN_2A_1.fq.gz* 1SN_5A_2.fq.gz* 1SN_9A_1.fq.gz* 1HL_10A_2.fq.gz* 1HL_4A_1.fq.gz* 1HL_7A_2.fq.gz* 1NF_1A_1.fq.gz* 1NF_5A_2.fq.gz* 1NF_9A_1.fq.gz* 1SN_2A_2.fq.gz* 1SN_6A_1.fq.gz* 1SN_9A_2.fq.gz* 1HL_1A_1.fq.gz* 1HL_4A_2.fq.gz* 1HL_8A_1.fq.gz* 1NF_1A_2.fq.gz* 1NF_6A_1.fq.gz* 1NF_9A_2.fq.gz* 1SN_3A_1.fq.gz* 1SN_6A_2.fq.gz* 1HL_1A_2.fq.gz* 1HL_5A_1.fq.gz* 1HL_8A_2.fq.gz* 1NF_2A_1.fq.gz* 1NF_6A_2.fq.gz* 1SN_10A_1.fq.gz* 1SN_3A_2.fq.gz* 1SN_7A_1.fq.gz* 1HL_2A_1.fq.gz* 1HL_5A_2.fq.gz* 1HL_9A_1.fq.gz* 1NF_2A_2.fq.gz* 1NF_7A_1.fq.gz* 1SN_10A_2.fq.gz* 1SN_4A_1.fq.gz* 1SN_7A_2.fq.gz* 1HL_2A_2.fq.gz* 1HL_6A_1.fq.gz* 1HL_9A_2.fq.gz* 1NF_4A_1.fq.gz* 1NF_7A_2.fq.gz* 1SN_1A_1.fq.gz* 1SN_4A_2.fq.gz* 1SN_8A_1.fq.gz* 1HL_3A_1.fq.gz* 1HL_6A_2.fq.gz* 1NF_10A_1.fq.gz* 1NF_4A_2.fq.gz* 1NF_8A_1.fq.gz* 1SN_1A_2.fq.gz* 1SN_5A_1.fq.gz* 1SN_8A_2.fq.gz*
%%bash
## For this example I subselect only the samples from
## this library that start with the letter 'd'
time gunzip analysis_pyrad/fastq/*.gz
gunzip: analysis_pyrad/fastq/1HL_7A_1.fq already exists -- skipping real 13m38.673s user 2m26.466s sys 1m35.531s
%%bash
echo analysis_pyrad/fastq/*
analysis_pyrad/fastq/1HL_10A_1.fq analysis_pyrad/fastq/1HL_10A_2.fq analysis_pyrad/fastq/1HL_1A_1.fq analysis_pyrad/fastq/1HL_1A_2.fq analysis_pyrad/fastq/1HL_2A_1.fq analysis_pyrad/fastq/1HL_2A_2.fq analysis_pyrad/fastq/1HL_3A_1.fq analysis_pyrad/fastq/1HL_3A_2.fq analysis_pyrad/fastq/1HL_4A_1.fq analysis_pyrad/fastq/1HL_4A_2.fq analysis_pyrad/fastq/1HL_5A_1.fq analysis_pyrad/fastq/1HL_5A_2.fq analysis_pyrad/fastq/1HL_6A_1.fq analysis_pyrad/fastq/1HL_6A_2.fq analysis_pyrad/fastq/1HL_7A_1.fq analysis_pyrad/fastq/1HL_7A_1.fq.gz analysis_pyrad/fastq/1HL_7A_2.fq analysis_pyrad/fastq/1HL_8A_1.fq analysis_pyrad/fastq/1HL_8A_2.fq analysis_pyrad/fastq/1HL_9A_1.fq analysis_pyrad/fastq/1HL_9A_2.fq analysis_pyrad/fastq/1NF_10A_1.fq analysis_pyrad/fastq/1NF_10A_2.fq analysis_pyrad/fastq/1NF_1A_1.fq analysis_pyrad/fastq/1NF_1A_2.fq analysis_pyrad/fastq/1NF_2A_1.fq analysis_pyrad/fastq/1NF_2A_2.fq analysis_pyrad/fastq/1NF_4A_1.fq analysis_pyrad/fastq/1NF_4A_2.fq analysis_pyrad/fastq/1NF_5A_1.fq analysis_pyrad/fastq/1NF_5A_2.fq analysis_pyrad/fastq/1NF_6A_1.fq analysis_pyrad/fastq/1NF_6A_2.fq analysis_pyrad/fastq/1NF_7A_1.fq analysis_pyrad/fastq/1NF_7A_2.fq analysis_pyrad/fastq/1NF_8A_1.fq analysis_pyrad/fastq/1NF_8A_2.fq analysis_pyrad/fastq/1NF_9A_1.fq analysis_pyrad/fastq/1NF_9A_2.fq analysis_pyrad/fastq/1SN_10A_1.fq analysis_pyrad/fastq/1SN_10A_2.fq analysis_pyrad/fastq/1SN_1A_1.fq analysis_pyrad/fastq/1SN_1A_2.fq analysis_pyrad/fastq/1SN_2A_1.fq analysis_pyrad/fastq/1SN_2A_2.fq analysis_pyrad/fastq/1SN_3A_1.fq analysis_pyrad/fastq/1SN_3A_2.fq analysis_pyrad/fastq/1SN_4A_1.fq analysis_pyrad/fastq/1SN_4A_2.fq analysis_pyrad/fastq/1SN_5A_1.fq analysis_pyrad/fastq/1SN_5A_2.fq analysis_pyrad/fastq/1SN_6A_1.fq analysis_pyrad/fastq/1SN_6A_2.fq analysis_pyrad/fastq/1SN_7A_1.fq analysis_pyrad/fastq/1SN_7A_2.fq analysis_pyrad/fastq/1SN_8A_1.fq analysis_pyrad/fastq/1SN_8A_2.fq analysis_pyrad/fastq/1SN_9A_1.fq analysis_pyrad/fastq/1SN_9A_2.fq
I prefer using a stringent filter setting, including the -q
option which trims parts of the read that are low quality. This often results in removing all reads that are not merged. You can assemble merged and non-merged reads separately, as described in the 'non-merged PE-GBS tutorial', but here we will focus on just analyzing the merged reads.
%%bash
for gfile in analysis_pyrad/fastq/*_1.fq;
do pear -f $gfile \
-r ${gfile/_1.fq/_2.fq} \
-o ${gfile/_1.fq/} \
-n 33 \
-t 33 \
-q 10 \
-j 20 >> pear.log 2>&1;
done
bash: line 1: 2407 Abort trap: 6 pear -f $gfile -r ${gfile/_1.fq/_2.fq} -o ${gfile/_1.fq/} -n 33 -t 33 -q 10 -j 20 >> pear.log 2>&1
cd ./analysis_pyrad/fastq/
/Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq
ls
1HL_10A.assembled.fastq 1HL_7A.unassembled.forward.fastq 1NF_5A.unassembled.reverse.fastq 1SN_2A_2.fq* 1HL_10A.discarded.fastq 1HL_7A.unassembled.reverse.fastq 1NF_5A_1.fq* 1SN_3A.assembled.fastq 1HL_10A.unassembled.forward.fastq 1HL_7A_1.fq 1NF_5A_2.fq* 1SN_3A.discarded.fastq 1HL_10A.unassembled.reverse.fastq 1HL_7A_1.fq.gz* 1NF_6A.assembled.fastq 1SN_3A.unassembled.forward.fastq 1HL_10A_1.fq* 1HL_7A_2.fq* 1NF_6A.discarded.fastq 1SN_3A.unassembled.reverse.fastq 1HL_10A_2.fq* 1HL_8A.assembled.fastq 1NF_6A.unassembled.forward.fastq 1SN_3A_1.fq* 1HL_1A.assembled.fastq 1HL_8A.discarded.fastq 1NF_6A.unassembled.reverse.fastq 1SN_3A_2.fq* 1HL_1A.discarded.fastq 1HL_8A.unassembled.forward.fastq 1NF_6A_1.fq* 1SN_4A.assembled.fastq 1HL_1A.unassembled.forward.fastq 1HL_8A.unassembled.reverse.fastq 1NF_6A_2.fq* 1SN_4A.discarded.fastq 1HL_1A.unassembled.reverse.fastq 1HL_8A_1.fq* 1NF_7A.assembled.fastq 1SN_4A.unassembled.forward.fastq 1HL_1A_1.fq* 1HL_8A_2.fq* 1NF_7A.discarded.fastq 1SN_4A.unassembled.reverse.fastq 1HL_1A_2.fq* 1HL_9A.assembled.fastq 1NF_7A.unassembled.forward.fastq 1SN_4A_1.fq* 1HL_2A.assembled.fastq 1HL_9A.discarded.fastq 1NF_7A.unassembled.reverse.fastq 1SN_4A_2.fq* 1HL_2A.discarded.fastq 1HL_9A.unassembled.forward.fastq 1NF_7A_1.fq* 1SN_5A.assembled.fastq 1HL_2A.unassembled.forward.fastq 1HL_9A.unassembled.reverse.fastq 1NF_7A_2.fq* 1SN_5A.discarded.fastq 1HL_2A.unassembled.reverse.fastq 1HL_9A_1.fq* 1NF_8A.assembled.fastq 1SN_5A.unassembled.forward.fastq 1HL_2A_1.fq* 1HL_9A_2.fq* 1NF_8A.discarded.fastq 1SN_5A.unassembled.reverse.fastq 1HL_2A_2.fq* 1NF_10A.assembled.fastq 1NF_8A.unassembled.forward.fastq 1SN_5A_1.fq* 1HL_3A.assembled.fastq 1NF_10A.discarded.fastq 1NF_8A.unassembled.reverse.fastq 1SN_5A_2.fq* 1HL_3A.discarded.fastq 1NF_10A.unassembled.forward.fastq 1NF_8A_1.fq* 1SN_6A.assembled.fastq 1HL_3A.unassembled.forward.fastq 1NF_10A.unassembled.reverse.fastq 1NF_8A_2.fq* 1SN_6A.discarded.fastq 1HL_3A.unassembled.reverse.fastq 1NF_10A_1.fq* 1NF_9A.assembled.fastq 1SN_6A.unassembled.forward.fastq 1HL_3A_1.fq* 1NF_10A_2.fq* 1NF_9A.discarded.fastq 1SN_6A.unassembled.reverse.fastq 1HL_3A_2.fq* 1NF_1A.assembled.fastq 1NF_9A.unassembled.forward.fastq 1SN_6A_1.fq* 1HL_4A.assembled.fastq 1NF_1A.discarded.fastq 1NF_9A.unassembled.reverse.fastq 1SN_6A_2.fq* 1HL_4A.discarded.fastq 1NF_1A.unassembled.forward.fastq 1NF_9A_1.fq* 1SN_7A.assembled.fastq 1HL_4A.unassembled.forward.fastq 1NF_1A.unassembled.reverse.fastq 1NF_9A_2.fq* 1SN_7A.discarded.fastq 1HL_4A.unassembled.reverse.fastq 1NF_1A_1.fq* 1SN_10A.assembled.fastq 1SN_7A.unassembled.forward.fastq 1HL_4A_1.fq* 1NF_1A_2.fq* 1SN_10A.discarded.fastq 1SN_7A.unassembled.reverse.fastq 1HL_4A_2.fq* 1NF_2A.assembled.fastq 1SN_10A.unassembled.forward.fastq 1SN_7A_1.fq* 1HL_5A.assembled.fastq 1NF_2A.discarded.fastq 1SN_10A.unassembled.reverse.fastq 1SN_7A_2.fq* 1HL_5A.discarded.fastq 1NF_2A.unassembled.forward.fastq 1SN_10A_1.fq* 1SN_8A.assembled.fastq 1HL_5A.unassembled.forward.fastq 1NF_2A.unassembled.reverse.fastq 1SN_10A_2.fq* 1SN_8A.discarded.fastq 1HL_5A.unassembled.reverse.fastq 1NF_2A_1.fq* 1SN_1A.assembled.fastq 1SN_8A.unassembled.forward.fastq 1HL_5A_1.fq* 1NF_2A_2.fq* 1SN_1A.discarded.fastq 1SN_8A.unassembled.reverse.fastq 1HL_5A_2.fq* 1NF_4A.assembled.fastq 1SN_1A.unassembled.forward.fastq 1SN_8A_1.fq* 1HL_6A.assembled.fastq 1NF_4A.discarded.fastq 1SN_1A.unassembled.reverse.fastq 1SN_8A_2.fq* 1HL_6A.discarded.fastq 1NF_4A.unassembled.forward.fastq 1SN_1A_1.fq* 1SN_9A.assembled.fastq 1HL_6A.unassembled.forward.fastq 1NF_4A.unassembled.reverse.fastq 1SN_1A_2.fq* 1SN_9A.discarded.fastq 1HL_6A.unassembled.reverse.fastq 1NF_4A_1.fq* 1SN_2A.assembled.fastq 1SN_9A.unassembled.forward.fastq 1HL_6A_1.fq* 1NF_4A_2.fq* 1SN_2A.discarded.fastq 1SN_9A.unassembled.reverse.fastq 1HL_6A_2.fq* 1NF_5A.assembled.fastq 1SN_2A.unassembled.forward.fastq 1SN_9A_1.fq* 1HL_7A.assembled.fastq 1NF_5A.discarded.fastq 1SN_2A.unassembled.reverse.fastq 1SN_9A_2.fq* 1HL_7A.discarded.fastq 1NF_5A.unassembled.forward.fastq 1SN_2A_1.fq*
%%bash
## set location of demultiplexed data that are 'pear' filtered
sed -i '/## 18./c\analysis_pyrad/fastq/*.assembled.fastq ## 18. demulti data loc ' ./params.txt
sed: 1: "./params.txt": invalid command code .
cd /Volumes/owl/temp/oly_gbs_pyrad/
/Volumes/owl/temp/oly_gbs_pyrad
%%bash
cat params.txt
==** parameter inputs for pyRAD version 3.0.66 **======================== affected step == ./ ## 1. Working directory (all) ## 2. Loc. of non-demultiplexed files (if not line 18) (s1) ## 3. Loc. of barcode file (if not line 18) (s1) /usr/local/bioinformatics/vsearch-1.11.1-osx-x86_64/bin/vsearch ## 4. command (or path) to call vsearch (or usearch) (s3,s6) /usr/local/bioinformatics/muscle3.8.31_i86darwin64 ## 5. command (or path) to call muscle (s3,s7) CWGC ## 6. Restriction overhang (e.g., C|TGCAG -> TGCAG) (s1,s2) 16 ## 7. N processors (parallel) (all) 6 ## 8. Mindepth: min coverage for a cluster (s4,s5) 4 ## 9. NQual: max # sites with qual < 20 (or see line 20)(s2) .88 ## 10. Wclust: clustering threshold as a decimal (s3,s6) pairgbs ## 11. Datatype: rad,gbs,pairgbs,pairddrad,(others:see docs)(all) 4 ## 12. MinCov: min samples in a final locus (s7) 3 ## 13. MaxSH: max inds with shared hetero site (s7) oly_gbs_pyrad ## 14. Prefix name for final output (no spaces) (s7) ==== optional params below this line =================================== affected step == ## 15.opt.: select subset (prefix* only selector) (s2-s7) ## 16.opt.: add-on (outgroup) taxa (list or prefix*) (s6,s7) ## 17.opt.: exclude taxa (list or prefix*) (s7) /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq/*.assembled.fastq ## 18.opt.: loc. of de-multiplexed data (s2) ## 19.opt.: maxM: N mismatches in barcodes (def= 1) (s1) ## 20.opt.: phred Qscore offset (def= 33) (s2) 2 ## 21.opt.: filter: def=0=NQual 1=NQual+adapters. 2=strict (s2) ## 22.opt.: a priori E,H (def= 0.001,0.01, if not estimated) (s5) 4 ## 23.opt.: maxN: max Ns in a cons seq (def=5) (s5) 8 ## 24.opt.: maxH: max heterozyg. sites in cons seq (def=5) (s5) ## 25.opt.: ploidy: max alleles in cons seq (def=2;see docs) (s4,s5) ## 26.opt.: maxSNPs: (def=100). Paired (def=100,100) (s7) ## 27.opt.: maxIndels: within-clust,across-clust (def. 3,99) (s3,s7) ## 28.opt.: random number seed (def. 112233) (s3,s6,s7) ## 29.opt.: trim overhang left,right on final loci, def(0,0) (s7) * ## 30.opt.: output formats: p,n,a,s,v,u,t,m,k,g,* (see docs) (s7) ## 31.opt.: maj. base call at depth>x<mindepth (def.x=mindepth) (s5) 50 ## 32.opt.: keep trimmed reads (def=0). Enter min length. (s2) ## 33.opt.: max stack size (int), def= max(500,mean+2*SD) (s3) ## 34.opt.: minDerep: exclude dereps with <= N copies, def=1 (s3) ## 35.opt.: use hierarchical clustering (def.=0, 1=yes) (s6) ## 36.opt.: repeat masking (def.=1='dust' method, 0=no) (s3,s6) 16 ## 37.opt.: vsearch max threads per job (def.=6; see docs) (s3,s6) ==== optional: list group/clade assignments below this line (see docs) ==================
%%bash
cat params.txt
==** parameter inputs for pyRAD version 3.0.66 **======================== affected step == ./ ## 1. Working directory (all) ## 2. Loc. of non-demultiplexed files (if not line 18) (s1) ## 3. Loc. of barcode file (if not line 18) (s1) /usr/local/bioinformatics/vsearch-1.11.1-osx-x86_64/bin/vsearch ## 4. command (or path) to call vsearch (or usearch) (s3,s6) /usr/local/bioinformatics/muscle3.8.31_i86darwin64 ## 5. command (or path) to call muscle (s3,s7) CWGC ## 6. Restriction overhang (e.g., C|TGCAG -> TGCAG) (s1,s2) 16 ## 7. N processors (parallel) (all) 6 ## 8. Mindepth: min coverage for a cluster (s4,s5) 4 ## 9. NQual: max # sites with qual < 20 (or see line 20)(s2) .88 ## 10. Wclust: clustering threshold as a decimal (s3,s6) merged ## 11. Datatype: rad,gbs,pairgbs,pairddrad,(others:see docs)(all) 4 ## 12. MinCov: min samples in a final locus (s7) 3 ## 13. MaxSH: max inds with shared hetero site (s7) oly_gbs_pyrad ## 14. Prefix name for final output (no spaces) (s7) ==== optional params below this line =================================== affected step == ## 15.opt.: select subset (prefix* only selector) (s2-s7) ## 16.opt.: add-on (outgroup) taxa (list or prefix*) (s6,s7) ## 17.opt.: exclude taxa (list or prefix*) (s7) /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq/*.assembled.fastq ## 18.opt.: loc. of de-multiplexed data (s2) ## 19.opt.: maxM: N mismatches in barcodes (def= 1) (s1) ## 20.opt.: phred Qscore offset (def= 33) (s2) 2 ## 21.opt.: filter: def=0=NQual 1=NQual+adapters. 2=strict (s2) ## 22.opt.: a priori E,H (def= 0.001,0.01, if not estimated) (s5) 4 ## 23.opt.: maxN: max Ns in a cons seq (def=5) (s5) 8 ## 24.opt.: maxH: max heterozyg. sites in cons seq (def=5) (s5) ## 25.opt.: ploidy: max alleles in cons seq (def=2;see docs) (s4,s5) ## 26.opt.: maxSNPs: (def=100). Paired (def=100,100) (s7) ## 27.opt.: maxIndels: within-clust,across-clust (def. 3,99) (s3,s7) ## 28.opt.: random number seed (def. 112233) (s3,s6,s7) ## 29.opt.: trim overhang left,right on final loci, def(0,0) (s7) * ## 30.opt.: output formats: p,n,a,s,v,u,t,m,k,g,* (see docs) (s7) ## 31.opt.: maj. base call at depth>x<mindepth (def.x=mindepth) (s5) 50 ## 32.opt.: keep trimmed reads (def=0). Enter min length. (s2) ## 33.opt.: max stack size (int), def= max(500,mean+2*SD) (s3) ## 34.opt.: minDerep: exclude dereps with <= N copies, def=1 (s3) ## 35.opt.: use hierarchical clustering (def.=0, 1=yes) (s6) ## 36.opt.: repeat masking (def.=1='dust' method, 0=no) (s3,s6) 16 ## 37.opt.: vsearch max threads per job (def.=6; see docs) (s3,s6) ==== optional: list group/clade assignments below this line (see docs) ==================
%%bash
time pyrad -p params.txt -s 2
skipping /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq/1HL_7A.assembled.fastq , file is empty
------------------------------------------------------------ pyRAD : RADseq for phylogenetics & introgression analyses ------------------------------------------------------------ sorted .fastq from /Volumes/owl/temp/oly_gbs_pyrad/analysis_pyrad/fastq/*.assembled.fastq being used step 2: editing raw reads ............................ real 36m39.119s user 346m20.177s sys 2m17.116s
ls
analysis_pyrad/ edits/ params.txt pear.log stats/
%%bash
cat stats/s2.rawedit.txt
sample Nreads passed passed.w.trim passed.total 1HL_10A.assembled 2507372 1896970 283645 2180615 1HL_1A.assembled 1773097 1354799 195229 1550028 1HL_2A.assembled 2171914 1651420 245434 1896854 1HL_3A.assembled 1732554 1315904 194042 1509946 1HL_4A.assembled 2014361 1531652 227437 1759089 1HL_5A.assembled 1545655 1179051 171115 1350166 1HL_6A.assembled 1596112 1202761 185711 1388472 1HL_8A.assembled 1979026 1502255 221401 1723656 1HL_9A.assembled 2215196 1678429 252747 1931176 1NF_10A.assembled 1671753 1267635 193913 1461548 1NF_1A.assembled 1045115 791346 122974 914320 1NF_2A.assembled 2712241 2051426 315164 2366590 1NF_4A.assembled 2817468 2133772 328632 2462404 1NF_5A.assembled 2159619 1629567 254774 1884341 1NF_6A.assembled 1898743 1441365 219674 1661039 1NF_7A.assembled 2020526 1535799 232861 1768660 1NF_8A.assembled 2152984 1631583 251061 1882644 1NF_9A.assembled 2646866 2006706 306781 2313487 1SN_10A.assembled 2014213 1536796 225508 1762304 1SN_1A.assembled 2361481 1794932 270753 2065685 1SN_2A.assembled 2407383 1839713 270415 2110128 1SN_3A.assembled 1899774 1444121 216316 1660437 1SN_4A.assembled 2608511 1969748 302881 2272629 1SN_5A.assembled 2363176 1792769 268603 2061372 1SN_6A.assembled 2640957 2011414 299080 2310494 1SN_7A.assembled 2335159 1767126 267129 2034255 1SN_8A.assembled 2575057 1952134 295156 2247290 1SN_9A.assembled 2511951 1897343 291717 2189060 Nreads = total number of reads for a sample passed = retained reads that passed quality filtering at full length passed.w.trim= retained reads that were trimmed due to detection of adapters passed.total = total kept reads of sufficient length note: you can set the option in params file to include trimmed reads of xx length.
%%bash
pyrad -p params.txt -s 3
------------------------------------------------------------ pyRAD : RADseq for phylogenetics & introgression analyses ------------------------------------------------------------ de-replicating files for clustering... step 3: within-sample clustering of 28 samples at '.88' similarity. Running 16 parallel jobs with up to 16 threads per job. If needed, adjust to avoid CPU and MEM limits sample 1HL_2A finished, 806060 loci sample 1HL_9A finished, 843114 loci sample 1SN_5A finished, 897389 loci sample 1NF_5A finished, 839831 loci sample 1SN_7A finished, 884633 loci sample 1NF_8A finished, 834461 loci sample 1SN_9A finished, 949018 loci sample 1SN_2A finished, 897159 loci sample 1SN_8A finished, 961067 loci sample 1SN_4A finished, 961256 loci sample 1SN_1A finished, 878975 loci sample 1HL_10A finished, 929902 loci sample 1SN_6A finished, 981945 loci sample 1NF_9A finished, 984792 loci sample 1NF_4A finished, 1052234 loci sample 1NF_2A finished, 1002618 loci sample 1NF_1A finished, 432755 loci sample 1HL_5A finished, 610261 loci sample 1HL_4A finished, 758890 loci sample 1SN_10A finished, 778305 loci sample 1HL_3A finished, 692692 loci sample 1NF_7A finished, 760816 loci sample 1HL_6A finished, 645676 loci sample 1HL_1A finished, 697155 loci sample 1NF_6A finished, 734107 loci sample 1NF_10A finished, 660691 loci sample 1SN_3A finished, 775704 loci sample 1HL_8A finished, 764053 loci
%%bash
cat stats/s3.clusters.txt
taxa total dpt.me dpt.sd d>5.tot d>5.me d>5.sd badpairs 1HL_10A.assembled 929902 1.324 4.302 14750 14.742 31.154 0 1HL_1A.assembled 697155 1.302 3.598 10665 13.498 26.092 0 1HL_2A.assembled 806060 1.324 3.079 13181 14.275 19.905 0 1HL_3A.assembled 692692 1.299 3.247 10830 13.068 22.795 0 1HL_4A.assembled 758890 1.325 3.89 12675 13.885 27.065 0 1HL_5A.assembled 610261 1.309 3.885 9593 13.251 28.295 0 1HL_6A.assembled 645676 1.302 3.02 9955 13.321 20.74 0 1HL_8A.assembled 764053 1.311 4.378 11485 14.441 32.949 0 1HL_9A.assembled 843114 1.318 4.406 13178 14.572 32.403 0 1NF_10A.assembled 660691 1.306 3.262 10014 13.935 22.931 0 1NF_1A.assembled 432755 1.295 2.348 6680 12.746 14.473 0 1NF_2A.assembled 1002618 1.321 4.53 15706 14.885 33.317 0 1NF_4A.assembled 1052234 1.321 4.52 16803 14.694 32.941 0 1NF_5A.assembled 839831 1.304 3.072 13013 13.676 20.971 0 1NF_6A.assembled 734107 1.313 3.937 11456 13.916 28.594 0 1NF_7A.assembled 760816 1.323 3.174 12256 14.163 21.068 0 1NF_8A.assembled 834461 1.305 3.113 12426 14.3 21.566 0 1NF_9A.assembled 984792 1.32 3.915 15378 14.824 27.984 0 1SN_10A.assembled 778305 1.314 3.509 12283 13.869 24.62 0 1SN_1A.assembled 878975 1.321 3.324 13747 14.666 22.631 0 1SN_2A.assembled 897159 1.323 3.664 13986 14.72 25.791 0 1SN_3A.assembled 775704 1.286 2.946 10947 13.352 21.257 0 1SN_4A.assembled 961256 1.328 3.54 15787 14.49 23.965 0 1SN_5A.assembled 897389 1.317 3.624 14585 13.743 25.251 0 1SN_6A.assembled 981945 1.324 4.577 15571 14.758 33.539 0 1SN_7A.assembled 884633 1.318 3.903 14175 14.198 27.725 0 1SN_8A.assembled 961067 1.325 9.207 15325 14.778 71.55 0 1SN_9A.assembled 949018 1.315 3.67 15109 14.076 25.837 0 ## total = total number of clusters, including singletons ## dpt.me = mean depth of clusters ## dpt.sd = standard deviation of cluster depth ## >N.tot = number of clusters with depth greater than N ## >N.me = mean depth of clusters with depth greater than N ## >N.sd = standard deviation of cluster depth for clusters with depth greater than N ## badpairs = mismatched 1st & 2nd reads (only for paired ddRAD data) HISTOGRAMS sample: 1HL_10A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 911416 5 * 11120 10 * 3287 15 * 1533 20 * 808 25 * 531 30 * 380 35 * 235 40 * 262 50 * 244 100 * 65 250 * 16 500 * 5 sample: 1HL_1A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 683427 5 * 8605 10 * 2448 15 * 1085 20 * 651 25 * 366 30 * 209 35 * 115 40 * 90 50 * 109 100 * 37 250 * 11 500 * 2 sample: 1HL_2A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 789404 5 * 10224 10 * 2884 15 * 1334 20 * 712 25 * 467 30 * 331 35 * 228 40 * 214 50 * 187 100 * 58 250 * 14 500 * 3 sample: 1HL_3A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 678821 5 * 8846 10 * 2460 15 * 1081 20 * 584 25 * 384 30 * 181 35 * 94 40 * 88 50 * 105 100 * 39 250 * 7 500 * 2 sample: 1HL_4A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 742799 5 * 9960 10 * 2912 15 * 1238 20 * 724 25 * 450 30 * 261 35 * 175 40 * 151 50 * 157 100 * 50 250 * 10 500 * 3 sample: 1HL_5A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 597839 5 * 8056 10 * 2079 15 * 988 20 * 586 25 * 270 30 * 142 35 * 78 40 * 73 50 * 101 100 * 40 250 * 6 500 * 3 sample: 1HL_6A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 632762 5 * 8156 10 * 2166 15 * 1093 20 * 648 25 * 354 30 * 175 35 * 97 40 * 79 50 * 103 100 * 34 250 * 8 500 * 1 sample: 1HL_8A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 749305 5 * 9138 10 * 2492 15 * 1202 20 * 674 25 * 401 30 * 260 35 * 179 40 * 181 50 * 149 100 * 58 250 * 9 500 * 5 sample: 1HL_9A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 826401 5 * 10113 10 * 3014 15 * 1300 20 * 794 25 * 488 30 * 344 35 * 204 40 * 182 50 * 197 100 * 61 250 * 12 500 * 4 sample: 1NF_10A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 647784 5 * 7984 10 * 2252 15 * 1080 20 * 607 25 * 362 30 * 218 35 * 122 40 * 109 50 * 114 100 * 49 250 * 8 500 * 2 sample: 1NF_1A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 424162 5 * 5310 10 * 1653 15 * 841 20 * 378 25 * 153 30 * 77 35 * 48 40 * 46 50 * 58 100 * 26 250 * 3 500 0 sample: 1NF_2A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 983061 5 * 11679 10 * 3601 15 * 1550 20 * 863 25 * 509 30 * 384 35 * 273 40 * 341 50 * 265 100 * 65 250 * 20 500 * 7 sample: 1NF_4A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 1031378 5 * 12266 10 * 3940 15 * 1840 20 * 924 25 * 526 30 * 418 35 * 280 40 * 314 50 * 254 100 * 72 250 * 14 500 * 8 sample: 1NF_5A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 823258 5 * 10358 10 * 2986 15 * 1266 20 * 723 25 * 420 30 * 280 35 * 179 40 * 141 50 * 146 100 * 59 250 * 12 500 * 3 sample: 1NF_6A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 719344 5 * 9341 10 * 2531 15 * 1104 20 * 630 25 * 433 30 * 229 35 * 156 40 * 156 50 * 119 100 * 48 250 * 12 500 * 4 sample: 1NF_7A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 745146 5 * 9719 10 * 2732 15 * 1204 20 * 689 25 * 472 30 * 279 35 * 186 40 * 142 50 * 170 100 * 62 250 * 11 500 * 4 sample: 1NF_8A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 818557 5 * 9859 10 * 2628 15 * 1228 20 * 741 25 * 505 30 * 312 35 * 201 40 * 196 50 * 157 100 * 64 250 * 11 500 * 2 sample: 1NF_9A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 965401 5 * 11687 10 * 3495 15 * 1519 20 * 845 25 * 506 30 * 391 35 * 275 40 * 341 50 * 235 100 * 73 250 * 17 500 * 7 sample: 1SN_10A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 762615 5 * 9888 10 * 2692 15 * 1183 20 * 679 25 * 451 30 * 276 35 * 160 40 * 148 50 * 147 100 * 50 250 * 13 500 * 3 sample: 1SN_1A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 861548 5 * 10687 10 * 2991 15 * 1285 20 * 780 25 * 522 30 * 343 35 * 245 40 * 281 50 * 206 100 * 66 250 * 16 500 * 5 sample: 1SN_2A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 879338 5 * 10887 10 * 3204 15 * 1270 20 * 765 25 * 478 30 * 347 35 * 268 40 * 278 50 * 241 100 * 65 250 * 11 500 * 7 sample: 1SN_3A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 761420 5 * 9131 10 * 2406 15 * 1155 20 * 626 25 * 382 30 * 209 35 * 122 40 * 84 50 * 118 100 * 37 250 * 11 500 * 3 sample: 1SN_4A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 941440 5 * 11957 10 * 3664 15 * 1568 20 * 882 25 * 527 30 * 361 35 * 262 40 * 263 50 * 240 100 * 66 250 * 16 500 * 10 sample: 1SN_5A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 878933 5 * 11554 10 * 3328 15 * 1389 20 * 786 25 * 474 30 * 313 35 * 201 40 * 161 50 * 175 100 * 54 250 * 17 500 * 4 sample: 1SN_6A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 962379 5 * 11767 10 * 3629 15 * 1524 20 * 831 25 * 549 30 * 376 35 * 279 40 * 308 50 * 215 100 * 63 250 * 16 500 * 9 sample: 1SN_7A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 866767 5 * 10874 10 * 3323 15 * 1396 20 * 758 25 * 511 30 * 355 35 * 205 40 * 195 50 * 170 100 * 59 250 * 16 500 * 4 sample: 1SN_8A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 941822 5 * 11699 10 * 3624 15 * 1488 20 * 840 25 * 544 30 * 338 35 * 208 40 * 227 50 * 181 100 * 73 250 * 13 500 * 10 sample: 1SN_9A.assembled bins depth_histogram cnts : 0------------50-------------100% 0 ****************************** 930053 5 * 11576 10 * 3579 15 * 1479 20 * 807 25 * 488 30 * 312 35 * 241 40 * 194 50 * 211 100 * 53 250 * 16 500 * 9
ls
analysis_pyrad/ clust.88/ edits/ params.txt pear.log stats/
cd ..
/Volumes/owl/temp
%%bash
time cp -r /Volumes/owl/temp/oly_gbs_pyrad/ /Volumes/Data/Sam/scratch/oly_gbs_pyrad/
real 27m5.957s user 0m0.141s sys 2m48.164s
cd /Volumes/Data/Sam/scratch/oly_gbs_pyrad/
/Volumes/Data/Sam/scratch/oly_gbs_pyrad
%%bash
gzip -cd analysis_pyrad/clust.85/d35422.assembled.clustS.gz | head -n 100 | cut -b 1-80
>d35422.assembled_2120_r1;size=51; TGCAGCGCTACCTGCCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA >d35422.assembled_274175_r1;size=15;+ TGCAGCGCTACCTGCCCGATCTGTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA >d35422.assembled_2265219_r1;size=2;+ TGCAGCGCTACCTGCCGGATCTGTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA >d35422.assembled_216079_r1;size=1;+ TGCAGCGCTACCTGCCCGATCTGCTCGAGAAGCGGACGAACTGAGAATGAACCTGCA >d35422.assembled_2495995_r1;size=1;+ TGCAGCGCTACCTGCCCGGTCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA >d35422.assembled_590416_r1;size=1;+ TGCAGCGCTATCTGCCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA >d35422.assembled_1947641_r1;size=1;+ TGCAGCGCTACCTGCCCGATCTTTTCGAGAAGCGGACGAGCTGAGAATGAACCTGCA >d35422.assembled_1601389_r1;size=1;+ TGCAGCGCTACCTGCCGGATCTGTTCGAGAAGCGGACGAACTGAGAATGACACTGCA >d35422.assembled_190367_r1;size=1;+ TGCAGCGCTACCTGCCCGATCCTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA >d35422.assembled_2631365_r1;size=1;+ TGCAGCGCTACCTGCCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCG >d35422.assembled_126737_r1;size=1;+ TGCAGCGCTACCTGCCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTNCA >d35422.assembled_288692_r1;size=1;+ TGCAGCGCTACCTGCCGGATCNGTTCGAGAAGCGGACGAACTGAGAATGACACTGCA >d35422.assembled_2194406_r1;size=1;+ TGCAGCGCTANCTNCCCGATCTTTTCGAGAAGCGGANNANCTGAGAATGAACCTGCA >d35422.assembled_831862_r1;size=1;+ TGCAGCGCTACCTGNCCGATCTTTTCGAGAAGCGGACGAACTGAGAATGAACCTGCA // // >d35422.assembled_61639_r1;size=24; TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC >d35422.assembled_825340_r1;size=8;+ TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGTCAGTTCATCCAGTCGCATCCGCGCGACGCCCTGGTGCCGTCC >d35422.assembled_88436_r1;size=2;+ TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC >d35422.assembled_354389_r1;size=2;+ TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC >d35422.assembled_2229078_r1;size=1;+ TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTACGTCAGTTCATCCAGTCGCATCCGCGCGACTCCCTGGTGCCGTCC >d35422.assembled_1405977_r1;size=1;+ TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATNCAGTCGCANCCCCGCGACGCCCTGGTGCCGNCC >d35422.assembled_2738651_r1;size=1;+ TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC >d35422.assembled_2565401_r1;size=1;+ TGCAGCGTCAGTACGAGCTGGCGGAGATGGGCCTGCGTCAGTTCATCCAGTCCCATCCGCGTGATGCACTGGTACCGGCC >d35422.assembled_2060896_r1;size=1;+ TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCGCGCGACGCCCTCGTGCCGGCC >d35422.assembled_27774_r1;size=1;+ TGCAGCGTCAGTACGAGCTGGCGGAGATGGGCCTGCGTCAGTTCATCCAGTCCCATCCGCGTGATGCACTGGTACCGGCC >d35422.assembled_2776613_r1;size=1;+ TGCAGCGCCAGTACGAGCAGGCCGAGATGGGCCTGCGCCAGTTCATCCAGTCGCATCCCCGCGACGCCCTGGTGCCGGCC >d35422.assembled_2566934_r1;size=1;+ TGCAGCGCCAGTACGAGCAGGCCGGGATGGGCCTGCGTCAGTTCATCCAGTCGCATCCGCGCGACGCCCTGGTGCCGTCC // // >d35422.assembled_781655_r1;size=2;+ TGCAGGACCAGGGCTTCGCCGTGACCGCGAACATGGTGACCAGCCGCATCGTCGTGGACGCCGTGGGCACCTTCGACGCC >d35422.assembled_1218064_r1;size=2; TGCAGGACCAGGGCTTCGCCGTGACCGCGAACATGGTGACCAGTCGTGCTGTGGTCGAAGCCGTCGGCGCCTTCGACGCC >d35422.assembled_81417_r1;size=1;+ TGCAGGACCAGGGCTTCGCCGTGNCCGCGAACATGGTNACCACCCGCGCCGTTGTCGACGCCGTCGGCACCTTCGACGCC >d35422.assembled_1447392_r1;size=1;+ TGCAGGACCAGGGCTTCGCNGTGACCNCGAACATGGTGACCAGCCGCATCGTCGTGGACGCCGTGGGCACCTTCGACGCC >d35422.assembled_1916059_r1;size=1;+ TGCAGGACCAGGGCTTCGCCGTGACCGCGAACATGGTGACCAGCCGCGTCGTCGTGGACGCCGTGGGCACCTTCGACGCC // // >d35422.assembled_1086507_r1;size=1; TGCAGNNANC-AAAAAAAATATATG-TTTTTTTTTTTTGCTGCA >d35422.assembled_1935566_r1;size=1;+ TGCAGNTAACNAAAAAAAATATATGTTTTTTTTTTTTTGCTGCA // // >d35422.assembled_105061_r1;size=2; TGCAGGTCTCCTCCGACGTCTACTACTACACGGTCGGCTCGAAGCTCTTCTCGAGGCCGAACAAGCCCCTGCA >d35422.assembled_405948_r1;size=2;+ TGCAGGTCTCCTCCGACGTCTACTACTACACGGTCGGCGCGAACCTCTTCGAGAGGAAGAACAAGCCGCTGCA >d35422.assembled_1112236_r1;size=1;+ TGCAGGTNNCCTCNGACGTCTACTACTACACGGTCGGCGCGAACCTCTTCTCCAAGCCGAACAAGCCGCTGCA >d35422.assembled_129605_r1;size=1;+ TGCAGGTGTCCTCCGACGTCTACTACTACACCGTCGGGTCGAAGCTCTTCTCGAGGCCGAACAAGCCCCTGCA >d35422.assembled_108168_r1;size=1;+ TGCAGGTGTCCTCCGACGTCTACTACTACACGGTCGGCGCGAACCTCTTCTCGAAGCCGAACAAGCCCCTGCA >d35422.assembled_1796775_r1;size=1;+ TGCAGGTGTCCTCCGACGTCTACTACTACACGGTCGGCGCGAACCTCTTCTCCAAGCCGAACAAGCCGCTGCA // // >d35422.assembled_2015519_r1;size=1;+ TGCAGGGGGNCGCGGGGAGCAACCCCAGCTCGGTGCACGTGAGACAGGTGTCGATCACGGGCGGGGACCTCCTGGGGCCT >d35422.assembled_1326334_r1;size=1; TGCAGGGGGTCGCGGGGAGCAACCCCAGCTCGGTGCACGTGAGACAGGTGTCGATCACGGGCGGGGACCTCCTGGGGCCT // // >d35422.assembled_735556_r1;size=2; TGCAGCGCCGCCAGCGGCTGCAGAACTGCCCCCGAGTTTCCAGCTTCTGCA >d35422.assembled_2203593_r1;size=1;+ TGCAGCGCCTGGACGCCCTGCAGAACTGCCCCCGAGTTTCCAGCTTCTGCA // //
gzip: stdout: Broken pipe
%%bash
time pyrad -p params.txt -s 4
------------------------------------------------------------ pyRAD : RADseq for phylogenetics & introgression analyses ------------------------------------------------------------ step 4: estimating error rate and heterozygosity ............................ real 106m53.916s user 1403m36.534s sys 1m17.698s
%%bash
cat stats/Pi_E_estimate.txt
taxa H E 1NF_1A.assembled 0.01462331 0.00325686 1HL_5A.assembled 0.01406749 0.0032951 1HL_6A.assembled 0.01505022 0.00348848 1NF_10A.assembled 0.01534361 0.00308861 1HL_3A.assembled 0.01364489 0.00319892 1HL_1A.assembled 0.01579058 0.00339714 1NF_6A.assembled 0.01539856 0.00312442 1HL_4A.assembled 0.01459177 0.00315271 1HL_8A.assembled 0.01639449 0.00322956 1NF_7A.assembled 0.01590034 0.00303005 1SN_3A.assembled 0.01520282 0.00335947 1SN_10A.assembled 0.01505682 0.00330012 1HL_2A.assembled 0.01482491 0.00313122 1HL_9A.assembled 0.01540366 0.00296631 1NF_8A.assembled 0.01642511 0.00323494 1NF_5A.assembled 0.01519121 0.00314829 1SN_1A.assembled 0.01575731 0.00306824 1SN_7A.assembled 0.01513415 0.00310041 1SN_5A.assembled 0.01463804 0.0031614 1SN_2A.assembled 0.01625897 0.00318461 1HL_10A.assembled 0.01516635 0.00324934 1SN_9A.assembled 0.01423233 0.00315477 1SN_8A.assembled 0.01640971 0.00328322 1SN_4A.assembled 0.01409185 0.00304299 1SN_6A.assembled 0.01553508 0.00320316 1NF_9A.assembled 0.01569505 0.00318668 1NF_2A.assembled 0.01581381 0.00301155 1NF_4A.assembled 0.01508062 0.00300757
You may want to adjust:
%%bash
time pyrad -p params.txt -s 5
------------------------------------------------------------ pyRAD : RADseq for phylogenetics & introgression analyses ------------------------------------------------------------ step 5: creating consensus seqs for 28 samples, using H=0.01524 E=0.00318 ............................ real 66m47.210s user 827m35.100s sys 0m46.871s
%%bash
cat stats/s5.consens.txt
taxon nloci f1loci f2loci nsites npoly poly 1NF_8A.assembled 834461 12424 9070 936467 9052 0.0096661 1HL_2A.assembled 806060 13178 10137 1044548 9526 0.0091197 1NF_5A.assembled 839831 13010 10171 1069632 9293 0.008688 1HL_9A.assembled 843114 13174 10343 1071651 10224 0.0095404 1SN_1A.assembled 878975 13742 10736 1134533 9765 0.0086071 1SN_2A.assembled 897159 13979 10742 1118263 10144 0.0090712 1SN_7A.assembled 884633 14171 11005 1147984 10236 0.0089165 1SN_5A.assembled 897389 14581 11416 1184878 10807 0.0091208 1HL_10A.assembled 929902 14745 11554 1198784 10908 0.0090992 1SN_9A.assembled 949018 15100 11927 1244046 11194 0.0089981 1SN_8A.assembled 961067 15315 11873 1242588 10833 0.0087181 1SN_6A.assembled 981945 15562 12013 1252587 11659 0.0093079 1NF_9A.assembled 984792 15371 11942 1268914 10701 0.0084332 1SN_4A.assembled 961256 15777 12499 1300779 11409 0.0087709 1NF_2A.assembled 1002618 15699 12176 1292382 11082 0.0085749 1NF_4A.assembled 1052234 16795 13192 1398803 12103 0.0086524 1NF_1A.assembled 432755 6680 5139 536894 5451 0.0101528 1SN_3A.assembled 775704 10944 8257 850361 8592 0.0101039 1HL_1A.assembled 697155 10663 7979 807288 8288 0.0102665 1HL_8A.assembled 764053 11480 8863 921573 8513 0.0092375 1HL_5A.assembled 610261 9590 7466 764440 7345 0.0096083 1HL_6A.assembled 645676 9954 7293 732928 8296 0.011319 1SN_10A.assembled 778305 12280 9610 1001472 9488 0.0094741 1NF_10A.assembled 660691 10012 7687 808979 7778 0.0096146 1HL_3A.assembled 692692 10828 8461 868384 8396 0.0096685 1NF_6A.assembled 734107 11452 8962 944288 8628 0.009137 1NF_7A.assembled 760816 12252 9601 1016859 9056 0.0089059 1HL_4A.assembled 758890 12672 9914 1027933 9402 0.0091465 ## nloci = number of loci ## f1loci = number of loci with >N depth coverage ## f2loci = number of loci with >N depth and passed paralog filter ## nsites = number of sites across f loci ## npoly = number of polymorphic sites in nsites ## poly = frequency of polymorphic sites
%%bash
time pyrad -p params.txt -s 6
vsearch v1.11.1_osx_x86_64, 24.0GB RAM, 16 cores https://github.com/torognes/vsearch finished clustering
------------------------------------------------------------ pyRAD : RADseq for phylogenetics & introgression analyses ------------------------------------------------------------ step 6: clustering across 28 samples at '.88' similarity Reading file /Volumes/Data/Sam/scratch/oly_gbs_pyrad/clust.88/cat.haplos_ 100% 30308350 nt in 280028 seqs, min 32, max 245, avg 108 Counting unique k-mers 100% Clustering 100% Sorting clusters 100% Writing clusters 100% Clusters: 51756 Size min 1, max 68, avg 5.4 Singletons: 16399, 5.9% of seqs, 31.7% of clusters real 6m9.378s user 7m42.856s sys 0m6.083s
%%bash
time pyrad -p params.txt -s 7
ingroup 1HL_10A.assembled,1HL_1A.assembled,1HL_2A.assembled,1HL_3A.assembled,1HL_4A.assembled,1HL_5A.assembled,1HL_6A.assembled,1HL_8A.assembled,1HL_9A.assembled,1NF_10A.assembled,1NF_1A.assembled,1NF_2A.assembled,1NF_4A.assembled,1NF_5A.assembled,1NF_6A.assembled,1NF_7A.assembled,1NF_8A.assembled,1NF_9A.assembled,1SN_10A.assembled,1SN_1A.assembled,1SN_2A.assembled,1SN_3A.assembled,1SN_4A.assembled,1SN_5A.assembled,1SN_6A.assembled,1SN_7A.assembled,1SN_8A.assembled,1SN_9A.assembled addon exclude final stats written to: /Volumes/Data/Sam/scratch/oly_gbs_pyrad/stats/oly_gbs_pyrad.stats output files being written to: /Volumes/Data/Sam/scratch/oly_gbs_pyrad/outfiles/ directory filtering & writing to phylip file writing nexus file Writing gphocs file + writing full SNPs file + writing unlinked bi-allelic SNPs file + writing STRUCTURE file + writing geno file ** must enter group/clade assignments for treemix output writing vcf file ** must enter group/clade assignments for migrate-n output
------------------------------------------------------------ pyRAD : RADseq for phylogenetics & introgression analyses ------------------------------------------------------------ ................ real 5m33.004s user 22m21.544s sys 4m54.310s
%%bash
cat stats/oly_gbs_pyrad.stats
21017 ## loci with > minsp containing data 18212 ## loci with > minsp containing data & paralogs removed 18212 ## loci with > minsp containing data & paralogs removed & final filtering ## number of loci recovered in final data set for each taxon. taxon nloci 1HL_10A.assembled 6563 1HL_1A.assembled 4706 1HL_2A.assembled 5971 1HL_3A.assembled 5037 1HL_4A.assembled 5910 1HL_5A.assembled 4373 1HL_6A.assembled 4299 1HL_8A.assembled 5148 1HL_9A.assembled 6075 1NF_10A.assembled 4560 1NF_1A.assembled 3123 1NF_2A.assembled 6960 1NF_4A.assembled 7381 1NF_5A.assembled 5920 1NF_6A.assembled 5270 1NF_7A.assembled 5690 1NF_8A.assembled 5287 1NF_9A.assembled 6863 1SN_10A.assembled 5651 1SN_1A.assembled 6293 1SN_2A.assembled 6287 1SN_3A.assembled 4786 1SN_4A.assembled 7230 1SN_5A.assembled 6577 1SN_6A.assembled 6770 1SN_7A.assembled 6365 1SN_8A.assembled 6794 1SN_9A.assembled 6873 ## nloci = number of loci with data for exactly ntaxa ## ntotal = number of loci for which at least ntaxa have data ntaxa nloci saved ntotal 1 - 2 - - 3 - - 4 3551 * 18212 5 2701 * 14661 6 2051 * 11960 7 1655 * 9909 8 1386 * 8254 9 1089 * 6868 10 876 * 5779 11 732 * 4903 12 601 * 4171 13 516 * 3570 14 400 * 3054 15 328 * 2654 16 291 * 2326 17 271 * 2035 18 224 * 1764 19 193 * 1540 20 175 * 1347 21 175 * 1172 22 139 * 997 23 157 * 858 24 160 * 701 25 132 * 541 26 118 * 409 27 141 * 291 28 150 * 150 ## nvar = number of loci containing n variable sites (pis+autapomorphies). ## sumvar = sum of variable sites (SNPs). ## pis = number of loci containing n parsimony informative sites. ## sumpis = sum of parsimony informative sites. nvar sumvar PIS sumPIS 0 7053 0 10716 0 1 3045 3045 2795 2795 2 1991 7027 1563 5921 3 1369 11134 995 8906 4 1000 15134 678 11618 5 811 19189 489 14063 6 639 23023 348 16151 7 569 27006 227 17740 8 411 30294 145 18900 9 303 33021 87 19683 10 211 35131 62 20303 11 186 37177 26 20589 12 129 38725 31 20961 13 112 40181 11 21104 14 79 41287 9 21230 15 73 42382 9 21365 16 48 43150 9 21509 17 43 43881 3 21560 18 35 44511 4 21632 19 17 44834 1 21651 20 15 45134 1 21671 21 13 45407 0 21671 22 11 45649 1 21693 23 12 45925 0 21693 24 6 46069 0 21693 25 8 46269 0 21693 26 3 46347 0 21693 27 5 46482 0 21693 28 1 46510 0 21693 29 3 46597 0 21693 30 1 46627 0 21693 31 1 46658 0 21693 32 1 46690 0 21693 33 2 46756 0 21693 34 1 46790 0 21693 35 1 46825 0 21693 36 0 46825 1 21729 37 2 46899 0 21729 38 0 46899 0 21729 39 0 46899 0 21729 40 0 46899 0 21729 41 0 46899 0 21729 42 1 46941 0 21729 43 0 46941 1 21772 44 0 46941 0 21772 45 1 46986 0 21772 total var= 46986 total pis= 21772 sampled unlinked SNPs= 11159 sampled unlinked bi-allelic SNPs= 11074
%%bash
head -n 100 outfiles/oly_gbs_pyrad.loci | cut -b 1-88
>1HL_10A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1HL_1A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1HL_3A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1HL_5A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1HL_8A.assembled TATAAAATGAATGAAATATTGNATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1NF_10A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1NF_4A.assembled -ATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1NF_6A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1NF_7A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1NF_8A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1SN_6A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG >1SN_8A.assembled TATAAAATGAATGAAATATTG-ATTACTTTGAAGGCGCTTAATGTTACATCAACTTCCAGCCAGTG // >1NF_5A.assembled CATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT >1SN_1A.assembled CATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT >1SN_3A.assembled CATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT >1SN_4A.assembled -ATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT >1SN_7A.assembled ------------------------------------------------------ATACCATCTGGT >1SN_8A.assembled CATTTTGCAAGATGAAAGTTATCGCTCTATTTGTCTTATTTGACATTTCCCACAATACCATCTGGT // >1HL_10A.assembled CCTCCCGCGCCCTAATTGGGAACCAGTTTTATATTATTCGATAACCTGTGTCATGGCTTTTCTTTT >1NF_6A.assembled CCTCCCGCGCCCTAATTGGGAACCAGTTTTATATTATTCGATAACCTGTGTCATGGCTTTTCTTTT >1NF_9A.assembled CCTCCCGCGCCCTAATTGNGNACCAGTTTTANA----NCGATAACCTGTGTCATGGCTTTTCTTTT >1SN_6A.assembled ----CCGCGCCCTAATTGGGAACCAGTTTTATATTATTCGATAACCTGTGTCATGGCTTTTCTTTT >1SN_7A.assembled CCTCCCGCGCCCTAATTGGGAACCAGTTTTATATTATTCGATAACCTGTGTCATGGCTTTTCTTTT // >1HL_9A.assembled CGACTAATAGCTTCTGTATATTCCGGAATGCCAATTTTGAATAGGTTTGAACTTTATCTGCGTTCT >1NF_8A.assembled CGACTAATAGCTTCTGTATATTCCGGAATGCCAATTTTGAATAGGTTTGAACTTTATCTGCGTTCT >1NF_9A.assembled CGACTAATAGCTTCTGTATATTCCGGAATGCCAATTTTGAATAGGTTTGAACTTTATCTGCGTTCT >1SN_7A.assembled CGACTAATAGCTTCTGTATATTCCGGAATGCCAATTTTGAATAGGTTTGAACTTTATCTGCGTTCT // >1HL_6A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1HL_9A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1NF_2A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1NF_5A.assembled GCTCATAGTACCACNGAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1NF_6A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1NF_7A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1NF_8A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1SN_10A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1SN_5A.assembled GCTCATAGTACCACNGAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1SN_6A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1SN_7A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1SN_8A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG >1SN_9A.assembled GCTCATAGTACCAC-GAATTTTCAATCCTGATATAGCGGGCTAATCAAGGTGTGCCGTTCATAAAG // >1NF_5A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG >1SN_1A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG >1SN_3A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG >1SN_7A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG >1SN_8A.assembled GACAACAACTAATTCGTAGTCTAGCGAAAGACCCCGTTGCTGTACATAAATCTGGAGTATTGGAAG // >1HL_1A.assembled ----ATAAGTACGAGGTGATGTGTAAGCCCGAGACCTGCACCATTCATCCTCTCACCTGCACGCCC >1SN_1A.assembled TGCGATAAGTACGAGGTGATGTGTAAGCCCGAGACCTGCACCATTCAKCCTCTCACCTGCACGCCC >1SN_2A.assembled TGCGATAAGTACGAGGTGATGTGTAAGCCCGAGACCTGCACCATTCAKCCTCTCACCTGCACGCCC >1SN_4A.assembled --------------------------------------------------TCTCACCTGCACGCCC >1SN_6A.assembled TGCGATAAGTACGAGGTGATGTGTAAGCCCGAGACCTGCACCATTCATCCTCTCACCTGCACGCCC // * >1HL_4A.assembled CAAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCCAGGTAATCCAGTTGCATGGGCTGTTT >1HL_9A.assembled -AAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCCAGGTAATCCAGTTGCATGGGCTGTTT >1NF_6A.assembled -AAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCCAGGTAATCCAGTTGCATGGGCTGTTT >1NF_8A.assembled CAAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCCAGGTAATCCAGTTGCATGGGCTGTTT >1NF_9A.assembled CAAGAGATCCAGTTGCAGTGGCTRTTTCCATGTTGTTGCNAGGTAATCCAGTTGCATGGGCTGTTT >1SN_3A.assembled ----------------AGTGGCTGTTTCCATGTTGTTGCAAGGTAATCCAGTTGCATGGNCTGTTT >1SN_4A.assembled CAAGAGATCCAGTTGCAGTGGCTATTTCCATGTTGTTGCMAGGTAATCCAGTTGCATGGRCTGTTT >1SN_8A.assembled CAAGAGATCCAGTTGCAGTGGCT-TTTCCATGTTGTTGCNAGGTAATCCAGTTGCATGGNCTGTTT // * * - >1HL_10A.assembled AATTTCTGCGGGACTGTACAGTCCCATATCGGCTACATGTAGGAGAGTC >1HL_8A.assembled AATTTCTGCGGGACTGTACNGTCCCATNTCGGCTACATNTAGGAGAGTC >1NF_2A.assembled AATTTCTGCGGGACTGTACAGTCCCATATCGGCTACATGTAGGAGAGTC >1NF_9A.assembled AATTTCTGCGGGACTGTACNGTCCCATATCGGCTANATGNAGGAGAGTC >1SN_3A.assembled AATTTCTGCGGGACTGTACAGTCCCATATCGGCTACATGTAGGAGAGTC >1SN_5A.assembled AATTTCTGCGGGACTGNACNGTCCCATNTCGGCTACAWRNAGGAGAGTC >1SN_7A.assembled AATTTCTGCGGGACTGWACTGTCCMATNTCGGC---AWRWAGGAGAGTC // - - - **- |9| >1HL_2A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT >1HL_9A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT >1NF_2A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT >1NF_6A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT >1NF_8A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT >1SN_7A.assembled AACATNAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT >1SN_8A.assembled AACATTAATGTACAGAACAAGTAAGACAGCAACATTAATGTACAGAACAAGTAAGACAGCAACATT // >1HL_6A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1NF_10A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1NF_5A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1NF_9A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1SN_10A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1SN_1A.assembled -----------------------------------------------------TAGNNNCTGACCT >1SN_2A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1SN_4A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1SN_5A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1SN_6A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1SN_7A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1SN_8A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT >1SN_9A.assembled AAAAGTGATTTCTATTAGGGGCATATGGGGCTTAGTAAACATTATAACATGATTAGTATCTGACCT // >1HL_2A.assembled CGYAGAAAACATCTTTCCTACCAGAGCTAACACTGTCGTCAA >1NF_5A.assembled CGTAGAAAACATCTTTCCTACCAGAGCTAACACTGTCGTCAA >1NF_8A.assembled CGTAGAAAACATCTTTCCTACCAGAGCTAACACTGTCGTCAA >1NF_9A.assembled CGCAGAAAACATCTTTCCTACCAGAGCTAACACTGTCGTCAA
ls stats/
Pi_E_estimate.txt oly_gbs_pyrad.stats s2.rawedit.txt s3.clusters.txt s5.consens.txt
ls outfiles/
oly_gbs_pyrad.alleles oly_gbs_pyrad.loci oly_gbs_pyrad.phy.partitions oly_gbs_pyrad.str oly_gbs_pyrad.vcf oly_gbs_pyrad.excluded_loci oly_gbs_pyrad.nex oly_gbs_pyrad.snps oly_gbs_pyrad.unlinked_snps oly_gbs_pyrad.gphocs oly_gbs_pyrad.phy oly_gbs_pyrad.snps.geno oly_gbs_pyrad.usnps.geno
%%bash
head -1 outfiles/oly_gbs_pyrad.snps
## 28 taxa, 36424 loci, 72251 snps