The tetrad
tool is a framework for inferring a species tree topology using quartet-joining on large unlinked SNP data sets. It is particularly optimized for RAD-seq type datasets that are likely to involve a lot of missing data.
# conda install ipyrad -c conda-forge -c bioconda
# conda install tetrad -c conda-forge
import ipyrad.analysis as ipa
import toytree
import ipcoal
tree = toytree.rtree.bdtree(ntips=20, seed=555)
tree = tree.mod.node_scale_root_height(10e6)
tree.draw(scalebar=True);
# init simulator with one diploid sample from each tip
model = ipcoal.Model(tree, Ne=1e6, nsamples=2, recomb=0)
# simulate sequence data on 10K loci
model.sim_loci(10000, 50)
# add missing data (50%)
model.apply_missing_mask(0.5)
# write results to database file
model.write_snps_to_hdf5(name="test-tet-miss50", outdir='/tmp', diploid=True)
wrote 259208 SNPs to /tmp/test-tet-miss50.snps.hdf5
SNPS = "/tmp/test-tet-miss50.snps.hdf5"
tet = ipa.tetrad(
data=SNPS,
name="test-tet-miss50",
workdir="/tmp",
nboots=10,
nquartets=1e6,
)
loading snps array [20 taxa x 259208 snps] max unlinked SNPs per quartet [nloci]: 10000 quartet sampler [full]: 4845 / 4845
tet.run(auto=True, force=True)
Parallel connection | latituba: 8 cores initializing quartet sets database [####################] 100% 0:00:14 | full tree * | mean SNPs/qrt: 3167 [####################] 100% 0:00:12 | boot rep. 1 | mean SNPs/qrt: 3163 [####################] 100% 0:00:12 | boot rep. 2 | mean SNPs/qrt: 3161 [####################] 100% 0:00:12 | boot rep. 3 | mean SNPs/qrt: 3170 [####################] 100% 0:00:12 | boot rep. 4 | mean SNPs/qrt: 3144 [####################] 100% 0:00:11 | boot rep. 5 | mean SNPs/qrt: 3172 [####################] 100% 0:00:11 | boot rep. 6 | mean SNPs/qrt: 3191 [####################] 100% 0:00:11 | boot rep. 7 | mean SNPs/qrt: 3144 [####################] 100% 0:00:11 | boot rep. 8 | mean SNPs/qrt: 3153 [####################] 100% 0:00:11 | boot rep. 9 | mean SNPs/qrt: 3180 [####################] 100% 0:00:11 | boot rep. 10 | mean SNPs/qrt: 3128
tre = toytree.tree(tet.trees.cons)
rtre = tre.root(["r19", "r18", "r17"])
rtre.draw(ts='d', use_edge_lengths=False, node_labels="support");
rfdist = rtre.treenode.robinson_foulds(tree.treenode)[0]
rfdist == 0
True