All required software can be installed locally using conda. I assume here that you already have ipyrad
installed using conda.
## conda install toytree -c eaton-lab
## conda install ipyrad -c ipyrad
## conda install bpp -c ipyrad
## import packages
import ipyrad as ip
import ipyrad.analysis as ipa
import numpy as np
import toyplot
import toytree
import glob
## print ipyrad info
print "ipyrad v.{}".format(ip.__version__)
ipyrad v.0.6.20
see more information on ipyparallel setup here.
## print ipyparallel cluster information
import ipyparallel as ipp
ipyclient = ipp.Client()
print ip.cluster_info(ipyclient)
host compute node: [40 cores] on tinus
## create subsampled pharma-clade branch
pharma = ip.load_json("analysis-ipyrad/pharma_dhi_s4.json")
america = ip.load_json("analysis-ipyrad/america_dhi_s4.json")
loading Assembly: pharma_dhi_s4 from saved path: ~/Documents/Ficus/analysis-ipyrad/pharma_dhi_s4.json loading Assembly: america_dhi_s4 from saved path: ~/Documents/Ficus/analysis-ipyrad/america_dhi_s4.json
## a tree hypothesis (guidetree) (here based on tetrad results)
newick = "((((glabrata, insipida), yoponensis), maxima), tonduzii);"
## a dictionary mapping sample names to 'species' names
imap = {
"glabrata": ["B133_glabrata", "A97_glabrata", "B134_glabrata", "B131_glabrataXmaxima"],
"insipida": ["A95_insipida", "B127_insipida", "C15_insipida",
"B128_insipida", "B127_insipida", "A95_insipida"],
"yoponensis": ["C45_yoponensis", "C47_yoponensis", "C46_yoponensis"],
"maxima": ["A94_maxima", "C17_maxima", "B119_maxima", "B120_maxima"],
"tonduzii": ["C48_tonduzii"],
}
## loci must have data for at least N samples in each species.
minmap = {
"glabrata": 4,
"insipida": 4,
"yoponensis": 3,
"maxima": 4,
"tonduzii": 1,
}
## check your (starting) tree hypothesis
toytree.tree(newick).draw();
## create a bpp object to run algorithm 00
boo = ipa.bpp(
locifile=pharma.outfiles.loci,
guidetree=newick,
imap=imap,
minmap=minmap,
workdir="analysis-bpp/",
)
## set some optional params, leaving others at their defaults
boo.params.burnin = 10000
boo.params.nsample = 50000
boo.params.sampfreq = 25
boo.params
burnin 10000 cleandata 0 copied False delimit_alg (0, 5) finetune (0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01) infer_delimit 0 infer_sptree 0 nsample 50000 sampfreq 25 seed 12345 tauprior (2, 2000, 1) thetaprior (2, 2000) usedata 1
## set some optional filters leaving others at their defaults
boo.filters.maxloci=500
boo.filters.minsnps=4
## print filters
boo.filters
maxloci 500 minmap {'insipida': 4, 'tonduzii': 1, 'maxima': 4, 'glabrata': 4, 'yoponensis': 3} minsnps 4
## write files
boo.write_bpp_files(prefix="pharma-ltest")
input files created for job pharma-ltest (500 loci)
boo.submit_bpp_jobs(
prefix="pharma-loci500",
nreps=3,
ipyclient=ipyclient,
seed=12345,
randomize_order=True,
)
submitted 3 bpp jobs [pharma-loci500] (500 loci)
b10 = boo.copy()
b10.params.infer_sptree = 1
b10.params
burnin 10000 cleandata 0 copied False delimit_alg (0, 5) finetune (0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01) infer_delimit 0 infer_sptree 1 nsample 50000 sampfreq 25 seed 285080861 tauprior (2, 2000, 1) thetaprior (2, 2000) usedata 1
b10.filters.maxloci = 500
b10.filters
maxloci 500 minmap {'insipida': 4, 'tonduzii': 1, 'maxima': 4, 'glabrata': 4, 'yoponensis': 3} minsnps 4
b10.submit_bpp_jobs(
prefix="pharma-loci500-b10",
nreps=3,
ipyclient=ipyclient,
seed=12345,
randomize_order=True,
)
submitted 3 bpp jobs [pharma-loci500-b10] (500 loci)
bb10 = b10.copy()
bb10.filters.maxloci = 50
bb10.filters
bb10.submit_bpp_jobs(
prefix="pharma-loci50-bb10",
nreps=10,
ipyclient=ipyclient,
seed=12345,
randomize_order=True,
)
submitted 10 bpp jobs [pharma-loci50-bb10] (50 loci)
import pandas as pd
tab = pd.read_csv("analysis-bpp/pharma-loci500-r0.mcmc.txt", sep="\t", index_col=0)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
tab.describe().T
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
theta_1glabrata | 760.0000 | 0.0011 | 0.0001 | 0.0008 | 0.0010 | 0.0010 | 0.0012 | 0.0016 |
theta_2insipida | 760.0000 | 0.0011 | 0.0001 | 0.0008 | 0.0010 | 0.0010 | 0.0011 | 0.0017 |
theta_3maxima | 760.0000 | 0.0038 | 0.0003 | 0.0030 | 0.0036 | 0.0038 | 0.0040 | 0.0046 |
theta_5yoponensis | 760.0000 | 0.0020 | 0.0002 | 0.0016 | 0.0019 | 0.0020 | 0.0021 | 0.0025 |
theta_6glabratainsipidayoponensismaximatonduzii | 760.0000 | 0.0157 | 0.0008 | 0.0135 | 0.0152 | 0.0157 | 0.0162 | 0.0181 |
theta_7glabratainsipidayoponensismaxima | 760.0000 | 0.0015 | 0.0009 | 0.0001 | 0.0008 | 0.0013 | 0.0020 | 0.0051 |
theta_8glabratainsipidayoponensis | 760.0000 | 0.0019 | 0.0012 | 0.0001 | 0.0010 | 0.0016 | 0.0024 | 0.0070 |
theta_9glabratainsipida | 760.0000 | 0.0132 | 0.0019 | 0.0079 | 0.0120 | 0.0132 | 0.0143 | 0.0191 |
tau_6glabratainsipidayoponensismaximatonduzii | 760.0000 | 0.0030 | 0.0002 | 0.0024 | 0.0029 | 0.0030 | 0.0031 | 0.0036 |
tau_7glabratainsipidayoponensismaxima | 760.0000 | 0.0030 | 0.0002 | 0.0024 | 0.0029 | 0.0030 | 0.0031 | 0.0036 |
tau_8glabratainsipidayoponensis | 760.0000 | 0.0029 | 0.0002 | 0.0024 | 0.0028 | 0.0030 | 0.0031 | 0.0036 |
tau_9glabratainsipida | 760.0000 | 0.0011 | 0.0002 | 0.0008 | 0.0010 | 0.0010 | 0.0011 | 0.0016 |
lnL | 760.0000 | -69160.9028 | 43.5274 | -69280.3200 | -69189.8015 | -69157.8950 | -69129.9325 | -68997.7510 |
def plotit(mtre):
## set up axes
canvas = toyplot.Canvas(width=450, height=350)
axes = canvas.cartesian()
## plot the tree
mtre.draw_cloudtree(
axes=axes,
edge_style={"opacity": 0.01},
use_edge_lengths=True,
orient='right',
);
## style axes
axes.y.show = False
axes.x.show = True
axes.x.ticks.show = True
axes.x.ticks.locator = toyplot.locator.Explicit(
locations=np.linspace(0, -3, 3) / 1000.,
labels=np.linspace(0, 3, 3),
)
axes.x.label.text = "Divergence time (substitutions/site x 10<sup>-3</sup>)"
return canvas, axes
mtre = toytree.multitree(
"analysis-bpp/pharma-loci500-b10-r0.mcmc.txt",
fixed_order=boo.tree.get_leaf_names(),
treeslice=(150, 1500, 5),
)
plotit(mtre)
(<toyplot.canvas.Canvas at 0x7f8a39bddd50>, <toyplot.coordinates.Cartesian at 0x7f8a39bdd690>)
mtre = toytree.multitree(
"analysis-bpp/pharma-loci500-b10-r2.mcmc.txt",
fixed_order=boo.tree.get_leaf_names(),
treeslice=(150, 1500, 5),
)
plotit(mtre)
(<toyplot.canvas.Canvas at 0x7f8a3a84dfd0>, <toyplot.coordinates.Cartesian at 0x7f8a444d5750>)
ctre = mtre.get_consensus_tree()
#ctre.root(['tonduzii', 'maxima'])
ctre.draw(width=300, height=300, node_labels=True);
mtre = toytree.multitree(
"analysis-bpp/pharma-loci50-bb10-r0.mcmc.txt",
fixed_order=boo.tree.get_leaf_names(),
treeslice=(500, 5000, 10),
)
plotit(mtre);
mtre = toytree.multitree(
"analysis-bpp/pharma-loci50-bb10-r9.mcmc.txt",
fixed_order=boo.tree.get_leaf_names(),
treeslice=(1000, 5000, 10),
)
plotit(mtre);