Calculate population-genetic summary statistics such as pi, Tajima's D, and Fst
import ipyrad
import ipyrad.analysis as ipa
import ipyparallel as ipp
from ipyrad.analysis.popgen import Popgen
# An ipcluster instance must already be running before this cell executes.
# Launch one from a terminal with:
#   `ipcluster start --cluster-id=popgen --n=12 --daemonize`
#
# When the analysis is finished, shut the cluster down again with:
#   `ipcluster stop --cluster-id=popgen`
ipyclient = ipp.Client(cluster_id="popgen")

# Report the size of the client; with the start command above this is
# expected to match the `--n` value.
n_engines = len(ipyclient)
print(n_engines)
12
# Input: the HDF5-formatted sequence file found in the assembly's output
# directory.
data = "/tmp/ipyrad-test/wat_outfiles/wat.seqs.hdf5"
# An ipyrad assembly object is accepted in place of the file path:
#data = ipyrad.load_json("/tmp/ipyrad-test/wat.json")

# Population assignment: each key is a population name and each value is the
# list of sample names belonging to that population.
imap = {
    "pop1": ["1A_0", "1B_0", "1C_0", "1D_0"],
    "pop2": ["2E_0", "2F_0", "2G_0", "2H_0"],
    "pop3": ["3I_0", "3J_0", "3K_0", "3L_0"],
}

# Build the Popgen analysis object, then run it on the parallel client.
popgen = Popgen(imap=imap, data=data)
popgen.run(ipyclient=ipyclient)
Parallel connection | bobolink: 12 cores
[locus filter] full data: 1000
[locus filter] post filter: 998
[####################] 100% 0:00:05 | Calculating sumstats for nloci 998
[####################] 100% 0:00:01 | Collating sumstats for npops 3
# Check the results. In a notebook, evaluating the bare attribute as the last
# expression of a cell displays it; the recorded output that follows shows a
# "between" entry (Dxy, Fst, Fst_adj matrices across population pairs) and a
# "within" entry (mean pi, Watterson, and Tajima's D per population).
# NOTE(review): when run as a plain script this line prints nothing.
popgen.results
between
{'Dxy':
      pop1   pop2   pop3
pop1   0.0  0.025  0.025
pop2   0.0  0.000  0.024
pop3   0.0  0.000  0.000,
 'Fst':
      pop1   pop2   pop3
pop1   0.0  0.372  0.416
pop2   0.0  0.000  0.416
pop3   0.0  0.000  0.000,
 'Fst_adj':
      pop1   pop2   pop3
pop1   0.0  0.299  0.343
pop2   0.0  0.000  0.344
pop3   0.0  0.000  0.000}
within
      mean_pi  mean_Watterson  mean_TajimasD
pop1    0.006           0.005          0.312
pop2    0.006           0.005          0.279
pop3    0.006           0.005          0.293