#!/usr/bin/env python # coding: utf-8 #

ipyrad-analysis toolkit: construct

# # The program construct is a STRUCTURE-like tool that incorporates expectations of isolation by distance. It is available as an R package. This notebook demonstrates how to convert data to the proper format for use in construct using simulated data as an example. For details on running construct see [their documentation](https://github.com/gbradburd/conStruct). # ### Required software # In[1]: # conda install ipyrad ipcoal -c conda-forge -c bioconda # In[3]: import ipyrad.analysis as ipa import toytree import ipcoal # In[22]: print('ipyrad', ipa.__version__) print('toytree', toytree.__version__) print('ipcoal', ipcoal.__version__) # ### Simulate example data # In[5]: # network model tree = toytree.rtree.unittree(7, treeheight=3e6, seed=123) tree.draw(ts='o', admixture_edges=(3, 2)); # In[15]: # simulation model with admixture and missing data model = ipcoal.Model(tree, Ne=1e4, nsamples=4, admixture_edges=(3, 2, 0.5, 0.1)) model.sim_snps(250) model.write_snps_to_hdf5(name="test-construct", outdir="/tmp", diploid=True) # ### Input data file # In[16]: # the path to your HDF5 formatted snps file SNPS = "/tmp/test-construct.snps.hdf5" # ### Population assignments # In[17]: IMAP = { "r0": ["r0-0", "r0-1"], "r1": ["r1-0", "r1-1"], "r2": ["r2-0", "r2-1"], "r3": ["r3-0", "r3-1"], "r4": ["r4-0", "r4-1"], "r5": ["r5-0", "r5-1"], "r6": ["r6-0", "r6-1"], } # ### Filter missing data and convert to genotype frequencies # In[18]: # apply filtering to the SNPs file tool = ipa.snps_extracter(data=SNPS, imap=IMAP, minmap={i:2 for i in IMAP}) tool.parse_genos_from_hdf5(); # In[19]: # convert SNP data to genotype frequencies df = tool.get_population_geno_frequency() df.head() # ### Write data to file # In[20]: # write to a file df.to_csv("/tmp/freqs.csv")