#!/usr/bin/env python # coding: utf-8 # This notebook creates a jackknifed PCoA plot based on multiple rarefactions. # In[ ]: from emperor import Emperor, nbinstall nbinstall() from skbio.stats.ordination import pcoa from skbio.diversity import beta_diversity from biom import load_table # pydata/scipy import pandas as pd, numpy as np from scipy.spatial import procrustes def load_mf(fn, index='#SampleID'): _df = pd.read_csv(fn, sep='\t', dtype=str, keep_default_na=False, na_values=[]) _df.set_index(index, inplace=True) return _df # We are going to load data from [Fierer et al. 2010](http://www.pnas.org/content/107/14/6477.full) (the data was retrieved from study [232](https://qiita.ucsd.edu/study/description/232) in [Qiita](https://qiita.ucsd.edu), remember you need to be logged in to access the study). # In[ ]: mf = load_mf('keyboard/mapping-file.txt') bt = load_table('keyboard/otu-table.biom') # We'll create five different distance matrices and compare them. # In[ ]: ordinations = [] distances = ['jaccard', 'dice', 'russellrao'] rarefied = bt.subsample(1000) for r in range(len(distances)): data = np.array([rarefied.data(i) for i in rarefied.ids()], dtype='int64') res = pcoa(beta_diversity(distances[r], data, rarefied.ids())) ordinations.append(res) # Procrustes plots need a *master* set of coordinates where there rest of the matrices will be fitted around. # In[ ]: master = ordinations[0] fitted_ordinations = [] for i in range(1, len(ordinations)): reference, matrix, _ = procrustes(master.samples.values, ordinations[i].samples.values) if i == 0: master.samples.iloc[:] = reference samples = pd.DataFrame(index=ordinations[i].samples.index, columns=ordinations[i].samples.columns, data=matrix) ordinations[i].samples = samples fitted_ordinations.append(ordinations[i]) # If you want to share your notebook via GitHub use `remote=True` and make sure you share your notebook using nbviewer. # In[ ]: viz = Emperor(master, mf, procrustes=fitted_ordinations, remote=False) # Lastly, we set the name of the distances as the `procrustes_names` attribute so we can differentiate them in the plot. # In[ ]: viz.procrustes_names = distances # In[ ]: viz