#!/usr/bin/env python
# coding: utf-8
# #Seagrass Edge Effects Analysis
# #### This notebook is for analysis of 16S rRNA PCR libraries produced from DNA extracted from samples collected by Sofie Voerman. Leaves and roots from Zostera marina were sampled, as well as sediment at two depths (at less than 0.5 cm and at 3 cm) from the inside of each seagrass patch, the edge of each patch and from unvegetated sediment outside of the patch. For the analysis in this notebook, I am using MacQiime/Qiime 1.9.0 http://www.wernerlab.org/software/macqiime
# I am also using IPython, which can be installed via http://ipython.org/ipython-doc/dev/install/install.html
# sudo easy_install ipython[all]
# Useful Tutorials:
#
# http://nbviewer.ipython.org/github/biocore/qiime/blob/master/examples/ipynb/illumina_overview_tutorial.ipynb?create=1
#
# http://www.wernerlab.org/teaching/qiime/overview
#
# http://nbviewer.ipython.org/gist/jennomics/c6fe5e113525c6aa8add
#
# In[6]:
from os import chdir, mkdir
from os.path import join
from IPython.display import FileLinks, FileLink
# ### Demultiplex Data
# Sequence data was demultiplexed and filtered using an inhouse script available at https://github.com/gjospin/scripts/blob/master/Demul_trim_prep.pl
# In[ ]:
# Prepare the merged 16S reads: concatenate, decompress, then reverse complement.
merge_steps = (
    # Concatenate every file matching *.M.* (the merged 16S reads) into one archive.
    'cat *.M.* > EverythingMerged.fasta.gz',
    # Decompress the combined archive.
    'gunzip EverythingMerged.fasta.gz',
    # Reverse complement the reads (they are in the wrong direction relative to the Greengenes/Unite databases).
    'adjust_seq_orientation.py -i EverythingMerged.fasta -o EverythingMerged_RC.fasta',
)
for merge_step in merge_steps:
    get_ipython().system(merge_step)
# ### Defining Useful Variables
# In[7]:
# Note: if there are spaces in your path, put a '\' before each one so the shell recognizes it.
# 16S sequences and mapping file
bactarch_seqs = "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/EverythingMerged_RC.fasta"
bactarch_map = "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/Sofie_only_mapping_w_metadata.txt"
# Greengenes reference database. Every reference path is built relative to
# otu_base, so relocating the database only requires changing this one line.
# (os.path.join discards earlier components when a later one is absolute, so
# the components passed below must stay relative.)
otu_base = "/macqiime/greengenes/gg_13_8_otus/"
reference_seqs = join(otu_base, "rep_set/97_otus.fasta")
reference_tree = join(otu_base, "trees/97_otus.tree")
reference_tax = join(otu_base, "taxonomy/97_otu_taxonomy.txt")
# ### Validate Mapping File
# In[3]:
# Check that the mapping file is usable by QIIME.
run_shell = get_ipython().system
run_shell('validate_mapping_file.py -m $bactarch_map')
# In[ ]:
# Overwrite the original mapping file with the corrected copy.
run_shell('mv Sofie_only_mapping_w_metadata_corrected.txt Sofie_only_mapping_w_metadata.txt')
# ### Validate Demultiplexing
# In[ ]:
# Mostly a sanity check: inspect the generated log file for duplicate
# barcodes or duplicate sample names.
demux_check_cmd = 'validate_demultiplexed_fasta.py -i $bactarch_seqs -m $bactarch_map'
get_ipython().system(demux_check_cmd)
# ### Check for Chimeras
# Tutorial: http://qiime.org/tutorials/chimera_checking.html
# There are two versions of USEARCH and you will need both in QIIME 1.9.0: USEARCH v5.2.236 and USEARCH 6.1. Name the 5.2.236 executable "usearch" and the 6.1 executable "usearch61" and make sure they're in your path.
# http://www.drive5.com/usearch/manual/install.html
#
# Code to Install: (repeat for usearch)
# sudo mv usearch61 /usr/local/bin/usearch61
# sudo chmod a+x /usr/local/bin/usearch61
#
# In[ ]:
# Flag chimeric sequences in the bacterial data with usearch61, using the 97% OTU database as the reference.
find_chimeras_cmd = 'identify_chimeric_seqs.py -i $bactarch_seqs -m usearch61 -o qiime_ready_chimeras/ -r $reference_seqs'
get_ipython().system(find_chimeras_cmd)
# In[ ]:
# Write a new fasta file with the chimeric sequences removed.
drop_chimeras_cmd = 'filter_fasta.py -f $bactarch_seqs -o EverythingMerged_RC_Filtered.fasta -s qiime_ready_chimeras/chimeras.txt -n'
get_ipython().system(drop_chimeras_cmd)
# ### Redefining Useful Variables
# In[3]:
# From here on, point bactarch_seqs at the chimera-filtered 16S sequences.
bactarch_seqs = (
    "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/"
    "EverythingMerged_RC_Filtered.fasta"
)
# ### Picking OTU's
# Make sure to install BLAST Legacy, if using (http://www.wernerlab.org/software/macqiime/macqiime-installation/installing-blast-in-os-x)
# In[ ]:
# Open-reference OTU picking for the 16S data.
otu_picking_cmd = 'pick_open_reference_otus.py -o open_ref_97_otus_EverythingRCFiltered -i $bactarch_seqs -r $reference_seqs -p params.txt -a -O 6 -f'
get_ipython().system(otu_picking_cmd)
# In[3]:
# Print the parameter file passed to the OTU picking step via -p.
show_params_cmd = 'cat /Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/params.txt'
get_ipython().system(show_params_cmd)
# Repeat OTU picking above using the 99% OTU reference database or alternate database as is desired.
# ### Making & Filtering Biom Tables
# In[19]:
# Sanity check: summarize the biom table produced by 97% open-reference OTU picking with Greengenes.
raw_summary_cmd = 'biom summarize-table -i open_ref_97_otus_EverythingRCFiltered/otu_table_mc2_w_tax_no_pynast_failures.biom -o open_ref_97_otus_EverythingRCFiltered/otu_table_summary.txt'
get_ipython().system(raw_summary_cmd)
FileLink("open_ref_97_otus_EverythingRCFiltered/otu_table_summary.txt")
# In[ ]:
# Filter chloroplasts, mitochondria, singletons and unassigned OTUs out of the table.
# The notebook export had split the first command into a truncated system()
# call followed by bare "-i/-o/-n" lines, which is not valid Python; the
# pieces are rejoined here into a single command string.

# Remove chloroplast and mitochondrial OTUs.
get_ipython().system(
    'filter_taxa_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/otu_table_mc2_w_tax_no_pynast_failures.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks.biom '
    '-n c__Chloroplast,f__mitochondria'
)
# Remove singletons (-n 2).
get_ipython().system('filter_otus_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks.biom -o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons.biom -n 2')
# Remove anything that could not be assigned at Domain level.
get_ipython().system('filter_taxa_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons.biom -o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom -n Unassigned')
# In[20]:
# Sanity check: summarize the biom table again now that filtering is done.
filtered_summary_cmd = 'biom summarize-table -i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom -o open_ref_97_otus_EverythingRCFiltered/otu_summary_filtered.txt'
get_ipython().system(filtered_summary_cmd)
FileLink("open_ref_97_otus_EverythingRCFiltered/otu_summary_filtered.txt")
# ### Adding Metadata to biom tables
# In[7]:
# 97% 16S OTUs: attach the mapping-file metadata to the filtered biom table.
add_metadata_cmd = 'biom add-metadata -i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom -o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom -m $bactarch_map'
get_ipython().system(add_metadata_cmd)
# ### Split Edge Effects Data from Harbor Data in Biom Table
# The data analyzed so far in this IPython notebook is for two projects - a project on edge effects and a project on distance from the harbor. Here we separate the Edge Effects data from the biom table for further analysis. (NOTE: I could have analyzed these two datasets separately from the beginning if I desired)
# In[5]:
# 97% 16S OTUs: split the combined table into the two projects.
shell = get_ipython().system
# Edge Effects samples, selected by the sample-ID list in WestpointBacKEEP.txt.
shell('filter_samples_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint.biom -m $bactarch_map --sample_id_fp open_ref_97_otus_EverythingRCFiltered/WestpointBacKEEP.txt')
# Harbor samples, selected by the metadata filter CN_ratio:NA.
shell("filter_samples_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom -o open_ref_97_otus_EverythingRCFiltered/Harbor.biom -m $bactarch_map -s 'CN_ratio:NA'")
# In[45]:
# Sanity check on the Edge Effects table.
shell('biom summarize-table -i open_ref_97_otus_EverythingRCFiltered/Westpoint.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint.txt')
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint.txt")
# In[2]:
# Print the list of sample IDs that were kept (the Edge Effects sample IDs).
# Westpoint == Edge Effects
shell('cat /Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/open_ref_97_otus_EverythingRCFiltered/WestpointBacKEEP.txt')
# ### Remove the Negative Control
# For 16S, the ratio of OTUs for negative control:smallest sample is approx 1:10 after filtering and 1:50 before filtering. The number one contaminant is chloroplast DNA - since the negative control had no plant material in it, this indicates that some spillover occurred between the negative control and the rest of my samples. I looked at the negative control in more detail (steps not included in this IPython notebook) and ultimately decided to simply remove the negative control sample from downstream analysis.
# In[12]:
# 16S Greengenes 97% OTUs: drop the negative control sample.
# The notebook export had split this command into a truncated system() call
# followed by bare "-i/-o/-m/-s" lines, which is not valid Python; the pieces
# are rejoined here into a single command string.
get_ipython().system(
    'filter_samples_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom '
    '-m $bactarch_map '
    '-s "phinchID:*,!Negative_control"'
)
# ### Rarifying Biom Tables
# In[11]:
# Explore rarefaction levels for the bacterial data.
qiime = get_ipython().system
qiime('alpha_rarefaction.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom -n 25 -o open_ref_97_otus_EverythingRCFiltered/arare_WestpointNONC -m $bactarch_map -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre -f')
FileLink("open_ref_97_otus_EverythingRCFiltered/arare_WestpointNONC/alpha_rarefaction_plots/rarefaction_plots.html")
# In[5]:
# Explore how rarefying to the smallest sample size would affect the PCoA plots.
qiime('jackknifed_beta_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom -o open_ref_97_otus_EverythingRCFiltered/jacknifed_betadiv_3277/ -e 3277 -m $bactarch_map -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre -f')
# In[12]:
# 16S Greengenes 97% OTU data: the biom table summary reports a minimum of 3277, so rarefy to 3277.
qiime('single_rarefaction.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom -d 3277')
# In[8]:
# Sanity check on the rarefied 16S table.
qiime('biom summarize-table -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.txt')
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.txt")
# ### Splitting of Unrarified Biom Table By Substrate (Root, Leaf or Soil)
# In[4]:
# Split the unrarefied table into one table per substrate.
split_cmd = get_ipython().system
# Leaf samples
split_cmd('filter_samples_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom -m $bactarch_map -s "Substrate:leaf"')
# Root samples
split_cmd('filter_samples_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom -m $bactarch_map -s "Substrate:root"')
# Soil samples
split_cmd('filter_samples_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom -m $bactarch_map -s "Substrate:soil"')
# In[10]:
# Sanity check - Leaf
split_cmd('biom summarize-table -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom -o open_ref_97_otus_EverythingRCFiltered/Leaf.txt')
FileLink("open_ref_97_otus_EverythingRCFiltered/Leaf.txt")
# In[ ]:
# Sanity check - Root
split_cmd('biom summarize-table -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom -o open_ref_97_otus_EverythingRCFiltered/Root.txt')
FileLink("open_ref_97_otus_EverythingRCFiltered/Root.txt")
# In[22]:
# Sanity check - Soil
split_cmd('biom summarize-table -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom -o open_ref_97_otus_EverythingRCFiltered/Soil.txt')
FileLink("open_ref_97_otus_EverythingRCFiltered/Soil.txt")
# In[31]:
# Leaf & Root together (sanity check to see whether soil artificially pulls leaf and root apart).
split_cmd("filter_samples_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom -m $bactarch_map -s 'Substrate:*,!soil'")
# In[32]:
split_cmd('biom summarize-table -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.txt')
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.txt")
# ### Rarification of Root, Leaf and Soil Biom Tables
# In[7]:
# 16S data: rarefy each substrate table separately, using the depth noted beside each command.
substrate_rarefaction_cmds = [
    # Leaf: minimum = 5177
    'single_rarefaction.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom -d 5177',
    # Root: minimum = 3277
    'single_rarefaction.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom -d 3277',
    # Soil: minimum = 23059, but rarefy to 20000
    'single_rarefaction.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom -d 20000',
]
for substrate_rarefaction_cmd in substrate_rarefaction_cmds:
    get_ipython().system(substrate_rarefaction_cmd)
# In[8]:
# Leaf + Root combined: rarefy to 3277
leaf_root_rarefaction_cmd = 'single_rarefaction.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot_3277.biom -d 3277'
get_ipython().system(leaf_root_rarefaction_cmd)
# ### Diversity Analyses
# For exploratory analysis - used core_diversity_analyses
# In[26]:
# Exploratory core diversity analyses, one run per rarefied table.
core_div = get_ipython().system
# All 16S data
core_div('core_diversity_analyses.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom -o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/ -m $bactarch_map -e 3277 -p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre')
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/index.html")
# In[27]:
# Leaf 16S
core_div('core_diversity_analyses.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom -o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/ -m $bactarch_map -e 5177 -p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre')
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/index.html")
# In[28]:
# Root 16S
core_div('core_diversity_analyses.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom -o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/ -m $bactarch_map -e 3277 -p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre')
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/index.html")
# In[24]:
# Soil 16S
core_div('core_diversity_analyses.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom -o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/ -m $bactarch_map -e 20000 -p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre')
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/index.html")
# In[25]:
# Leaf + Root 16S
core_div('core_diversity_analyses.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot_3277.biom -o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_LeafAndRoot_3277/ -m $bactarch_map -e 3277 -p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre')
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_LeafAndRoot_3277/index.html")
# ### Calculate Alpha Diversity
# In[42]:
# Alpha rarefaction on the rarefied tables, driven by alpha_params.txt.
alpha_rarefaction_cmds = (
    # Soil, 20000
    'alpha_rarefaction.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/ -p open_ref_97_otus_EverythingRCFiltered/alpha_params.txt -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre -m $bactarch_map',
    # All, 3277
    'alpha_rarefaction.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/ -p open_ref_97_otus_EverythingRCFiltered/alpha_params.txt -t open_ref_97_otus_EverythingRCFiltered/rep_set.tre -m $bactarch_map',
)
for alpha_rarefaction_cmd in alpha_rarefaction_cmds:
    get_ipython().system(alpha_rarefaction_cmd)
# In[3]:
# Print the parameter file used above.
show_alpha_params_cmd = 'cat open_ref_97_otus_EverythingRCFiltered/alpha_params.txt'
get_ipython().system(show_alpha_params_cmd)
# In[22]:
# Run alpha_diversity.py with its -s flag.
list_metrics_cmd = 'alpha_diversity.py -s'
get_ipython().system(list_metrics_cmd)
# ## Statistics in QIIME
# ###Testing for significance of location on community composition. Repeated these in R using adonis for final stats
# In[29]:
# PERMANOVA on Location (9999 permutations) for each distance matrix.
location_permanova_cmds = [
    # 16S All - unweighted UniFrac
    'compare_categories.py --method permanova -m $bactarch_map -c Location -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/unweighted_unifrac_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Location_UU -n 9999',
    # 16S All - Bray-Curtis
    'compare_categories.py --method permanova -m $bactarch_map -c Location -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/bray_curtis_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Location_BC -n 9999',
    # 16S All - weighted UniFrac
    'compare_categories.py --method permanova -m $bactarch_map -c Location -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/weighted_unifrac_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Location_WU -n 9999',
    # 16S Soil - weighted UniFrac
    'compare_categories.py --method permanova -m $bactarch_map -c Location -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/weighted_unifrac_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_WU -n 9999',
    # 16S Soil - unweighted UniFrac
    'compare_categories.py --method permanova -m $bactarch_map -c Location -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/unweighted_unifrac_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_UU -n 9999',
    # 16S Soil - Bray-Curtis
    'compare_categories.py --method permanova -m $bactarch_map -c Location -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/bray_curtis_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_BC -n 9999',
    # 16S Root - weighted UniFrac
    'compare_categories.py --method permanova -m $bactarch_map -c Location -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/bdiv_even3277/weighted_unifrac_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Root_WU -n 9999',
    # 16S Leaf - weighted UniFrac
    'compare_categories.py --method permanova -m $bactarch_map -c Location -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/bdiv_even5177/weighted_unifrac_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Leaf_WU -n 9999',
]
for location_permanova_cmd in location_permanova_cmds:
    get_ipython().system(location_permanova_cmd)
# ###Testing for significance of substrate (leaf, soil or roots) on microbial community composition (redid in R, adonis)
# In[33]:
# PERMANOVA on Substrate, run on the full 16S dataset because the question is whether substrate is significant.
substrate_permanova_cmds = [
    # weighted UniFrac
    'compare_categories.py --method permanova -m $bactarch_map -c Substrate -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/weighted_unifrac_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_WU -n 9999',
    # unweighted UniFrac
    'compare_categories.py --method permanova -m $bactarch_map -c Substrate -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/unweighted_unifrac_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_UU -n 9999',
    # Bray-Curtis
    'compare_categories.py --method permanova -m $bactarch_map -c Substrate -i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/bray_curtis_dm.txt -o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_BC -n 9999',
]
for substrate_permanova_cmd in substrate_permanova_cmds:
    get_ipython().system(substrate_permanova_cmd)
# ### Testing for significant differences in alpha diversity across locations
# In[4]:
# Compare alpha diversity between groups (9999 permutations, Bonferroni correction).
# NOTE(review): the sediment runs at the end test -c Location but write to
# files named "compare_alpha_div_substrate_..."; the names are kept as-is so
# existing outputs still match - confirm whether they should say "location".
alpha_comparison_cmds = [
    # --- Substrate (sample type differences), all samples ---
    # chao1
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/chao1.txt -m $bactarch_map -c Substrate -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_chao1.txt -n 9999 -p bonferroni',
    # observed otus
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/observed_otus.txt -m $bactarch_map -c Substrate -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_otus.txt -n 9999 -p bonferroni',
    # shannon
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/shannon.txt -m $bactarch_map -c Substrate -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_shannon.txt -n 9999 -p bonferroni',
    # simpson
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/simpson.txt -m $bactarch_map -c Substrate -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_simpson.txt -n 9999 -p bonferroni',
    # --- Location (inside, edge, outside), all samples ---
    # chao1
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/chao1.txt -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_chao1.txt -n 9999 -p bonferroni',
    # observed otus
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/observed_otus.txt -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_otus.txt -n 9999 -p bonferroni',
    # shannon
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/shannon.txt -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_shannon.txt -n 9999 -p bonferroni',
    # simpson
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/simpson.txt -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_simpson.txt -n 9999 -p bonferroni',
    # --- Location, sediment samples only ---
    # chao1
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/chao1.txt -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_chao1.txt -n 9999 -p bonferroni',
    # observed otus
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/observed_otus.txt -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_otus.txt -n 9999 -p bonferroni',
    # shannon
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/shannon.txt -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_shannon.txt -n 9999 -p bonferroni',
    # simpson
    'compare_alpha_diversity.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/simpson.txt -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_simpson.txt -n 9999 -p bonferroni',
]
for alpha_comparison_cmd in alpha_comparison_cmds:
    get_ipython().system(alpha_comparison_cmd)
# ###Testing for significant variation in the frequency of individual OTUs across locations/substrates
# In[12]:
# Filter out low-abundance OTUs (--min_count_fraction .001) before testing.
# NOTE(review): the *_w_meta_json.biom inputs are not produced by any step
# visible in this notebook - confirm where they come from.
# All
get_ipython().system('filter_otus_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom --min_count_fraction .001')
# Sediment
get_ipython().system('filter_otus_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_json.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom --min_count_fraction .001')
# In[13]:
# Stats: test individual OTU frequencies for differences between groups.
# All, Location
get_ipython().system('group_significance.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/group_sig_Location_3277_top99.txt')
# All, Sample type
get_ipython().system('group_significance.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom -m $bactarch_map -c Substrate -o open_ref_97_otus_EverythingRCFiltered/group_sig_Substrate_3277_top99.txt')
# Sediment, location
# (A stray trailing backslash left over from a shell line continuation was
# removed from the end of this command.)
get_ipython().system('group_significance.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/group_sig_Location_Soil_20000_top99.txt')
# ###Now looking at significant taxa (in terms of Relative Abundance)
# In[3]:
# Summarize taxa, keeping only taxa that make up more than 0.1% of the dataset.
# The commands previously passed "-u 0.01". In summarize_taxa.py, -u
# (--upper_percentage) trims taxa whose abundance is ABOVE the threshold, so
# that removed the dominant taxa instead of the rare ones, and 0.01 is 1%
# rather than 0.1%. -l (--lower_percentage) trims taxa below the threshold,
# which matches the stated intent and the .001 cut-off used for the OTU
# tables elsewhere in this notebook.
# NOTE(review): this changes the downstream L4 results - re-run the dependent cells.
# All
get_ipython().system('summarize_taxa.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom -o open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/ -l 0.001')
# Sediment
get_ipython().system('summarize_taxa.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom -o open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/ -l 0.001')
# In[9]:
# Group significance on the Order-level (L4) taxa summaries, with 9999 permutations.
order_level_tests = (
    # All samples, by sample type (Substrate)
    'group_significance.py -i open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/Westpoint_NoNC_3277_L4.biom -m $bactarch_map -c Substrate -o open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/group_sig_substrate_kw_9999_L4.txt --permutations 9999',
    # Sediment samples, by Location
    'group_significance.py -i open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/Westpoint_NoNC_Soil_20000_L4.biom -m $bactarch_map -c Location -o open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/group_sig_location_kw_9999_L4.txt --permutations 9999',
)
for order_level_test in order_level_tests:
    get_ipython().system(order_level_test)
# ###Random Forest Classifier
# In[5]:
# Random forest classifiers (supervised_learning.py, 1000 trees).
# NOTE(review): the inputs are the abundance-filtered OTU tables (*_top99.biom),
# not the L4 taxa summaries - confirm the "order level" label in the original notes.
# All, 10-fold cross-validation, sample type
get_ipython().system('supervised_learning.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99_supervised_learning_substrate_cv10/ -m $bactarch_map -c Substrate --ntree 1000 -e cv10 -f')
# All, leave one out, sample type
get_ipython().system('supervised_learning.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99_supervised_learning_substrate_loo/ -m $bactarch_map -c Substrate --ntree 1000 -e loo -f')
# The two sediment runs classify by Location, as their labels and output
# directory names say. They previously passed "-c Substrate", which is the
# wrong category for a soil-only table. The output directory names (including
# the "Soil_200000" spelling) are left unchanged so existing outputs still match.
# Sediment, 10-fold cross-validation, location
get_ipython().system('supervised_learning.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_200000_w_meta_supervised_learning_location/ -m $bactarch_map -c Location --ntree 1000 -e cv10 -f')
# Sediment, leave one out, location
get_ipython().system('supervised_learning.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_200000_w_meta_supervised_learning_location_loo/ -m $bactarch_map -c Location --ntree 1000 -e loo -f')
# ### Exporting Biom Tables for Further Analysis
# In[ ]:
# Template command: how to convert a table to JSON biom for phyloseq and phinch.
json_template_cmd = 'biom convert -i table.txt -o table.from_txt_json.biom --table-type="OTU table" --to-json'
get_ipython().system(json_template_cmd)
# In[41]:
# Convert the rarefied biom tables to JSON for phyloseq.
json_export_cmds = (
    # All 16S
    'biom convert -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_json.biom --table-type="OTU table" --to-json',
    # Soil
    'biom convert -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_json.biom --table-type="OTU table" --to-json',
    # Leaf
    'biom convert -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177_json.biom --table-type="OTU table" --to-json',
    # Root
    'biom convert -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277_json.biom --table-type="OTU table" --to-json',
)
for json_export_cmd in json_export_cmds:
    get_ipython().system(json_export_cmd)
# #Picrust
# Picrust only accepts closed-reference OTUS, so this will filter out all open reference OTUS
# In[6]:
# Keep only the OTUs whose IDs appear in the reference taxonomy file
# (-e $reference_tax combined with --negate_ids_to_exclude).
picrust_shell = get_ipython().system
# All samples
picrust_shell('filter_otus_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed.biom --negate_ids_to_exclude -e $reference_tax')
# Sediment samples
picrust_shell('filter_otus_from_otu_table.py -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_json.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed.biom --negate_ids_to_exclude -e $reference_tax')
# In[1]:
# Convert both closed-reference tables to JSON biom.
picrust_shell('biom convert -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed_json.biom --table-type="OTU table" --to-json')
picrust_shell('biom convert -i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed.biom -o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed_json.biom --table-type="OTU table" --to-json')