#!/usr/bin/env python
# coding: utf-8

# # Seagrass Edge Effects Analysis

# #### This notebook is for the analysis of 16S rRNA PCR libraries produced from DNA
# extracted from samples collected by Sofie Voerman. Leaves and roots from Zostera marina
# were sampled, as well as sediment at two depths (less than 0.5 cm and 3 cm), from the
# inside of each seagrass patch, the edge of each patch, and unvegetated sediment outside
# of the patch. For the analysis in this notebook, I am using MacQIIME/QIIME 1.9.0:
# http://www.wernerlab.org/software/macqiime
#
# I am also using IPython, which can be installed via
# http://ipython.org/ipython-doc/dev/install/install.html
# sudo easy_install ipython[all]

# Useful Tutorials:
#
# http://nbviewer.ipython.org/github/biocore/qiime/blob/master/examples/ipynb/illumina_overview_tutorial.ipynb?create=1
#
# http://www.wernerlab.org/teaching/qiime/overview
#
# http://nbviewer.ipython.org/gist/jennomics/c6fe5e113525c6aa8add

# Setup section: imports, preparation of the merged reads, path variables, and
# validation of the mapping file and the demultiplexed sequences.

# In[6]:

from os import chdir, mkdir
from os.path import join
from IPython.display import FileLinks, FileLink

# ### Demultiplex Data
# Sequence data was demultiplexed and filtered using an in-house script available at
# https://github.com/gjospin/scripts/blob/master/Demul_trim_prep.pl

# In[ ]:

# The resulting files containing only the merged 16S reads were concatenated into one file
get_ipython().system('cat *.M.* > EverythingMerged.fasta.gz')
# Then they were unzipped
get_ipython().system('gunzip EverythingMerged.fasta.gz')
# Then they were reverse complemented (the reads are in the wrong direction relative to
# the Greengenes/Unite databases)
get_ipython().system(
    'adjust_seq_orientation.py -i EverythingMerged.fasta -o EverythingMerged_RC.fasta'
)

# ### Defining Useful Variables

# In[7]:

# Note: if there are spaces in your path make sure they have a '\' before them so they
# are recognized

# 16S sequences and mapping file
bactarch_seqs = "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/EverythingMerged_RC.fasta"
bactarch_map = "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/Sofie_only_mapping_w_metadata.txt"

# Databases
otu_base = "/macqiime/greengenes/gg_13_8_otus/"
# The components joined onto otu_base must be relative: os.path.join discards every
# earlier component as soon as it meets an absolute one, which would make otu_base a
# no-op. The resulting paths are the same /macqiime/greengenes/gg_13_8_otus/... files.
reference_seqs = join(otu_base, "rep_set", "97_otus.fasta")
reference_tree = join(otu_base, "trees", "97_otus.tree")
reference_tax = join(otu_base, "taxonomy", "97_otu_taxonomy.txt")

# ### Validate Mapping File

# In[3]:

# Checks the mapping file for QIIME use
get_ipython().system('validate_mapping_file.py -m $bactarch_map')

# In[ ]:

# Replaces the original mapping file with the new corrected file.
# The command is kept in a single string so `mv` receives both file names.
get_ipython().system(
    'mv Sofie_only_mapping_w_metadata_corrected.txt Sofie_only_mapping_w_metadata.txt'
)

# ### Validate Demultiplexing

# In[ ]:

get_ipython().system('validate_demultiplexed_fasta.py -i $bactarch_seqs -m $bactarch_map')
# Check the log file generated to see if any duplicate barcodes/sample names are used;
# mostly this is a sanity check

# ### Check for Chimeras
# Tutorial: http://qiime.org/tutorials/chimera_checking.html
# There are two versions of USEARCH and you will need both in QIIME 1.9.0:
# USEARCH v5.2.236 and USEARCH 6.1. Name the 5.2.236 executable "usearch" and the 6.1
# executable "usearch61" and make sure they're in your path.
# http://www.drive5.com/usearch/manual/install.html
#
# Code to Install: (repeat for usearch)
# sudo mv usearch61 /usr/local/bin/usearch61
# sudo chmod a+x /usr/local/bin/usearch61

# Chimera screening of the 16S reads followed by open-reference OTU picking.

# In[ ]:

# Flag chimeric sequences in the bacterial data with usearch61, using the 97% OTU
# database as the reference; results go to qiime_ready_chimeras/
get_ipython().system(
    'identify_chimeric_seqs.py '
    '-i $bactarch_seqs '
    '-m usearch61 '
    '-o qiime_ready_chimeras/ '
    '-r $reference_seqs'
)

# In[ ]:

# Write a copy of the fasta file without the sequences listed in chimeras.txt
get_ipython().system(
    'filter_fasta.py '
    '-f $bactarch_seqs '
    '-o EverythingMerged_RC_Filtered.fasta '
    '-s qiime_ready_chimeras/chimeras.txt '
    '-n'
)

# ### Redefining Useful Variables

# In[3]:

# From here on, the 16S sequences are the chimera-filtered ones
bactarch_seqs = "/Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/EverythingMerged_RC_Filtered.fasta"

# ### Picking OTU's
# Make sure to install BLAST Legacy, if using
# (http://www.wernerlab.org/software/macqiime/macqiime-installation/installing-blast-in-os-x)

# In[ ]:

# Open-reference OTU picking for 16S
get_ipython().system(
    'pick_open_reference_otus.py '
    '-o open_ref_97_otus_EverythingRCFiltered '
    '-i $bactarch_seqs '
    '-r $reference_seqs '
    '-p params.txt '
    '-a -O 6 -f'
)

# In[3]:

# Show the parameter file passed to the OTU picking step
get_ipython().system('cat /Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/params.txt')

# Repeat OTU picking above using the 99% OTU reference database or alternate database
# as is desired.
# ### Making & Filtering Biom Tables
# Summarize the OTU table, remove unwanted taxa and singletons, attach metadata, and
# split the table into the Edge Effects (Westpoint) and Harbor sample sets.

# In[19]:

# Summarizes the biom table obtained from running open-reference OTU picking at 97%
# with Greengenes; sanity check
get_ipython().system(
    'biom summarize-table '
    '-i open_ref_97_otus_EverythingRCFiltered/otu_table_mc2_w_tax_no_pynast_failures.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/otu_table_summary.txt'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/otu_table_summary.txt")

# In[ ]:

# Filters out all the chloroplasts/mitochondria/singletons.
# The full command, options included, is one string: a shell-style "\" continuation
# does not carry over into a Python string literal, so the options must live inside it.
get_ipython().system(
    'filter_taxa_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/otu_table_mc2_w_tax_no_pynast_failures.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks.biom '
    '-n c__Chloroplast,f__mitochondria'
)
get_ipython().system(
    'filter_otus_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons.biom '
    '-n 2'
)
# Filters out anything unable to be assigned at Domain level
get_ipython().system(
    'filter_taxa_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom '
    '-n Unassigned'
)

# In[20]:

# Summarizes the biom table obtained above after filtering; sanity check
get_ipython().system(
    'biom summarize-table '
    '-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/otu_summary_filtered.txt'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/otu_summary_filtered.txt")

# ### Adding Metadata to biom tables

# In[7]:

# 97% 16S OTUs
get_ipython().system(
    'biom add-metadata '
    '-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom '
    '-m $bactarch_map'
)

# ### Split Edge Effects Data from Harbor Data in Biom Table
# The data analyzed so far in this IPython notebook is for two projects - a project on
# edge effects and a project on distance from the harbor. Here we separate the Edge
# Effects data from the biom table for further analysis. (NOTE: I could have analyzed
# these two datasets separately from the beginning if I desired)

# In[5]:

# 97% 16S OTUs
# Edge Effects samples: keep the sample IDs listed in WestpointBacKEEP.txt
get_ipython().system(
    'filter_samples_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint.biom '
    '-m $bactarch_map '
    '--sample_id_fp open_ref_97_otus_EverythingRCFiltered/WestpointBacKEEP.txt'
)
# Harbor samples
get_ipython().system(
    "filter_samples_from_otu_table.py "
    "-i open_ref_97_otus_EverythingRCFiltered/otu_table_no_euks_no_singletons_no_unassigned_w_metadata.biom "
    "-o open_ref_97_otus_EverythingRCFiltered/Harbor.biom "
    "-m $bactarch_map "
    "-s 'CN_ratio:NA'"
)

# In[45]:

# Sanity check
get_ipython().system(
    'biom summarize-table '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint.txt'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint.txt")

# In[2]:

# List of Sample IDs to keep (these are the sample IDs for the Edge Effects Data)
# Westpoint == Edge Effects
get_ipython().system(
    'cat /Users/Cassie/Dropbox/Seagrass/EdgeAnalysis/open_ref_97_otus_EverythingRCFiltered/WestpointBacKEEP.txt'
)

# ### Remove the Negative Control
# For 16S, the ratio of OTUs for negative control:smallest sample is approx 1:10 after
# filtering and 1:50 before filtering. The number one contaminant is chloroplast DNA -
# since the negative control had no plant material in it, this indicates that some
# spillover occurred between the negative control and the rest of my samples. I looked
# at the negative control in more detail (steps not included in this IPython Notebook)
# and ultimately decided to simply remove the negative control sample from downstream
# analysis.
# Drop the negative control, explore rarefaction depths, rarefy the full table, and
# split the unrarefied table by substrate.

# In[12]:

# 16S Greengenes 97% OTUs
# Keep every sample except the negative control. All options are inside the one
# command string: a shell-style "\" continuation does not carry over into a Python
# string literal.
get_ipython().system(
    'filter_samples_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom '
    '-m $bactarch_map '
    '-s "phinchID:*,!Negative_control"'
)

# ### Rarefying Biom Tables

# In[11]:

# Investigating levels of rarefaction - Bacteria
get_ipython().system(
    'alpha_rarefaction.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom '
    '-n 25 '
    '-o open_ref_97_otus_EverythingRCFiltered/arare_WestpointNONC '
    '-m $bactarch_map '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre '
    '-f'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/arare_WestpointNONC/alpha_rarefaction_plots/rarefaction_plots.html")

# In[5]:

# Investigating how rarefaction to the smallest sample size would affect PCoA plots
get_ipython().system(
    'jackknifed_beta_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/jacknifed_betadiv_3277/ '
    '-e 3277 '
    '-m $bactarch_map '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre '
    '-f'
)

# In[12]:

# 16S Greengenes 97% OTU Data; Minimum OTUs = 3277 from biom table summary, so rarefy
# to 3277
get_ipython().system(
    'single_rarefaction.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom '
    '-d 3277'
)

# In[8]:

# Sanity check 16S data
get_ipython().system(
    'biom summarize-table '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.txt'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.txt")

# ### Splitting of Unrarefied Biom Table By Substrate (Root, Leaf or Soil)

# In[4]:

# Leaf
get_ipython().system(
    'filter_samples_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom '
    '-m $bactarch_map '
    '-s "Substrate:leaf"'
)
# Root
get_ipython().system(
    'filter_samples_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom '
    '-m $bactarch_map '
    '-s "Substrate:root"'
)
# Soil
get_ipython().system(
    'filter_samples_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom '
    '-m $bactarch_map '
    '-s "Substrate:soil"'
)

# In[10]:

# Sanity check - Leaf
get_ipython().system(
    'biom summarize-table '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Leaf.txt'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/Leaf.txt")

# In[ ]:

# Sanity check - Root
get_ipython().system(
    'biom summarize-table '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Root.txt'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/Root.txt")

# In[22]:

# Sanity check - Soil
get_ipython().system(
    'biom summarize-table '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Soil.txt'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/Soil.txt")

# In[31]:

# Leaf & Root (sanity check to see if soil artificially pulls leaf and root apart)
get_ipython().system(
    "filter_samples_from_otu_table.py "
    "-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC.biom "
    "-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom "
    "-m $bactarch_map "
    "-s 'Substrate:*,!soil'"
)

# In[32]:

get_ipython().system(
    'biom summarize-table '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.txt'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.txt")

# ### Rarefaction of Root, Leaf and Soil Biom Tables

# In[7]:

# 16S Data: rarefy each substrate biom table by the minimum OTU count in each table
# Leaf: Min #OTU = 5177
# Per-substrate rarefaction, core diversity analyses, alpha diversity, and PERMANOVA
# tests on the resulting distance matrices.

# Leaf table, even depth 5177
get_ipython().system(
    'single_rarefaction.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom '
    '-d 5177'
)
# Root: Min #OTU = 3277
get_ipython().system(
    'single_rarefaction.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom '
    '-d 3277'
)
# Soil: Min #OTU = 23059; but rarefy to 20000
get_ipython().system(
    'single_rarefaction.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom '
    '-d 20000'
)

# In[8]:

# Leaf + Root, #OTU = 3277
get_ipython().system(
    'single_rarefaction.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot_3277.biom '
    '-d 3277'
)

# ### Diversity Analyses
# For exploratory analysis - used core_diversity_analyses

# In[26]:

# ALL 16S data
get_ipython().system(
    'core_diversity_analyses.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/ '
    '-m $bactarch_map -e 3277 '
    '-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/index.html")

# In[27]:

# Leaf 16S
get_ipython().system(
    'core_diversity_analyses.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/ '
    '-m $bactarch_map -e 5177 '
    '-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/index.html")

# In[28]:

# Root 16S
get_ipython().system(
    'core_diversity_analyses.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/ '
    '-m $bactarch_map -e 3277 '
    '-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/index.html")

# In[24]:

# Soil 16S
get_ipython().system(
    'core_diversity_analyses.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/ '
    '-m $bactarch_map -e 20000 '
    '-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/index.html")

# In[25]:

# Leaf + Root 16S
get_ipython().system(
    'core_diversity_analyses.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_LeafAndRoot_3277.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_LeafAndRoot_3277/ '
    '-m $bactarch_map -e 3277 '
    '-p open_ref_97_otus_EverythingRCFiltered/betadiv_params.txt '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre'
)
FileLink("open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_LeafAndRoot_3277/index.html")

# ### Calculate Alpha Diversity

# In[42]:

# Soil, 20000
get_ipython().system(
    'alpha_rarefaction.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/ '
    '-p open_ref_97_otus_EverythingRCFiltered/alpha_params.txt '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre '
    '-m $bactarch_map'
)
# All, 3277
get_ipython().system(
    'alpha_rarefaction.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/ '
    '-p open_ref_97_otus_EverythingRCFiltered/alpha_params.txt '
    '-t open_ref_97_otus_EverythingRCFiltered/rep_set.tre '
    '-m $bactarch_map'
)

# In[3]:

# Parameter file used above
get_ipython().system('cat open_ref_97_otus_EverythingRCFiltered/alpha_params.txt')

# In[22]:

get_ipython().system('alpha_diversity.py -s')

# ## Statistics in QIIME
# ### Testing for significance of location on community composition. Repeated these in
# R using adonis for final stats

# In[29]:

# 16S All
# unweighted unifrac
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Location '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/unweighted_unifrac_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_UU '
    '-n 9999'
)
# bray curtis
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Location '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/bray_curtis_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_BC '
    '-n 9999'
)
# weighted unifrac
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Location '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/weighted_unifrac_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_WU '
    '-n 9999'
)

# 16S Soil
# weighted unifrac
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Location '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/weighted_unifrac_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_WU '
    '-n 9999'
)
# unweighted unifrac
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Location '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/unweighted_unifrac_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_UU '
    '-n 9999'
)
# bray curtis
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Location '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Soil_20000/bdiv_even20000/bray_curtis_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Soil_BC '
    '-n 9999'
)

# 16S Root
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Location '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Root/bdiv_even3277/weighted_unifrac_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Root_WU '
    '-n 9999'
)

# 16S Leaf
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Location '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint_Leaf/bdiv_even5177/weighted_unifrac_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Location_Leaf_WU '
    '-n 9999'
)

# ### Testing for significance of substrate (leaf, soil or roots) on microbial
# community composition (redid in R, adonis)

# In[33]:

# 16S Data - All (b/c testing if substrate significant)
# weighted unifrac
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Substrate '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/weighted_unifrac_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_WU '
    '-n 9999'
)
# unweighted unifrac
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Substrate '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/unweighted_unifrac_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_UU '
    '-n 9999'
)
# bray curtis
get_ipython().system(
    'compare_categories.py --method permanova -m $bactarch_map -c Substrate '
    '-i open_ref_97_otus_EverythingRCFiltered/core_diversity_analyses_Westpoint/bdiv_even3277/bray_curtis_dm.txt '
    '-o open_ref_97_otus_EverythingRCFiltered/permanova_Substrate_BC '
    '-n 9999'
)

# ### Testing for significant differences in alpha diversity across locations

# In[4]:

# Substrate (sample type differences)
# All, chao1
# Alpha diversity comparisons on the collated alpha diversity files: by Substrate and
# by Location for the full table, then by Location for the sediment table.

# All samples by Substrate - chao1
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/chao1.txt '
    '-m $bactarch_map -c Substrate '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_chao1.txt '
    '-n 9999 -p bonferroni'
)
# All, observed otus
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/observed_otus.txt '
    '-m $bactarch_map -c Substrate '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_otus.txt '
    '-n 9999 -p bonferroni'
)
# All, shannon
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/shannon.txt '
    '-m $bactarch_map -c Substrate '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_shannon.txt '
    '-n 9999 -p bonferroni'
)
# All, simpson
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/simpson.txt '
    '-m $bactarch_map -c Substrate '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_3277_simpson.txt '
    '-n 9999 -p bonferroni'
)

# Location (inside, edge, outside)
# All, chao1
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/chao1.txt '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_chao1.txt '
    '-n 9999 -p bonferroni'
)
# All, observed otus
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/observed_otus.txt '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_otus.txt '
    '-n 9999 -p bonferroni'
)
# All, shannon
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/shannon.txt '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_shannon.txt '
    '-n 9999 -p bonferroni'
)
# All, simpson
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_AlphaDiv/alpha_div_collated/simpson.txt '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_location_westpoint_nonc_3277_simpson.txt '
    '-n 9999 -p bonferroni'
)

# Location
# Sediment, chao1
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/chao1.txt '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_chao1.txt '
    '-n 9999 -p bonferroni'
)
# Sediment, observed otus
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/observed_otus.txt '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_otus.txt '
    '-n 9999 -p bonferroni'
)
# Sediment, shannon
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/shannon.txt '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_shannon.txt '
    '-n 9999 -p bonferroni'
)
# Sediment, simpson
get_ipython().system(
    'compare_alpha_diversity.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_AlphaDiv/alpha_div_collated/simpson.txt '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/compare_alpha_div_substrate_westpoint_nonc_soil_20000_simpson.txt '
    '-n 9999 -p bonferroni'
)

# ### Testing for
# significant variation in the frequency of individual OTUs across locations/substrates
#
# Filter low-abundance OTUs, run group_significance.py on the filtered tables, then
# summarize taxa for the taxon-level tests.

# In[12]:

# Filter out low abundance OTUs
# All
get_ipython().system(
    'filter_otus_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom '
    '--min_count_fraction .001'
)
# Sediment
get_ipython().system(
    'filter_otus_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_json.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom '
    '--min_count_fraction .001'
)

# In[13]:

# Stats
# All, Location
get_ipython().system(
    'group_significance.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/group_sig_Location_3277_top99.txt'
)
# All, Sample type
get_ipython().system(
    'group_significance.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom '
    '-m $bactarch_map -c Substrate '
    '-o open_ref_97_otus_EverythingRCFiltered/group_sig_Substrate_3277_top99.txt'
)
# Sediment, location
# The command ends at the output path; no trailing "\" is passed to the shell.
get_ipython().system(
    'group_significance.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/group_sig_Location_Soil_20000_top99.txt'
)

# ### Now looking at significant taxa (in terms of Relative Abundance)

# In[3]:

# First summarize taxa and keep only the more abundant taxa.
# NOTE(review): the original note says "> 0.1%", but -u 0.01 looks like a 1% cutoff;
# confirm the intended threshold against the summarize_taxa.py documentation.
# All
get_ipython().system(
    'summarize_taxa.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/ '
    '-u 0.01'
)
# Sediment
get_ipython().system(
    'summarize_taxa.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/ '
    '-u 0.01'
)

# In[9]:

# Stats at Order Level (L4)
# All, Sample Type
# Taxon-level (L4) group significance, random forest classification, export of biom
# tables to JSON, and closed-reference filtering for PICRUSt.

# All samples, grouped by Substrate, on the L4 taxa summary
get_ipython().system(
    'group_significance.py '
    '-i open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/Westpoint_NoNC_3277_L4.biom '
    '-m $bactarch_map -c Substrate '
    '-o open_ref_97_otus_EverythingRCFiltered/taxa_Westpoint_NoNC_3277_top99/group_sig_substrate_kw_9999_L4.txt '
    '--permutations 9999'
)
# Sediment, location
get_ipython().system(
    'group_significance.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/Westpoint_NoNC_Soil_20000_L4.biom '
    '-m $bactarch_map -c Location '
    '-o open_ref_97_otus_EverythingRCFiltered/Taxa_Soil_20000_top99/group_sig_location_kw_9999_L4.txt '
    '--permutations 9999'
)

# ### Random Forest Classifier
# NOTE(review): these runs were originally labelled "order level", but the inputs are
# the abundance-filtered OTU tables (*_top99.biom), not the L4 summaries - confirm.

# In[5]:

# All, cross-validation 10 fold, sample type
get_ipython().system(
    'supervised_learning.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99_supervised_learning_substrate_cv10/ '
    '-m $bactarch_map -c Substrate --ntree 1000 -e cv10 -f'
)
# All, leave one out, sample type
get_ipython().system(
    'supervised_learning.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json_top99_supervised_learning_substrate_loo/ '
    '-m $bactarch_map -c Substrate --ntree 1000 -e loo -f'
)
# Sediment, cross-validation 10 fold, location.
# The sediment runs classify by Location, matching their labels and their
# "..._location" output directories; the sediment table comes from the
# "Substrate:soil" filter, so Substrate would offer only one class to predict.
# The "200000" in the output directory names is kept as-is so existing results are
# still found at the same paths.
get_ipython().system(
    'supervised_learning.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_200000_w_meta_supervised_learning_location/ '
    '-m $bactarch_map -c Location --ntree 1000 -e cv10 -f'
)
# Sediment, leave one out, location
get_ipython().system(
    'supervised_learning.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_top99.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_200000_w_meta_supervised_learning_location_loo/ '
    '-m $bactarch_map -c Location --ntree 1000 -e loo -f'
)

# ### Exporting Biom Tables for Further Analysis

# In[ ]:

# How to convert to JSON for phyloseq and phinch (template command with placeholder
# file names)
get_ipython().system(
    'biom convert -i table.txt -o table.from_txt_json.biom '
    '--table-type="OTU table" --to-json'
)

# In[41]:

# Converting biom tables for phyloseq
# All 16S
get_ipython().system(
    'biom convert '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_json.biom '
    '--table-type="OTU table" --to-json'
)
# Soil
get_ipython().system(
    'biom convert '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_json.biom '
    '--table-type="OTU table" --to-json'
)
# Leaf
get_ipython().system(
    'biom convert '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Leaf_5177_json.biom '
    '--table-type="OTU table" --to-json'
)
# Root
get_ipython().system(
    'biom convert '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Root_3277_json.biom '
    '--table-type="OTU table" --to-json'
)

# # Picrust
# Picrust only accepts closed-reference OTUs, so this will filter out all open
# reference OTUs

# In[6]:

# Keep only OTUs whose IDs appear in the reference taxonomy file
get_ipython().system(
    'filter_otus_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_json.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed.biom '
    '--negate_ids_to_exclude -e $reference_tax'
)
get_ipython().system(
    'filter_otus_from_otu_table.py '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_json.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed.biom '
    '--negate_ids_to_exclude -e $reference_tax'
)

# In[1]:

# Export the closed-reference tables as JSON
get_ipython().system(
    'biom convert '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_3277_w_meta_closed_json.biom '
    '--table-type="OTU table" --to-json'
)
get_ipython().system(
    'biom convert '
    '-i open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed.biom '
    '-o open_ref_97_otus_EverythingRCFiltered/Westpoint_NoNC_Soil_20000_w_meta_closed_json.biom '
    '--table-type="OTU table" --to-json'
)