"""Compare taxonomy collapsing in biom-format with QIIME's summarize_taxa.py.

The script writes a small sparse BIOM 1.0.0 OTU table to ``test.biom``,
loads it with ``biom.load_table`` and collapses the observations by their
taxonomy metadata in several ways, printing each resulting table.

NOTE: the two external commands are run through ``get_ipython().system``,
which is what IPython translates a ``!command`` shell escape into, so this
file is meant to be executed under IPython / a notebook kernel.
"""
from biom import load_table

# The BIOM document, split into adjacent string literals purely for
# readability; the concatenation is a single line of JSON.
BIOM_JSON = (
    '{ "columns": [ '
    '{ "id": "Sample1", "metadata": { "BarcodeSequence": "AGCACGAGCCTA", "DOB": 20060805 } }, '
    '{ "id": "Sample2", "metadata": { "BarcodeSequence": "AACTCGTCGATG", "DOB": 20060216 } }, '
    '{ "id": "Sample3", "metadata": { "BarcodeSequence": "ACAGACCACTCA", "DOB": 20060109 } }, '
    '{ "id": "Sample4", "metadata": { "BarcodeSequence": "ACCAGCGACTAG", "DOB": 20070530 } }, '
    '{ "id": "Sample5", "metadata": { "BarcodeSequence": "AGCAGCACTTGT", "DOB": 20070101 } }, '
    '{ "id": "Sample6", "metadata": { "BarcodeSequence": "AGCAGCACAACT", "DOB": 20070716 } } '
    '], "data": [ '
    '[0, 2, 1.0], [1, 0, 5.0], [1, 1, 1.0], [1, 3, 2.0], [1, 4, 3.0], '
    '[1, 5, 1.0], [2, 2, 1.0], [2, 3, 4.0], [2, 5, 2.0], [3, 0, 2.0], '
    '[3, 1, 1.0], [3, 2, 1.0], [3, 5, 1.0], [4, 1, 1.0], [4, 2, 1.0] '
    '], "date": "2012-12-11T07:30:29.870689", '
    '"format": "Biological Observation Matrix 1.0.0", '
    '"format_url": "http://biom-format.org", '
    '"generated_by": "some software package", '
    '"id": null, '
    '"matrix_element_type": "float", '
    '"matrix_type": "sparse", '
    '"rows": [ '
    '{ "id": "GG_OTU_1", "metadata": { "confidence": 0.665, "taxonomy": '
    '["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", '
    '"o__Clostridiales", "f__Lachnospiraceae"] } }, '
    '{ "id": "GG_OTU_2", "metadata": { "confidence": 0.98, "taxonomy": '
    '["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", '
    '"o__Clostridiales", "f__OnlyOnce1"] } }, '
    '{ "id": "GG_OTU_3", "metadata": { "confidence": 1.0, "taxonomy": '
    '["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", '
    '"o__Clostridiales", "f__Lachnospiraceae"] } }, '
    '{ "id": "GG_OTU_4", "metadata": { "confidence": 0.842, "taxonomy": '
    '["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", '
    '"o__Clostridiales", "f__Lachnospiraceae"] } }, '
    '{ "id": "GG_OTU_5", "metadata": { "confidence": 1.0, "taxonomy": '
    '["Root", "k__Bacteria", "p__Firmicutes", "c__Clostridia", '
    '"o__Clostridiales", "f__OnlyOnce2"] } } '
    '], "shape": [5, 6], "type": "OTU table" }'
)

# Write the fixture; the context manager closes the handle even if the
# write fails.
with open('test.biom', 'w') as f:
    f.write(BIOM_JSON)

t = load_table('test.biom')

# Show the taxonomy list attached to each observation.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for e in t.observation_metadata:
    print(e['taxonomy'])


def collapse_on_family(id_, md):
    """Return the collapse key for one observation.

    Parameters
    ----------
    id_ : observation identifier (unused).
    md : mapping holding a ``'taxonomy'`` sequence of strings.

    Returns
    -------
    str
        The first six taxonomy entries joined with ``';'`` (in the table
        above that is ``Root`` through the ``f__...`` entry).
    """
    return ';'.join(md['taxonomy'][:6])


t_biom_collapsed = t.collapse(collapse_on_family, axis='observation')
print(t_biom_collapsed)

# Equivalent of the IPython shell escape
#   !summarize_taxa.py -i test.biom -o summarize_taxa_out/
# The file loaded on the next line is expected to be produced by this command.
get_ipython().system('summarize_taxa.py -i test.biom -o summarize_taxa_out/')
t_qiime_collapsed = load_table('summarize_taxa_out/test_L6.biom')
print(t_qiime_collapsed)


def collapse_f(id_, md):
    """Return only the sixth taxonomy entry (index 5) as the collapse key."""
    return md['taxonomy'][5]


# Collapse with the library's default minimum group size ...
alt_t_biom_collapsed = t.collapse(collapse_f, axis='observation')
print(alt_t_biom_collapsed)

# ... and again with min_group_size=1 passed explicitly, for comparison.
# NOTE(review): presumably this keeps the groups that contain a single
# observation (f__OnlyOnce1 / f__OnlyOnce2) -- confirm against the biom
# version in use.
alt_t_biom_collapsed = t.collapse(collapse_f, axis='observation',
                                  min_group_size=1)
print(alt_t_biom_collapsed)

# Equivalent of the IPython shell escape `!print_qiime_config.py`.
get_ipython().system('print_qiime_config.py')