The aim of this notebook is to show how to create an ISA document for depositing Stable Isotope Resolved Metabolomics (SIRM) study metadata using the ISA API.
This notebook highlights key steps of the deposition, including:
Stable Isotope Resolved Metabolomics (SIRM) studies are a type of study using MS and NMR acquisition techniques to decipher biochemical reactions using tracer molecules, i.e. molecules for which certain positions carry an isotope (e.g. 13C, 15N). Specific data acquisition and data processing techniques are required and dedicated software is used to make sense of the data. Software such as IsoSolve [1], Ramid [2] (for primary processing of 13C mass isotopomer data obtained with GC-MS) or midcor [3] (for natural abundance correction processes on 13C mass isotopomer spectra) may be used to accomplish those tasks. The outputs of such tools are tables which may comply with a new specification devised to better support the reporting of SIRM study results.
from isatools.model import (
Comment,
Investigation,
Study,
StudyFactor,
FactorValue,
OntologyAnnotation,
Characteristic,
OntologySource,
Material,
Sample,
Source,
Protocol,
ProtocolParameter,
ProtocolComponent,
ParameterValue,
Process,
Publication,
Person,
Assay,
DataFile,
plink
)
import datetime
import os
# Root ISA object: it holds the ontology source declarations and, later on,
# the study described in this notebook.
investigation = Investigation()

# Ontology sources referenced by the annotations built in this notebook.
chebi = OntologySource(name="CHEBI", description="Chemical Entity of Biological Interest")
efo = OntologySource(name="EFO", description="Experimental Factor Ontology")
msio = OntologySource(name="MSIO", description="Metabolomics Standards Initiative Ontology")
obi = OntologySource(name="OBI", description="Ontology for Biomedical Investigations")
pato = OntologySource(name="PATO", description="Phenotype and Trait Ontology")
# The source name previously read "NCIBTaxon" (transposed letters).
ncbitaxon = OntologySource(name="NCBITaxon", description="NCBI Taxonomy")

# Every source used as a `term_source` has to be declared on the investigation.
# MSIO is used by the study design descriptor and by the assay measurement /
# technology types, so it must be listed here as well.
investigation.ontology_source_references = [chebi, efo, msio, obi, pato, ncbitaxon]
# Create the ISA Study and set its descriptive metadata.
study = Study(filename="s_13C-SIRM-study.txt")
study.identifier = "MTBLS-XXXX-SIRM"
study.title = "[U-13C6]-D-glucose labeling experiment in MCF7 cancer cell line"
study.description = "Probing cancer pathways of MCF7 cell line using 13C stable isotope resolved metabolomics study using isotopologue distribution analysis with mass spectrometry and isotopomer analysis by 1D 1H NMR."
# Dates are written in ISO 8601 form (YYYY-MM-DD), the format the ISA
# specification recommends, rather than DD/MM/YYYY.
study.submission_date = "2021-08-15"
study.public_release_date = "2021-08-15"

# These EMBL-EBI Metabolights (MTBLS) related ISA Comments fields may be used for deposition to EMBL-EBI
src_comment_mtbls1 = Comment(name="MTBLS Broker Name", value="OXFORD")
src_comment_mtbls2 = Comment(name="MTBLS Center Name", value="OXFORD")
src_comment_mtbls3 = Comment(name="MTBLS Center Project Name", value="OXFORD")
src_comment_mtbls4 = Comment(name="MTBLS Lab Name", value="Oxford e-Research Centre")
src_comment_mtbls5 = Comment(name="MTBLS Submission Action", value="ADD")
study.comments.extend([
    src_comment_mtbls1,
    src_comment_mtbls2,
    src_comment_mtbls3,
    src_comment_mtbls4,
    src_comment_mtbls5,
])

# These ISA Comments are optional and may be used to report funding information
src_comment_st1 = Comment(name="Study Funding Agency", value="")
src_comment_st2 = Comment(name="Study Grant Number", value="")
study.comments.extend([src_comment_st1, src_comment_st2])

# Adding Study Design descriptors to the ISA Study object
intervention_design = OntologyAnnotation(term_source=obi)
intervention_design.term = "intervention design"
intervention_design.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"

study_design = OntologyAnnotation(term_source=msio)
study_design.term = "stable isotope resolved metabolomics study"
study_design.term_accession = "http://purl.obolibrary.org/obo/MSIO_0000096"

study.design_descriptors.extend([intervention_design, study_design])
# Declare the three independent variables (Study Factors) of the experiment.
compound_type = OntologyAnnotation(
    term="chemical substance",
    term_accession="http://purl.obolibrary.org/obo/CHEBI_59999",
    term_source=chebi,
)
dose_type = OntologyAnnotation(
    term="dose",
    term_accession="http://www.ebi.ac.uk/efo/EFO_0000428",
    term_source=efo,
)
duration_type = OntologyAnnotation(
    term="time",
    term_accession="http://purl.obolibrary.org/obo/PATO_0000165",
    term_source=pato,
)
study.factors = [
    StudyFactor(name="compound", factor_type=compound_type),
    StudyFactor(name="dose", factor_type=dose_type),
    StudyFactor(name="duration", factor_type=duration_type),
]

# Define the levels (Factor Values) taken by each Study Factor; the factors
# are read back from the study so the values point at the stored objects.
compound_factor, dose_factor, duration_factor = study.factors
fv1 = FactorValue(factor_name=compound_factor, value=OntologyAnnotation(term="dioxygen"))
fv2 = FactorValue(factor_name=dose_factor, value=OntologyAnnotation(term="high"))
fv3 = FactorValue(factor_name=dose_factor, value=OntologyAnnotation(term="normal"))
fv4 = FactorValue(factor_name=duration_factor, value=OntologyAnnotation(term="hour"))
# Publication associated with the study.
sirm_publication = Publication(
    doi="10.1371/journal.pone.0000000",
    pubmed_id="",
    title="Decyphering new cancer pathways with stable isotope resolved metabolomics in MCF7 cell lines",
    status=OntologyAnnotation(term="indexed in PubMed"),
    author_list="Min,W. and Everest H",
)
study.publications = [sirm_publication]

# People to contact about the study, with their roles.
contact_min = Person(
    first_name="Weng",
    last_name="Min",
    affiliation="Beijing Institute of Metabolism",
    email="weng.min@bim.edu.cn",
    address="Prospect Street, Beijing, People's Republic of China",
    comments=[Comment(name="Study Person REF", value="")],
    roles=[
        OntologyAnnotation(term="principal investigator role"),
        OntologyAnnotation(term="SRA Inform On Status"),
        OntologyAnnotation(term="SRA Inform On Error"),
    ],
)
contact_everest = Person(
    first_name="Hillary",
    last_name="Everest",
    affiliation="Centre for Cell Metabolism",
    address="CCM, Edinborough, United Kingdom",
    comments=[Comment(name="Study Person REF", value="")],
    roles=[OntologyAnnotation(term="principal investigator role")],
)
study.contacts = [contact_min, contact_everest]
def _protocol_parameter(term):
    """Return a new ProtocolParameter whose name is the free-text *term*."""
    return ProtocolParameter(parameter_name=OntologyAnnotation(term=term))


# The protocols are referenced by position further down, so their order in
# this list matters.
study.protocols = [
    # Protocol #0
    Protocol(
        name="cell culture and isotopic labeling",
        description="SOP for growing MCF7 cells and incubating them with the tracer molecule",
        protocol_type=OntologyAnnotation(term="sample collection"),
        parameters=[_protocol_parameter("tracer molecule")],
    ),
    # Protocol #1
    Protocol(
        name="intracellular metabolite extraction",
        description="SOP for extracting metabolites from harvested cells",
        protocol_type=OntologyAnnotation(term="metabolite extraction"),
    ),
    # Protocol #2
    Protocol(
        name="extracellular metabolite extraction",
        description="SOP for extracting metabolites from cell culture supernatant",
        protocol_type=OntologyAnnotation(term="metabolite extraction"),
    ),
    # Protocol #3
    Protocol(
        name="liquid chromatography mass spectrometry",
        description="SOP for LC-MS data acquisition",
        protocol_type=OntologyAnnotation(term="mass spectrometry"),
        parameters=[
            _protocol_parameter("chromatography column"),
            _protocol_parameter("mass spectrometry instrument"),
            _protocol_parameter("mass analyzer"),
        ],
    ),
    # Protocol #4
    Protocol(
        name="1D 13C NMR spectroscopy for isotopomer analysis",
        description="SOP for 1D 13C NMR data acquisition for isotopomer analysis",
        protocol_type=OntologyAnnotation(term="nmr spectroscopy"),
        parameters=[
            _protocol_parameter("magnetic field strength"),
            _protocol_parameter("nmr tube"),
            _protocol_parameter("pulse sequence"),
        ],
    ),
    # Protocol #5
    Protocol(
        name="1D 13C NMR spectroscopy for metabolite profiling",
        description="SOP for 1D 13C NMR data acquisition for metabolite profiling",
        protocol_type=OntologyAnnotation(term="nmr spectroscopy"),
        parameters=[
            _protocol_parameter("magnetic field strength"),
            _protocol_parameter("nmr tube"),
            _protocol_parameter("pulse sequence"),
        ],
    ),
    # Protocol #6
    Protocol(
        name="MS metabolite identification",
        description="SOP for MS signal processing and metabolite and isotopologue identification",
        protocol_type=OntologyAnnotation(term="metabolite identification"),
        parameters=[_protocol_parameter("ms software")],
    ),
    # Protocol #7
    Protocol(
        name="NMR metabolite identification",
        description="SOP for NMR signal processing and metabolite and isotopomer identification",
        uri="https://doi.org/10.1021/acs.analchem.1c01064",
        protocol_type=OntologyAnnotation(term="data transformation"),
        parameters=[_protocol_parameter("nmr software")],
    ),
]
In this fictional study, we assume the following underlying experimental setup:
# Create the ISA Source materials: one per cell culture.
study.sources = [Source(name="culture-1"), Source(name="culture-2")]

# Characteristics shared by both cultures: the organism and the cell line.
characteristic_organism = Characteristic(
    category=OntologyAnnotation(term="Organism"),
    value=OntologyAnnotation(
        term="Homo sapiens",
        term_source=ncbitaxon,
        term_accession="http://purl.obolibrary.org/obo/NCBITaxon_9606",
    ),
)
characteristic_cell = Characteristic(
    category=OntologyAnnotation(term="cell line"),
    value=OntologyAnnotation(term="MCF-7", term_source="", term_accession=""),
)

# Attach the same two characteristic objects to every source.
for source in study.sources:
    source.characteristics.append(characteristic_organism)
    source.characteristics.append(characteristic_cell)
# Note how the treatment groups are defined as sets of factor values attached to the ISA.Sample object
treatment_1 = [fv1, fv2, fv4]
treatment_2 = [fv1, fv3, fv4]

# Ensuring the Tracer Molecule(s) used for the SIRM study is properly reported
tracer_mol_C = ParameterValue(
    category=ProtocolParameter(parameter_name=OntologyAnnotation(term="tracer molecule")),
    value=OntologyAnnotation(term="80% [1-13C1]-D-glucose + 20% [U-13C6]-D-glucose"),
)
tracers = [tracer_mol_C]

# the number of samples collected from each culture condition
replicates = 4

# Each source (culture) is paired with the treatment applied to it.
source_treatments = [
    (study.sources[0], treatment_1),
    (study.sources[1], treatment_2),
]

# Create the samples, remembering which source each one came from. Tracking
# the samples per source (instead of hard-coding indices 0,2,4,6 / 1,3,5,7)
# keeps the collection processes correct for any value of `replicates`.
# Samples are still added to the study interleaved: source 0, source 1, ...
samples_by_source = [[] for _ in source_treatments]
for k in range(replicates):
    for collected, (source, treatment) in zip(samples_by_source, source_treatments):
        new_sample = Sample(name=(source.name + "-sample-" + str(k)), factor_values=treatment)
        study.samples.append(new_sample)
        collected.append(new_sample)

# Now creating one Process per source showing a `Protocol Application` (sample
# collection) using the Source as input and producing its Samples as output.
for collected, (source, _) in zip(samples_by_source, source_treatments):
    study.process_sequence.append(
        Process(
            executes_protocol=study.protocols[0],  # a sample collection
            inputs=[source],
            outputs=collected,
            parameter_values=[tracer_mol_C],
        )
    )
# Attach the ISA Study object to the ISA Investigation object.
investigation.studies = [study]

# Declare the three assays of this study: an MS isotopologue assay, an NMR
# isotopomer assay and an NMR metabolite profiling assay.
assay = Assay(
    filename="a_isotopologue-ms-assay.txt",
    measurement_type=OntologyAnnotation(
        term="isotopologue distribution analysis",
        term_accession="http://purl.obolibrary.org/obo/msio.owl#mass_isotopologue_distribution_analysis",
        term_source=msio,
    ),
    technology_type=OntologyAnnotation(
        term="mass spectrometry",
        term_accession="http://purl.obolibrary.org/obo/CHMO_0000470",
        term_source=msio,
    ),
)

assay_nmr_topo = Assay(
    filename="a_isotopomer-nmr-assay.txt",
    measurement_type=OntologyAnnotation(
        term="isotopomer analysis",
        term_accession="http://purl.obolibrary.org/obo/msio.owl#isotopomer_analysis",
        term_source=msio,
    ),
    technology_type=OntologyAnnotation(
        term="NMR spectroscopy",
        term_accession="http://purl.obolibrary.org/obo/CHMO_0000591",
        term_source=msio,
    ),
)

assay_nmr_metpro = Assay(
    filename="a_metabolite-profiling-nmr-assay.txt",
    measurement_type=OntologyAnnotation(
        term="untargeted metabolite profiling",
        term_accession="http://purl.obolibrary.org/obo/MSIO_0000101",
        term_source=msio,
    ),
    technology_type=OntologyAnnotation(
        term="NMR spectroscopy",
        term_accession="http://purl.obolibrary.org/obo/CHMO_0000591",
        term_source=msio,
    ),
)
# Warning: `technology type` OntologyAnnotation.term is left empty
#
# Build the MS isotopologue assay: for every study sample, chain
# extraction -> LC-MS acquisition -> MS data transformation.
for i, sample in enumerate(study.samples):
    # Extraction: takes the sample as input and produces an extract material.
    extraction_process = Process(executes_protocol=study.protocols[1])
    extraction_process.inputs.append(sample)

    char_ext = Characteristic(
        category=OntologyAnnotation(term="Material Type"),
        value=OntologyAnnotation(term="pellet"),
    )
    char_ext1 = Characteristic(
        category=OntologyAnnotation(term="quantity"),
        value=40,
        unit=OntologyAnnotation(term="mg"),
    )
    ms_material = Material(name="extract-ms-{}".format(i))
    ms_material.type = "Extract Name"
    ms_material.characteristics.append(char_ext)
    ms_material.characteristics.append(char_ext1)
    extraction_process.outputs.append(ms_material)

    # MS acquisition: executes the LC-MS protocol on the extract.
    column = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="chromatography column")),
        value=OntologyAnnotation(term="Agilent C18 TTX"),
    )
    ms_inst = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="mass spectrometry instrument")),
        value=OntologyAnnotation(term="Agilent QTOF XL"),
    )
    ms_anlzr = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="mass analyzer")),
        value=OntologyAnnotation(term="Agilent MassDiscovery"),
    )
    isotopologue_process = Process(
        executes_protocol=study.protocols[3],
        parameter_values=[column, ms_inst, ms_anlzr],
    )
    isotopologue_process.name = "assay-name-ms-{}".format(i)
    isotopologue_process.inputs.append(extraction_process.outputs[0])

    # The acquisition outputs an mzml raw data file carrying one comment.
    datafile = DataFile(filename="ms-data-{}.mzml".format(i), label="Spectral Raw Data File")
    data_comment = Comment(name="data_comment", value="data_value")
    datafile.comments.append(data_comment)
    isotopologue_process.outputs.append(datafile)

    # Make sure the sample, the extract, the data file and the processes are
    # attached to the assay.
    assay.samples.append(sample)
    assay.other_material.append(ms_material)
    assay.data_files.append(datafile)
    assay.process_sequence.append(extraction_process)
    assay.process_sequence.append(isotopologue_process)

    # Data transformation: turns the raw data file into a derived data file.
    ms_sw = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="ms software")),
        value=OntologyAnnotation(term="IsoSolve"),
    )
    ms_da_process = Process(executes_protocol=study.protocols[6], parameter_values=[ms_sw])
    ms_da_process.name = "MS-DT-ident"
    ms_da_process.inputs.append(datafile)
    ms_da_process.outputs.append(
        DataFile(filename="isotopologue-distribution-analysis.txt", label="Derived Data File")
    )
    assay.process_sequence.append(ms_da_process)

    # Ensure Processes are linked forward and backward. plink(from_process, to_process)
    # is a function to set these links for you. It is found in the isatools.model package
    plink(extraction_process, isotopologue_process)
    plink(isotopologue_process, ms_da_process)
NOTE
make sure to use the ISA API plink function
to connect the processes (protocol applications) in a chain.
# Build the NMR isotopomer assay: for every study sample, one extraction
# followed by one NMR acquisition + data transformation per pulse sequence.
for i, sample in enumerate(study.samples):
    # Extraction: takes the sample as input and produces an extract material.
    extraction_process_nmr = Process(executes_protocol=study.protocols[1])
    extraction_process_nmr.inputs.append(sample)
    material_nmr = Material(name="extract-nmr-topo-{}".format(i))
    material_nmr.type = "Extract Name"
    extraction_process_nmr.outputs.append(material_nmr)

    # The sample, the extract and the extraction process exist once per
    # sample, so they are attached to the assay once, not once per pulse.
    assay_nmr_topo.samples.append(sample)
    assay_nmr_topo.other_material.append(material_nmr)
    assay_nmr_topo.process_sequence.append(extraction_process_nmr)

    # Parameter values of the NMR acquisition protocol.
    magnet = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="magnetic field strength")),
        value=6,
        unit=OntologyAnnotation(term="Tesla"),
    )
    tube = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="nmr tube")),
        value=OntologyAnnotation(term="Brucker 14 mm Oscar"),
    )
    pulse_a = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="pulse sequence")),
        value=OntologyAnnotation(term="HSQC"),
    )
    pulse_b = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="pulse sequence")),
        value=OntologyAnnotation(term="ZQF-TOCSY"),
    )
    pulse_c = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="pulse sequence")),
        value=OntologyAnnotation(term="HNCA"),
    )
    pulse_d = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="pulse sequence")),
        value=OntologyAnnotation(term="HACO-DIPSY"),
    )
    pulses = [pulse_a, pulse_b, pulse_c, pulse_d]

    for pulse in pulses:
        # NMR acquisition for this pulse sequence, run on the sample's extract.
        isotopomer_process = Process(
            executes_protocol=study.protocols[4],
            parameter_values=[magnet, tube, pulse],
        )
        isotopomer_process.name = "assay-name-nmr-topo-" + pulse.value.term + "-{}".format(i + 1)
        isotopomer_process.inputs.append(extraction_process_nmr.outputs[0])

        # The acquisition outputs a free induction decay data file.
        datafile_nmr = DataFile(
            filename="nmr-data-topo" + pulse.value.term + "-{}.nmrml".format(i + 1),
            label="Free Induction Decay File",
        )
        isotopomer_process.outputs.append(datafile_nmr)
        assay_nmr_topo.data_files.append(datafile_nmr)
        assay_nmr_topo.process_sequence.append(isotopomer_process)

        # Data transformation of the NMR file acquired just above. It used to
        # take `datafile`, the last file of the MS assay, as input, which
        # disconnected it from this assay's acquisition.
        nmr_sw = ParameterValue(
            category=ProtocolParameter(parameter_name=OntologyAnnotation(term="nmr software")),
            value=OntologyAnnotation(term="https://pypi.org/project/IsoSolve"),
        )
        nmr_topo_da_process = Process(executes_protocol=study.protocols[7], parameter_values=[nmr_sw])
        nmr_topo_da_process.name = "NMR-TOPO-DT-ident"
        nmr_topo_da_process.inputs.append(datafile_nmr)
        nmr_topo_da_process.outputs.append(
            DataFile(filename="isotopomer-analysis.txt", label="Derived Data File")
        )
        # Attach the data transformation to the assay, as the MS assay does.
        assay_nmr_topo.process_sequence.append(nmr_topo_da_process)

        # Ensure Processes are linked forward and backward with plink(from_process, to_process).
        plink(extraction_process_nmr, isotopomer_process)
        plink(isotopomer_process, nmr_topo_da_process)
# Build the NMR metabolite profiling assay: for every study sample, one
# extraction followed by one NMR acquisition + data transformation per pulse
# sequence.
for i, sample in enumerate(study.samples):
    # Extraction: takes the sample as input and produces an extract material.
    extraction_process_nmr_metpro = Process(executes_protocol=study.protocols[1])
    extraction_process_nmr_metpro.inputs.append(sample)
    material_nmr_metpro = Material(name="extract-nmr-metpro-{}".format(i))
    material_nmr_metpro.type = "Extract Name"
    extraction_process_nmr_metpro.outputs.append(material_nmr_metpro)

    # The sample, the extract and the extraction process exist once per
    # sample, so they are attached to the assay once, not once per pulse.
    assay_nmr_metpro.samples.append(sample)
    assay_nmr_metpro.other_material.append(material_nmr_metpro)
    assay_nmr_metpro.process_sequence.append(extraction_process_nmr_metpro)

    # Parameter values of the NMR acquisition protocol.
    magnet = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="magnetic field strength")),
        value=6,
        unit=OntologyAnnotation(term="Tesla"),
    )
    tube = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="nmr tube")),
        value=OntologyAnnotation(term="Brucker 14 mm Oscar"),
    )
    pulse_a = ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term="pulse sequence")),
        value=OntologyAnnotation(term="CPMG"),
    )
    pulses = [pulse_a]

    for pulse in pulses:
        # NMR acquisition for this pulse sequence, run on the sample's extract.
        metpro_process = Process(
            executes_protocol=study.protocols[5],
            parameter_values=[magnet, tube, pulse],
        )
        metpro_process.name = "assay-name-nmr-metpro-" + pulse.value.term + "-{}".format(i + 1)
        metpro_process.inputs.append(extraction_process_nmr_metpro.outputs[0])

        # The acquisition outputs its own free induction decay file. It used
        # to output `datafile_nmr`, a file left over from the isotopomer
        # assay, instead of the file created here.
        datafile_nmr_metpro = DataFile(
            filename="nmr-data-metpro" + pulse.value.term + "-{}.nmrml".format(i + 1),
            label="Free Induction Decay File",
        )
        metpro_process.outputs.append(datafile_nmr_metpro)
        assay_nmr_metpro.data_files.append(datafile_nmr_metpro)
        assay_nmr_metpro.process_sequence.append(metpro_process)

        # Data transformation of the NMR file acquired just above.
        nmr_sw = ParameterValue(
            category=ProtocolParameter(parameter_name=OntologyAnnotation(term="nmr software")),
            value=OntologyAnnotation(term="Batman"),
        )
        nmr_da_process = Process(executes_protocol=study.protocols[7], parameter_values=[nmr_sw])
        nmr_da_process.name = "NMR-metpro-DT-ident"
        nmr_da_process.inputs.append(datafile_nmr_metpro)
        nmr_da_process.outputs.append(
            DataFile(filename="metpro-analysis.txt", label="Derived Data File")
        )
        # Attach the data transformation to the assay, as the MS assay does.
        assay_nmr_metpro.process_sequence.append(nmr_da_process)

        # Ensure Processes are linked forward and backward with plink(from_process, to_process).
        plink(extraction_process_nmr_metpro, metpro_process)
        plink(metpro_process, nmr_da_process)
# Attach the three assays to the study, in declaration order.
for declared_assay in (assay, assay_nmr_topo, assay_nmr_metpro):
    study.assays.append(declared_assay)

# Extra protocol describing the integrative MS + NMR analysis workflow; it is
# added after the positional protocols #0-#7.
workflow_ref = Protocol(
    name="13C SIRM MS and NMR integrative analysis",
    description="a workflow for integrating data from NMR and MS acquisition into a consolidated result",
    uri="https://doi.org/10.1021/acs.analchem.1c01064",
    protocol_type=OntologyAnnotation(term="data transformation"),
    parameters=[
        ProtocolParameter(parameter_name=OntologyAnnotation(term="software")),
    ],
)
study.protocols.append(workflow_ref)
# `dump` function: serialize the ISA Investigation as ISA-Tab files.
from isatools.isatab import dump

# Create the target directory up front so that writing does not depend on it
# already existing (`os` is imported at the top of the notebook).
os.makedirs("./output/MTBLS-XXXX-SIRM/", exist_ok=True)

# note the use of the flag for explicit serialization on factor values on assay tables
dump(investigation, "./output/MTBLS-XXXX-SIRM/", write_factor_values_in_assay_table=True)
# `validate` function: run the ISA-Tab validator on the investigation file
# written by the previous step.
from isatools import isatab

# Open the file in a context manager so the handle is closed once validation
# has finished.
with open(os.path.join("./output/MTBLS-XXXX-SIRM/", "i_investigation.txt")) as investigation_fp:
    my_json_report_isa_flux = isatab.validate(investigation_fp)

# Display the errors collected in the validation report.
my_json_report_isa_flux["errors"]
NOTE: The error report indicates the need to add new configuration files matching the assay definitions.
import json

from isatools import isajson
from isatools.convert import isatab2json
from isatools.isatab import load

# Read the ISA-Tab investigation written earlier back into ISA objects.
investigation_path = os.path.join("./output/MTBLS-XXXX-SIRM/", "i_investigation.txt")
with open(investigation_path) as isa_sirm_test:
    roundtrip = load(isa_sirm_test)

# Convert the ISA-Tab directory to ISA-JSON and pretty-print it to a string.
isa_json = isatab2json.convert(
    './output/MTBLS-XXXX-SIRM/',
    validate_first=False,
    use_new_parser=True,
)
isa_j = json.dumps(
    isa_json,
    cls=isajson.ISAJSONEncoder,
    sort_keys=True,
    indent=4,
    separators=(',', ': '),
)

# Write the ISA-JSON document next to the ISA-Tab files.
json_path = os.path.join('./output/MTBLS-XXXX-SIRM/', 'isa-sirm-test.json')
with open(json_path, 'w') as out_fp:
    out_fp.write(isa_j)

# note the use of the flag for explicit serialization on factor values on assay tables
# dump(roundtrip, "./notebook-output/MTBLS-0000-SIRM-roundtrip/", write_factor_values_in_assay_table=True)