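This example shows how to use the ProtocolProcessSequence to build an ISA graph with node merging (pooling) events. The notebook shows 2 examples: pooling at the material level (sources merged into samples) and merging at the data level (raw data files combined by a data transformation).
The notebook also shows how to serialize (write) the ISA Model content to ISA-Tab and ISA-JSON formats.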
from isatools.model import *
from isatools.create.model import *
import datetime
# Top-level ISA.Investigation container that will hold the study.
investigation = Investigation()

# ISA.Study describing the sample-pooling example.
study = Study(filename="s_study.txt")
study.identifier = "S1"
study.title = "ISA Study example: creating sample pools"
study.description = "a jupytern notebook showing how to create pooled samples (a node merging event with material nodes)"

# ISA.Protocol objects, declared one by one and then registered in a fixed
# order: the rest of the notebook looks them up by index in study.protocols.
pooling_protocol = Protocol(name="sample collection", protocol_type="pooling")
extraction_protocol = Protocol(
    name="intracellular fraction extraction",
    protocol_type=OntologyAnnotation(term="extraction"),
    parameters=[
        ProtocolParameter(parameter_name=OntologyAnnotation(term=parameter_term))
        for parameter_term in ("concentration", "sample QC")
    ],
)
acquisition_protocol = Protocol(
    name="data collection",
    protocol_type=OntologyAnnotation(term="data acquisition"),
)
transformation_protocol = Protocol(
    name="data transformation",
    protocol_type=OntologyAnnotation(term="data normalization"),
)
study.protocols = [
    pooling_protocol,
    extraction_protocol,
    acquisition_protocol,
    transformation_protocol,
]

# Four ISA.Source objects (source1..source4) and two ISA.Sample objects
# (sample1, sample2).
study.sources = [Source(name=f"source{number}") for number in range(1, 5)]
study.samples = [Sample(name=f"sample{number}") for number in range(1, 3)]

# Each entry pairs the sources being pooled with the sample they are pooled
# into: source1 + source2 -> sample1, source3 + source4 -> sample2.
pooling_plan = [
    (study.sources[0:2], study.samples[0]),
    (study.sources[2:4], study.samples[1]),
]

# One protocol application (ISA.Process) per pooling event; several inputs
# feeding a single output is what models the node merging.
study.process_sequence = [
    Process(
        executes_protocol=study.protocols[0],
        inputs=pooled_sources,
        outputs=[pooled_sample],
    )
    for pooled_sources, pooled_sample in pooling_plan
]

investigation.studies = [study]
# Serialize the investigation to ISA-Tab text and display it to check the
# resulting study table.
from isatools.isatab import dumps

study_isatab = dumps(investigation)
print(study_isatab)
/var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmp5xyrzxr6/i_investigation.txt ONTOLOGY SOURCE REFERENCE Term Source Name Term Source File Term Source Version Term Source Description INVESTIGATION Investigation Identifier Investigation Title Investigation Description Investigation Submission Date Investigation Public Release Date INVESTIGATION PUBLICATIONS Investigation PubMed ID Investigation Publication DOI Investigation Publication Author List Investigation Publication Title Investigation Publication Status Investigation Publication Status Term Accession Number Investigation Publication Status Term Source REF INVESTIGATION CONTACTS Investigation Person Last Name Investigation Person First Name Investigation Person Mid Initials Investigation Person Email Investigation Person Phone Investigation Person Fax Investigation Person Address Investigation Person Affiliation Investigation Person Roles Investigation Person Roles Term Accession Number Investigation Person Roles Term Source REF STUDY Study Identifier S1 Study Title ISA Study example: creating sample pools Study Description a jupytern notebook showing how to create pooled samples (a node merging event with material nodes) Study Submission Date Study Public Release Date Study File Name s_study.txt STUDY DESIGN DESCRIPTORS Study Design Type Study Design Type Term Accession Number Study Design Type Term Source REF STUDY PUBLICATIONS Study PubMed ID Study Publication DOI Study Publication Author List Study Publication Title Study Publication Status Study Publication Status Term Accession Number Study Publication Status Term Source REF STUDY FACTORS Study Factor Name Study Factor Type Study Factor Type Term Accession Number Study Factor Type Term Source REF STUDY ASSAYS Study Assay File Name Study Assay Measurement Type Study Assay Measurement Type Term Accession Number Study Assay Measurement Type Term Source REF Study Assay Technology Type Study Assay Technology Type Term Accession Number Study Assay 
Technology Type Term Source REF Study Assay Technology Platform STUDY PROTOCOLS Study Protocol Name sample collection intracellular fraction extraction data collection data transformation Study Protocol Type pooling extraction data acquisition data normalization Study Protocol Type Term Accession Number Study Protocol Type Term Source REF Study Protocol Description Study Protocol URI Study Protocol Version Study Protocol Parameters Name concentration;sample QC Study Protocol Parameters Name Term Accession Number ; Study Protocol Parameters Name Term Source REF ; Study Protocol Components Name Study Protocol Components Type Study Protocol Components Type Term Accession Number Study Protocol Components Type Term Source REF STUDY CONTACTS Study Person Last Name Study Person First Name Study Person Mid Initials Study Person Email Study Person Phone Study Person Fax Study Person Address Study Person Affiliation Study Person Roles Study Person Roles Term Accession Number Study Person Roles Term Source REF -------- /var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmp5xyrzxr6/s_study.txt Source Name Protocol REF Sample Name source1 sample collection sample1 source2 sample collection sample1 source3 sample collection sample2 source4 sample collection sample2
# Serialize the same investigation to ISA-JSON, pretty-printed with sorted keys.
import json
from isatools.isajson import ISAJSONEncoder

study_isajson = json.dumps(
    investigation,
    cls=ISAJSONEncoder,
    sort_keys=True,
    indent=4,
    separators=(',', ': '),
)
print(study_isajson)
{ "comments": [], "description": "", "identifier": "", "ontologySourceReferences": [], "people": [], "publicReleaseDate": "", "publications": [], "studies": [ { "assays": [], "characteristicCategories": [], "comments": [], "description": "a jupytern notebook showing how to create pooled samples (a node merging event with material nodes)", "factors": [], "filename": "s_study.txt", "identifier": "S1", "materials": { "otherMaterials": [], "samples": [ { "@id": "#sample/5116543904", "characteristics": [], "factorValues": [], "name": "sample1" }, { "@id": "#sample/5116545584", "characteristics": [], "factorValues": [], "name": "sample2" } ], "sources": [ { "@id": "#source/5116545344", "characteristics": [], "name": "source1" }, { "@id": "#source/5116545824", "characteristics": [], "name": "source2" }, { "@id": "#source/5119072912", "characteristics": [], "name": "source3" }, { "@id": "#source/5119071040", "characteristics": [], "name": "source4" } ] }, "people": [], "processSequence": [ { "@id": "#process/5119073488", "comments": [], "date": "", "executesProtocol": { "@id": "#5109903952" }, "inputs": [ { "@id": "#source/5116545344" }, { "@id": "#source/5116545824" } ], "name": "", "outputs": [ { "@id": "#sample/5116543904" } ], "parameterValues": [], "performer": "" }, { "@id": "#process/5119071088", "comments": [], "date": "", "executesProtocol": { "@id": "#5109903952" }, "inputs": [ { "@id": "#source/5119072912" }, { "@id": "#source/5119071040" } ], "name": "", "outputs": [ { "@id": "#sample/5116545584" } ], "parameterValues": [], "performer": "" } ], "protocols": [ { "@id": "#5109903952", "comments": [], "components": [], "description": "", "name": "sample collection", "parameters": [], "protocolType": { "@id": "#87632ca1-1109-4f42-bdc9-fd56c30cead8", "annotationValue": "pooling", "comments": [], "termAccession": "", "termSource": "" }, "uri": "", "version": "" }, { "@id": "#5118472000", "comments": [], "components": [], "description": "", "name": "intracellular 
fraction extraction", "parameters": [ { "@id": "#5119051952", "parameterName": { "@id": "#590aaeb5-2e11-4aff-a2c9-9b34bf676249", "annotationValue": "concentration", "comments": [], "termAccession": "", "termSource": "" } }, { "@id": "#5119050704", "parameterName": { "@id": "#33dfce38-e133-4bc0-9666-9a81bfb39362", "annotationValue": "sample QC", "comments": [], "termAccession": "", "termSource": "" } } ], "protocolType": { "@id": "#9a41779d-eb92-467d-88ac-d1595fe189de", "annotationValue": "extraction", "comments": [], "termAccession": "", "termSource": "" }, "uri": "", "version": "" }, { "@id": "#5118472096", "comments": [], "components": [], "description": "", "name": "data collection", "parameters": [], "protocolType": { "@id": "#26e7d865-e6be-454b-bd91-a8ee33a3cbcd", "annotationValue": "data acquisition", "comments": [], "termAccession": "", "termSource": "" }, "uri": "", "version": "" }, { "@id": "#5116464528", "comments": [], "components": [], "description": "", "name": "data transformation", "parameters": [], "protocolType": { "@id": "#e3ed694a-215c-45f5-bb09-cf4c2a6154e7", "annotationValue": "data normalization", "comments": [], "termAccession": "", "termSource": "" }, "uri": "", "version": "" } ], "publicReleaseDate": "", "publications": [], "studyDesignDescriptors": [], "submissionDate": "", "title": "ISA Study example: creating sample pools", "unitCategories": [] } ], "submissionDate": "", "title": "" }
Let's now augment the ISA.Study by adding an Assay table where:
- raw data will be collected independently on each of the samples created in the previous step.
- derived data will result from a data transformation acting on the raw data (node merging).
# This creates intermediate ISA.Materials (Extracts) from Samples.
# The extracts are used as inputs to the next protocol application
# (data collection).
extraction_protocol, acquisition_protocol, transformation_protocol = study.protocols[1:4]

# One extraction per sample: sample1 -> extract-1, sample2 -> extract-2.
extracts = []
extraction_steps = []
for extract_number, sample in enumerate(study.samples[:2], start=1):
    extract = Material(name=f"extract-{extract_number}")
    extract.type = "Extract Name"
    extracts.append(extract)
    extraction_steps.append(
        Process(
            executes_protocol=extraction_protocol,
            inputs=[sample],
            outputs=[extract],
        )
    )

# Assay table: metabolite profiling by mass spectrometry.
metprof_assay = Assay(
    measurement_type=OntologyAnnotation(term="metabolite profiling"),
    technology_type=OntologyAnnotation(term="mass spectrometry"),
    filename="a_mp_by_ms.txt",
)
metprof_assay.samples.extend(study.samples[:2])

# One raw data file per extract.
raw_files = [
    DataFile(filename=f"file-{file_number}", label="Spectral Raw Data File")
    for file_number in (1, 2)
]
metprof_assay.data_files.extend(raw_files)
metprof_assay.other_material.extend(extracts)
metprof_assay.process_sequence.extend(extraction_steps)

# Data collection: one protocol application per extract, each with its own
# assay name, date and performer.
acquisition_runs = [
    ("assay-name-test-1", "2021-03-30", "Bob Louis"),
    ("assay-name-test-2", "2021-04-10", "Yu Wong"),
]
acquisition_steps = []
for (run_name, run_date, run_performer), extract, raw_file in zip(
    acquisition_runs, extracts, raw_files
):
    acquisition_step = Process(
        executes_protocol=acquisition_protocol,
        inputs=[extract],
        outputs=[raw_file],
        date_=run_date,
        performer=run_performer,
    )
    acquisition_step.name = run_name
    acquisition_steps.append(acquisition_step)
metprof_assay.process_sequence.extend(acquisition_steps)

# IMPORTANT: explicitly set the linking/sequence between processes
# NOTE: one-to-one mapping between protocol applications
for extraction_step, acquisition_step in zip(extraction_steps, acquisition_steps):
    plink(extraction_step, acquisition_step)

# Data transformation: both raw files feed a single derived data file.
derived_file = DataFile(filename="analysis-output1.txt", label="Derived Spectral Data File")
transformation_step = Process(
    executes_protocol=transformation_protocol,
    inputs=list(raw_files),
    outputs=[derived_file],
    date_="2021-04-25",
    performer="Data Science Officer",
)
transformation_step.name = "data transformation 1"
metprof_assay.process_sequence.append(transformation_step)

# IMPORTANT: explicitly set the linking/sequence between processes
# NOTE: many-to-one mapping between protocol applications ~ pooling/merging event
for acquisition_step in acquisition_steps:
    plink(acquisition_step, transformation_step)

study.assays.append(metprof_assay)
# Serialize the augmented investigation to ISA-Tab text; the output now
# includes the a_mp_by_ms.txt assay table next to the study table.
from isatools.isatab import dumps

assay_isatab = dumps(investigation)
print(assay_isatab)
/var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmp3hybi37n/i_investigation.txt ONTOLOGY SOURCE REFERENCE Term Source Name Term Source File Term Source Version Term Source Description INVESTIGATION Investigation Identifier Investigation Title Investigation Description Investigation Submission Date Investigation Public Release Date INVESTIGATION PUBLICATIONS Investigation PubMed ID Investigation Publication DOI Investigation Publication Author List Investigation Publication Title Investigation Publication Status Investigation Publication Status Term Accession Number Investigation Publication Status Term Source REF INVESTIGATION CONTACTS Investigation Person Last Name Investigation Person First Name Investigation Person Mid Initials Investigation Person Email Investigation Person Phone Investigation Person Fax Investigation Person Address Investigation Person Affiliation Investigation Person Roles Investigation Person Roles Term Accession Number Investigation Person Roles Term Source REF STUDY Study Identifier S1 Study Title ISA Study example: creating sample pools Study Description a jupytern notebook showing how to create pooled samples (a node merging event with material nodes) Study Submission Date Study Public Release Date Study File Name s_study.txt STUDY DESIGN DESCRIPTORS Study Design Type Study Design Type Term Accession Number Study Design Type Term Source REF STUDY PUBLICATIONS Study PubMed ID Study Publication DOI Study Publication Author List Study Publication Title Study Publication Status Study Publication Status Term Accession Number Study Publication Status Term Source REF STUDY FACTORS Study Factor Name Study Factor Type Study Factor Type Term Accession Number Study Factor Type Term Source REF STUDY ASSAYS Study Assay File Name a_mp_by_ms.txt Study Assay Measurement Type metabolite profiling Study Assay Measurement Type Term Accession Number Study Assay Measurement Type Term Source REF Study Assay Technology Type mass spectrometry Study Assay 
Technology Type Term Accession Number Study Assay Technology Type Term Source REF Study Assay Technology Platform STUDY PROTOCOLS Study Protocol Name sample collection intracellular fraction extraction data collection data transformation Study Protocol Type pooling extraction data acquisition data normalization Study Protocol Type Term Accession Number Study Protocol Type Term Source REF Study Protocol Description Study Protocol URI Study Protocol Version Study Protocol Parameters Name concentration;sample QC Study Protocol Parameters Name Term Accession Number ; Study Protocol Parameters Name Term Source REF ; Study Protocol Components Name Study Protocol Components Type Study Protocol Components Type Term Accession Number Study Protocol Components Type Term Source REF STUDY CONTACTS Study Person Last Name Study Person First Name Study Person Mid Initials Study Person Email Study Person Phone Study Person Fax Study Person Address Study Person Affiliation Study Person Roles Study Person Roles Term Accession Number Study Person Roles Term Source REF -------- /var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmp3hybi37n/s_study.txt Source Name Protocol REF Sample Name source1 sample collection sample1 source2 sample collection sample1 source3 sample collection sample2 source4 sample collection sample2 -------- /var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmp3hybi37n/a_mp_by_ms.txt Sample Name Protocol REF Extract Name Protocol REF Assay Name Date Performer Spectral Raw Data File Protocol REF Date Performer Derived Spectral Data File sample1 intracellular fraction extraction extract-1 data collection assay-name-test-1 2021-03-30 Bob Louis file-1 data transformation 2021-04-25 Data Science Officer analysis-output1.txt sample2 intracellular fraction extraction extract-2 data collection assay-name-test-2 2021-04-10 Yu Wong file-2 data transformation 2021-04-25 Data Science Officer analysis-output1.txt
# Serialize the augmented investigation (study plus assay) to ISA-JSON,
# pretty-printed with sorted keys.
import json
from isatools.isajson import ISAJSONEncoder

assay_isajson = json.dumps(
    investigation,
    cls=ISAJSONEncoder,
    sort_keys=True,
    indent=4,
    separators=(',', ': '),
)
print(assay_isajson)
{ "comments": [], "description": "", "identifier": "", "ontologySourceReferences": [], "people": [], "publicReleaseDate": "", "publications": [], "studies": [ { "assays": [ { "characteristicCategories": [], "comments": [], "dataFiles": [ { "@id": "#data/spectralrawdatafile-5120399872", "comments": [], "name": "file-1", "type": "Spectral Raw Data File" }, { "@id": "#data/spectralrawdatafile-5119233712", "comments": [], "name": "file-2", "type": "Spectral Raw Data File" } ], "filename": "a_mp_by_ms.txt", "materials": { "otherMaterials": [ { "@id": "#material/extract-5116526304", "characteristics": [], "name": "extract-1", "type": "Extract Name" }, { "@id": "#material/extract-5116523040", "characteristics": [], "name": "extract-2", "type": "Extract Name" } ], "samples": [ { "@id": "#sample/5116543904", "characteristics": [], "factorValues": [], "name": "sample1" }, { "@id": "#sample/5116545584", "characteristics": [], "factorValues": [], "name": "sample2" } ] }, "measurementType": { "@id": "#08e97f57-79b7-45e1-ba49-fec0232deb22", "annotationValue": "metabolite profiling", "comments": [], "termAccession": "", "termSource": "" }, "processSequence": [ { "@id": "#process/5116524192", "comments": [], "date": "", "executesProtocol": { "@id": "#5118472000" }, "inputs": [ { "@id": "#sample/5116543904" } ], "name": "", "nextProcess": { "@id": "#process/5119233568" }, "outputs": [ { "@id": "#material/extract-5116526304" } ], "parameterValues": [], "performer": "" }, { "@id": "#process/5116526016", "comments": [], "date": "", "executesProtocol": { "@id": "#5118472000" }, "inputs": [ { "@id": "#sample/5116545584" } ], "name": "", "nextProcess": { "@id": "#process/5119234000" }, "outputs": [ { "@id": "#material/extract-5116523040" } ], "parameterValues": [], "performer": "" }, { "@id": "#process/5119233568", "comments": [], "date": "2021-03-30", "executesProtocol": { "@id": "#5118472096" }, "inputs": [ { "@id": "#material/extract-5116526304" } ], "name": "assay-name-test-1", 
"nextProcess": { "@id": "#process/5116526112" }, "outputs": [ { "@id": "#data/spectralrawdatafile-5120399872" } ], "parameterValues": [], "performer": "Bob Louis", "previousProcess": { "@id": "#process/5116524192" } }, { "@id": "#process/5119234000", "comments": [], "date": "2021-04-10", "executesProtocol": { "@id": "#5118472096" }, "inputs": [ { "@id": "#material/extract-5116523040" } ], "name": "assay-name-test-2", "nextProcess": { "@id": "#process/5116526112" }, "outputs": [ { "@id": "#data/spectralrawdatafile-5119233712" } ], "parameterValues": [], "performer": "Yu Wong", "previousProcess": { "@id": "#process/5116526016" } }, { "@id": "#process/5116526112", "comments": [], "date": "2021-04-25", "executesProtocol": { "@id": "#5116464528" }, "inputs": [ { "@id": "#data/spectralrawdatafile-5120399872" }, { "@id": "#data/spectralrawdatafile-5119233712" } ], "name": "data transformation 1", "outputs": [ { "@id": "#data/derivedspectraldatafile-5116523904" } ], "parameterValues": [], "performer": "Data Science Officer", "previousProcess": { "@id": "#process/5119234000" } } ], "technologyPlatform": "", "technologyType": { "@id": "#4eb6ea6c-c1a9-4b8c-ae88-a59f328cfcd7", "annotationValue": "mass spectrometry", "comments": [], "termAccession": "", "termSource": "" }, "unitCategories": [] } ], "characteristicCategories": [], "comments": [], "description": "a jupytern notebook showing how to create pooled samples (a node merging event with material nodes)", "factors": [], "filename": "s_study.txt", "identifier": "S1", "materials": { "otherMaterials": [], "samples": [ { "@id": "#sample/5116543904", "characteristics": [], "factorValues": [], "name": "sample1" }, { "@id": "#sample/5116545584", "characteristics": [], "factorValues": [], "name": "sample2" } ], "sources": [ { "@id": "#source/5116545344", "characteristics": [], "name": "source1" }, { "@id": "#source/5116545824", "characteristics": [], "name": "source2" }, { "@id": "#source/5119072912", "characteristics": [], 
"name": "source3" }, { "@id": "#source/5119071040", "characteristics": [], "name": "source4" } ] }, "people": [], "processSequence": [ { "@id": "#process/5119073488", "comments": [], "date": "", "executesProtocol": { "@id": "#5109903952" }, "inputs": [ { "@id": "#source/5116545344" }, { "@id": "#source/5116545824" } ], "name": "", "outputs": [ { "@id": "#sample/5116543904" } ], "parameterValues": [], "performer": "" }, { "@id": "#process/5119071088", "comments": [], "date": "", "executesProtocol": { "@id": "#5109903952" }, "inputs": [ { "@id": "#source/5119072912" }, { "@id": "#source/5119071040" } ], "name": "", "outputs": [ { "@id": "#sample/5116545584" } ], "parameterValues": [], "performer": "" } ], "protocols": [ { "@id": "#5109903952", "comments": [], "components": [], "description": "", "name": "sample collection", "parameters": [], "protocolType": { "@id": "#87632ca1-1109-4f42-bdc9-fd56c30cead8", "annotationValue": "pooling", "comments": [], "termAccession": "", "termSource": "" }, "uri": "", "version": "" }, { "@id": "#5118472000", "comments": [], "components": [], "description": "", "name": "intracellular fraction extraction", "parameters": [ { "@id": "#5119051952", "parameterName": { "@id": "#590aaeb5-2e11-4aff-a2c9-9b34bf676249", "annotationValue": "concentration", "comments": [], "termAccession": "", "termSource": "" } }, { "@id": "#5119050704", "parameterName": { "@id": "#33dfce38-e133-4bc0-9666-9a81bfb39362", "annotationValue": "sample QC", "comments": [], "termAccession": "", "termSource": "" } } ], "protocolType": { "@id": "#9a41779d-eb92-467d-88ac-d1595fe189de", "annotationValue": "extraction", "comments": [], "termAccession": "", "termSource": "" }, "uri": "", "version": "" }, { "@id": "#5118472096", "comments": [], "components": [], "description": "", "name": "data collection", "parameters": [], "protocolType": { "@id": "#26e7d865-e6be-454b-bd91-a8ee33a3cbcd", "annotationValue": "data acquisition", "comments": [], "termAccession": "", 
"termSource": "" }, "uri": "", "version": "" }, { "@id": "#5116464528", "comments": [], "components": [], "description": "", "name": "data transformation", "parameters": [], "protocolType": { "@id": "#e3ed694a-215c-45f5-bb09-cf4c2a6154e7", "annotationValue": "data normalization", "comments": [], "termAccession": "", "termSource": "" }, "uri": "", "version": "" } ], "publicReleaseDate": "", "publications": [], "studyDesignDescriptors": [], "submissionDate": "", "title": "ISA Study example: creating sample pools", "unitCategories": [] } ], "submissionDate": "", "title": "" }