#!/usr/bin/env python
# coding: utf-8

# # Read related files
#
# Non-standardized metadata files
#
#

# In[445]:

import os

# Directory that holds the non-standardized input metadata files.
input_dir = "./isa_container/input_files"
os.listdir(path=input_dir)

# ## Sample identifiers from ACTION

# In[446]:

IDs_file_path = "./isa_container/input_files/IDs/ACTIONdemonstrator_XOmics_IDs_fake.csv"
import pandas as pd

# Only the first 10 rows of the identifier table are kept.
IDs_df = pd.read_csv(IDs_file_path).iloc[:10,]
print("Data frame dimensions\n", IDs_df.shape)
print("Column names\n", IDs_df.columns)
print("Missing value counts\n", IDs_df.isna().sum())
# head must be called; the bare attribute only references the bound method.
IDs_df.head()

# # Create ISA object

# In[447]:

# current working directory (shown as cell output when run in a notebook)
import numpy as np
import os
os.getcwd()

# ## Investigation
#
# Create a new ISA object

# In[450]:

# create new investigation with single study
from isatools.model import *
investigation = Investigation()

# In[452]:

# define title
investigation.title = "X-omics data analysis, integration and stewardship demonstrator dataset: NTR ACTION omics data"
investigation.title

# In[401]:

investigation.description = "Predict childhood aggression with multi-omics data and demonstrate the FAIRification process and data analysis of a multi-omics project"
investigation.identifier = "tbd"

# ## Study

# In[402]:

# add one study to investigation
investigation.studies.append(Study())
investigation.studies

# According to MetaboLights help site, the title should ideally be the same as for a corresponding manuscript.

# In[403]:

investigation.studies[0].title = "X-omics data analysis, integration and stewardship demonstrator dataset: NTR ACTION omics data"
investigation.studies[0].identifier = "tbd"  # TODO: add identifier; update title
investigation.studies[0].filename = "s_study.txt"

# In[404]:

investigation.studies[0]

# ### Ontologies
#
# Ontologies can be searched e.g. at http://www.ontobee.org/ or https://www.ebi.ac.uk/ols/index.
#
# FAIR genomes lookups: https://github.com/fairgenomes/fairgenomes-semantic-model/tree/main/lookups

# In[405]:

# Ontology sources referenced by the annotations in this script, keyed by a
# lower-case short name. `name` is the ontology abbreviation and `description`
# its full title.
ontologies = {
    "afo": OntologySource(
        name="AFO",
        description="Allotrope Merged Ontology Suite"),
    "chebi": OntologySource(
        name="CHEBI",
        description="Chemical Entities of Biological Interest"),
    "chmo": OntologySource(
        name="CHMO",
        description="Chemical Methods Ontology"),
    "edam": OntologySource(
        name="EDAM",
        description="Bioinformatics operations, data types, formats, identifiers and topics"),
    "efo": OntologySource(
        name="EFO",
        description="Experimental Factor Ontology"),
    # The abbreviation goes into `name`, like every other entry; the long
    # title belongs in `description`.
    "ero": OntologySource(
        name="ERO",
        description="eagle-i resource ontology"),
    "maxo": OntologySource(
        name="MAXO",
        description="Medical Action Ontology"),
    "msio": OntologySource(
        name="MSIO",
        description="Metabolite Standards Initiative Ontology"),
    "ncbitaxon": OntologySource(
        name="NCBITAXON",
        description="NCBI organismal classification"),
    "ncit": OntologySource(
        name="NCIT",
        description="NCI Thesaurus OBO Edition"),
    "obi": OntologySource(
        name="OBI",
        description="Ontology for Biomedical Investigations"),
    "pato": OntologySource(
        name="PATO",
        description="PATO - the Phenotype And Trait Ontology"),
    "uberon": OntologySource(
        name="UBERON",
        description="Uber-anatomy ontology"),
}

# add ontologies to investigation
for ontology_source in ontologies.values():
    investigation.ontology_source_references.append(ontology_source)

# ### Protocols
# Note that the protocol used in the process to derive `sample` from `source` MUST be of type 'sample collection' (see https://isa-specs.readthedocs.io/en/latest/isatab.html#study-table-file).
#
# - ISA model source: https://github.com/ISA-tools/isa-api/blob/master/isatools/model.py

# In[406]:

# Protocol parameters shared between protocol definitions and the parameter
# values attached to individual processes.
protocol_params = {
    "anatomical entity": ProtocolParameter(
        parameter_name=OntologyAnnotation(
            term="anatomical entity",
            term_source=ontologies["uberon"],
            term_accession="http://purl.obolibrary.org/obo/UBERON_0001062")),
}

# define sample collection protocol
sample_collection_protocol = Protocol(
    name="sample collection",
    # see github.com/ISA-tools/isa-specs/blob/master/source/isatab.rst
    # -> MUST be of type 'sample collection'
    protocol_type=OntologyAnnotation(term="sample collection"),
    parameters=[protocol_params["anatomical entity"]])
investigation.studies[0].protocols.append(sample_collection_protocol)

# ### Metabolomics
#
# See also https://ebi.ac.uk/metabolights/guides/Protocol/Protocol for protocols required for submission in MetaboLights, i.e. Sample collection, Extraction, Chromatography, Mass spectrometry, Data transformation, and Metabolite identification.

# ### Study factors

# In[407]:

# gender
studyfactor_gender = StudyFactor(
    name="genotypic sex",
    factor_type=OntologyAnnotation(  # Ontology source reference
        term="genotypic sex",  # also used in FAIR genomes
        term_source=ontologies["pato"],
        term_accession="http://purl.obolibrary.org/obo/PATO_0020000"))

# female
factorvalue_female = FactorValue(
    factor_name=studyfactor_gender,
    value=OntologyAnnotation(  # str or OntologyAnnotation
        term="XX Genotype",  # also used in FAIR genomes
        term_source=ontologies["ncit"],
        term_accession="http://purl.obolibrary.org/obo/NCIT_C45976"))

# male
factorvalue_male = FactorValue(
    factor_name=studyfactor_gender,
    value=OntologyAnnotation(  # str or OntologyAnnotation
        term="XY Genotype",  # also used in FAIR genomes
        term_source=ontologies["ncit"],
        term_accession="http://purl.obolibrary.org/obo/NCIT_C45977"))

# In[408]:

# aggressive behaviour assessment - T-scores
studyfactor_aggression = StudyFactor(
    name="aggression score",
    factor_type=OntologyAnnotation(
        term="childhood aggressive behaviour measurement",
        term_source=ontologies["efo"],
        term_accession="http://www.ebi.ac.uk/efo/EFO_0007663"),
    comments=[
        Comment(
            name="T-score reference",
            # DOI prefix of the journal is "fpsyt" (was misspelled "fpyst").
            value="Age- and sex-specific Aggressive Behaviour T-score as described in Hagenbeek et al. https://doi.org/10.3389/fpsyt.2020.00165")])


# T-score value
class FactorValueAggressionScore(FactorValue):
    """Factor value for the aggression score study factor.

    Pre-fills ``factor_name`` with ``studyfactor_aggression`` and ``unit``
    with the NCIT "T-score" annotation, so callers normally pass only
    ``value``.

    NOTE: the default ``unit`` annotation is created once, when the class is
    defined, and is therefore the same object for every instance that relies
    on the default.
    """

    def __init__(self,
                 factor_name=studyfactor_aggression,
                 value=None,
                 unit=OntologyAnnotation(
                     term="T-score",
                     term_source=ontologies["ncit"],
                     term_accession="http://purl.obolibrary.org/obo/NCIT_C120401"),
                 comments=None):
        super().__init__(factor_name=factor_name,
                         value=value,
                         unit=unit,
                         comments=comments)


# ## Assays

# ### Genotyping

# In[409]:

# Measurement type and platform are still empty placeholders.
assay_genotype = Assay(
    filename="a_assay_genotype.txt",
    measurement_type=OntologyAnnotation(term="", term_source="", term_accession=""),
    technology_type=OntologyAnnotation(term="nucleotide sequencing", term_source="", term_accession=""),
    technology_platform=OntologyAnnotation(term="", term_source="", term_accession=""))  # TODO: What sequencing platform has been used?
# In[410]:

# define extraction and measurement protocols
# The DNA extraction protocol is shared by the genotyping and the DNA
# methylation assays, so it is created and registered exactly once.
dna_extraction_protocol = Protocol(
    name="DNA extraction",
    protocol_type=OntologyAnnotation(
        term="DNA extraction",
        term_source=ontologies["obi"],
        term_accession="http://purl.obolibrary.org/obo/OBI_0000257"))  # TODO: check type; compare to FAIR genomes
investigation.studies[0].protocols.append(dna_extraction_protocol)

# In[411]:

genotype_profiling_protocol = Protocol(
    name="genotype profiling",
    protocol_type=OntologyAnnotation(
        term="genotyping",
        term_source=ontologies["efo"],
        term_accession="http://www.ebi.ac.uk/efo/EFO_0000750"))
investigation.studies[0].protocols.append(genotype_profiling_protocol)  # TODO: check type; compare to FAIR genomes

# ### DNA methylation

# In[412]:

assay_methylation = Assay(
    filename="a_assay_methylation.txt",
    measurement_type=OntologyAnnotation(
        term="Methylation Beta Value",
        term_source=ontologies["ncit"],
        term_accession="http://purl.obolibrary.org/obo/NCIT_C164051"),
    technology_type=OntologyAnnotation(
        term="DNA methylation profiling by array assay",
        term_source=ontologies["obi"],
        term_accession="http://purl.obolibrary.org/obo/OBI_0001332"),
    technology_platform=OntologyAnnotation(
        term="Illumina Infinium MethylationEPIC BeadChip",
        term_source=ontologies["obi"],
        term_accession="http://purl.obolibrary.org/obo/OBI_0002131"))

# In[413]:

# define extraction and measurement protocols
# `dna_extraction_protocol` from the genotyping section is reused here.
# Creating and appending a second identical protocol would list
# "DNA extraction" twice among the study protocols.

# In[414]:

methylation_profiling_protocol = Protocol(
    name="methylation profiling",
    protocol_type=OntologyAnnotation(
        term="methylation profiling",
        term_source=ontologies["efo"],
        term_accession="http://www.ebi.ac.uk/efo/EFO_0000751"),
    # components = [OntologyAnnotation(
    #     term = "Illumina Infinium MethylationEPIC BeadChip",
    #     term_source = obi,
    #     term_accession = "http://purl.obolibrary.org/obo/OBI_0002131")]
)
investigation.studies[0].protocols.append(methylation_profiling_protocol)

# In[415]:

methylation_data_processing_protocol = Protocol(
    name="methylation data processing protocol",
    protocol_type=OntologyAnnotation(
        term="Protocol",
        term_source=ontologies["edam"],
        term_accession="http://edamontology.org/data_2531"),
    description="Sinke, Lucy, van Iterson, Maarten, Cats, Davy, Slieker, Roderick, & Heijmans, Bas. (2019, July 11). DNAmArray: Streamlined workflow for the quality control, normalization, and analysis of Illumina methylation array data (Version 2.1). Zenodo. http://doi.org/10.5281/zenodo.3355292",
    uri="http://doi.org/10.5281/zenodo.3355292")
investigation.studies[0].protocols.append(methylation_data_processing_protocol)

# ### Metabolomics

# In[416]:

assay_metabolomics_amines = Assay(
    filename="a_assay_metabolomics_amines.txt",
    measurement_type=OntologyAnnotation(
        term="targeted metabolite profiling",
        term_source=ontologies["msio"],
        term_accession="http://purl.obolibrary.org/obo/MSIO_0000100"),
    technology_type=OntologyAnnotation(
        term="liquid chromatography-mass spectrometry",
        term_source=ontologies["chmo"],
        term_accession="http://purl.obolibrary.org/obo/CHMO_0000524"))
    # sample_type = OntologyAnnotation(
    #     term = "urine specimen",
    #     term_source = ['obi'],
    #     term_accession = "http:/purl.obolibrary.org/obo/OBI_0000651")
    #
    # technology_platform = OntologyAnnotation(
    #     term = "",
    #     term_source = ontologies[""],
    #     term_accession = "")  # TODO: What exact platform/instrument has been used?
# In[417]:

# Organic acids (OA) assay, measured by GC-MS.
assay_metabolomics_OA = Assay(
    filename="a_assay_metabolomics_OA.txt",
    measurement_type=OntologyAnnotation(
        term="targeted metabolite profiling",
        term_source=ontologies["msio"],
        term_accession="http://purl.obolibrary.org/obo/MSIO_0000100"),
    technology_type=OntologyAnnotation(
        term="gas chromatography-mass spectrometry",
        term_source=ontologies["chmo"],
        # accession added so this assay matches the amines and steroids assays
        term_accession="http://purl.obolibrary.org/obo/CHMO_0000497"))
    # sample_type = OntologyAnnotation(
    #     term = "urine specimen",
    #     term_source = ['obi'],
    #     term_accession = "http:/purl.obolibrary.org/obo/OBI_0000651")
    # technology_platform = OntologyAnnotation(
    #     term = "",
    #     term_source = ontologies[""],
    #     term_accession = "")  # TODO: What exact platform/instrument has been used?

# In[418]:

# Steroids assay, measured by HPLC-MS.
assay_metabolomics_steroids = Assay(
    filename="a_assay_metabolomics_steroids.txt",
    measurement_type=OntologyAnnotation(
        term="targeted metabolite profiling",
        term_source=ontologies["msio"],
        term_accession="http://purl.obolibrary.org/obo/MSIO_0000100"),
    technology_type=OntologyAnnotation(
        term="high-performance liquid chromatography-mass spectrometry",
        term_source=ontologies["chmo"],
        term_accession="http://purl.obolibrary.org/obo/CHMO_0000796"))
    # sample_type = OntologyAnnotation(
    #     term = "urine specimen",
    #     term_source = ['obi'],
    #     term_accession = "http:/purl.obolibrary.org/obo/OBI_0000651")
    # technology_platform = OntologyAnnotation(
    #     term = "",
    #     term_source = ontologies[""],
    #     term_accession = "")  # TODO: What exact platform/instrument has been used?

# In[419]:

urine_sampling_protocol = Protocol(
    name="urine sampling",
    protocol_type=OntologyAnnotation(
        term='urine specimen collection',  # was misspelled "speciment"
        # term_source = [''],
        term_accession='http://snomed.info/id/57617002'))  # TODO: is this a useful ontology?
investigation.studies[0].protocols.append(urine_sampling_protocol)

# In[420]:

extraction_metabolomics = Protocol(
    name="Extraction",
    protocol_type=OntologyAnnotation(
        term='Extraction',
        term_source=ontologies["ncit"],
        term_accession='http://purl.obolibrary.org/obo/NCIT_C61575'),
    parameters=[
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Post Extraction")),
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Derivatization")),
    ])

# In[421]:

chromatography = Protocol(
    name="Chromatography",
    protocol_type=OntologyAnnotation(
        term='Chromatography',
        term_source=ontologies["ncit"],
        term_accession='http://purl.obolibrary.org/obo/NCIT_C16431'),
    parameters=[
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Chromatography Instrument")),
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Column model")),
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Column type")),
    ])

# In[422]:

labelling_metabolites = Protocol(
    name="Labelling metabolites",
    protocol_type=OntologyAnnotation(
        term='Labelling',
        term_source=ontologies["chmo"],
        term_accession='http://purl.obolibrary.org/obo/CHMO_0001675'))

# In[423]:

mass_spectrometry = Protocol(
    name="Mass spectrometry",
    protocol_type=OntologyAnnotation(
        term='Mass spectrometry',
        term_source=ontologies["ncit"],
        term_accession='http://purl.obolibrary.org/obo/NCIT_C17156'),
    parameters=[
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Scan polarity")),
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Scan m/z range")),
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Instrument")),
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Ion source")),
        ProtocolParameter(parameter_name=OntologyAnnotation(term="Mass analyzer")),
    ])

# In[424]:

data_transformation = Protocol(
    name="Data transformation",
    protocol_type=OntologyAnnotation(
        term='Data Transformation',
        term_source=ontologies["ncit"],
        term_accession='http://purl.obolibrary.org/obo/NCIT_C43582'))

# In[425]:

metabolite_identification = Protocol(
    name="Metabolite identification",
    protocol_type=OntologyAnnotation(
        term='peak identification',
        term_source=ontologies["afo"],
        # host is purl.allotrope.org (was misspelled ".erg")
        term_accession='http://purl.allotrope.org/ontologies/process#AFP_0003618'))  # TODO: Is this the correct ontology (source)?

# ## ACTION samples
#
# Add samples to study and link to previously defined protocols and assays.
#
# For MetaboLights, sample information should include unique sample name, organism, organism part, sample type (control, QC, experimental sample), other descriptors as factors (age, gender).

# In[426]:


def _find_by_name(items, name):
    """Return the first element of *items* whose ``name`` equals *name*, else None."""
    return next((item for item in items if item.name == name), None)


def _param_value(term, value):
    """Return a ParameterValue whose category is a new ProtocolParameter named *term*."""
    return ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term=term)),
        value=value)


for idx, row in IDs_df.iterrows():

    # add subjects (sources)
    # TODO: issue - source should represent a source material such as urine,
    # and sample a respective extract or similar

    # check if source was already added (rows can contain duplicate entries)
    source_name = row["XOmicsPhenoID"]
    source = _find_by_name(investigation.studies[0].sources, source_name)
    if source is None:
        # create new source for subject
        source = Source(name=source_name)

        # Characteristics - Organism - should be included for Metabolights
        # here, organism is defined per source, i.e. individual
        source.characteristics.append(
            Characteristic(
                category=OntologyAnnotation(
                    term="organism"),  # TODO: add term source and accession for category? would such information be lost in ISA-Tab?
                value=OntologyAnnotation(
                    term="Homo sapiens",
                    term_source=ontologies["ncbitaxon"],
                    term_accession="http://purl.bioontology.org/ontology/NCBITAXON/9606")))

        # TODO: check if family ID should/should not be added / is required for analysis
        source.characteristics.append(
            Characteristic(category="family ID", value=row["XOmicsFamID"]))

        # The T-score value itself is not filled in yet (empty placeholder).
        source.characteristics.append(
            Characteristic(
                category=OntologyAnnotation(
                    term="childhood aggressive behaviour measurement",
                    term_source=ontologies["efo"],
                    term_accession="http://www.ebi.ac.uk/efo/EFO_0007663"),
                value="",  # tscore,
                unit=OntologyAnnotation(
                    term="T-score",
                    term_source=ontologies["ncit"],
                    term_accession="http://purl.obolibrary.org/obo/NCIT_C120401")))

        #source.factor_values.append(FactorValueAggressionScore(value = tscore))

        # add subject to study
        investigation.studies[0].sources.append(source)

    # add samples - sample names need to be unique
    # urine sample for metabolomics
    if not pd.isna(row["XOmicsmetaboID"]):
        # check if urine sample was already added to study
        urine_sample_name = "urine_{0}".format(source_name)
        urine_sample = _find_by_name(investigation.studies[0].samples, urine_sample_name)
        if urine_sample is None:
            # create a new sample with unique name
            urine_sample = Sample(
                name=urine_sample_name,
                derives_from=[source])  # the individual

            # Characteristics - Organism part - should be included for Metabolights
            # here, organism part is defined per sample
            urine_sample.characteristics.append(
                Characteristic(
                    category=OntologyAnnotation(term="organism part"),
                    value=OntologyAnnotation(
                        term="urine",
                        term_source=ontologies["uberon"],
                        term_accession="http://purl.obolibrary.org/obo/UBERON_0001088")))

            # Characteristics - sample type - should be included for Metabolights
            # i.e. control, QC, experimental sample
            urine_sample.characteristics.append(
                Characteristic(
                    category="sample type",  # TODO: could not find a term yet; sample type is not an ontological term, but required by MetaboLights
                    value=OntologyAnnotation(
                        term="experimental sample",
                        term_source=ontologies["chmo"],
                        term_accession="http://purl.obolibrary.org/obo/CHMO_0002746")))

            # add urine sample to study
            investigation.studies[0].samples.append(urine_sample)

            # check if urine sampling process exists for source
            urine_p_name = "urine_specimen_collection_process_{0}".format(source.name)
            urine_collection_process = _find_by_name(
                investigation.studies[0].process_sequence, urine_p_name)

            if urine_collection_process is None:
                # define urine sampling process for this subject
                urine_collection_process = Process(
                    name=urine_p_name,
                    executes_protocol=sample_collection_protocol,
                    parameter_values=[
                        ParameterValue(
                            category=protocol_params["anatomical entity"],  # ProtocolParameter
                            value="urine")],
                    inputs=[source],
                    outputs=[urine_sample])
                investigation.studies[0].process_sequence.append(urine_collection_process)
            else:
                # urine sampling process already exists for the source
                # add urine sample to outputs of existing process
                urine_collection_process.outputs.append(urine_sample)

    # add samples
    # buccal swab sample for genotyping and DNA methylation arrays
    if not pd.isna(row["XOmicsGenoID"]) or not pd.isna(row["XOmicsMethylID"]):
        buccal_sample_name = "buccal_mucosa_{0}".format(source_name)
        buccal_sample = _find_by_name(investigation.studies[0].samples, buccal_sample_name)
        if buccal_sample is None:
            # create sample of buccal mucosa
            buccal_sample = Sample(
                name=buccal_sample_name,
                derives_from=[source])  # same source as urine sample
            buccal_sample.characteristics.append(
                Characteristic(
                    category=OntologyAnnotation(term="organism part"),
                    value=""))  # TODO: add more characteristics - compare to urine sample

            # add sample to study
            investigation.studies[0].samples.append(buccal_sample)

            # check if buccal swab sampling process exists for source
            # needs to be checked, because multiple samples can be derived from one source
            buccal_p_name = "buccal_specimen_collection_process_{0}".format(source.name)
            buccal_collection_process = _find_by_name(
                investigation.studies[0].process_sequence, buccal_p_name)
            if buccal_collection_process is None:
                # define buccal sampling process for this subject
                buccal_collection_process = Process(
                    name=buccal_p_name,
                    executes_protocol=sample_collection_protocol,
                    parameter_values=[
                        ParameterValue(
                            category=protocol_params["anatomical entity"],  # ProtocolParameter
                            value="buccal mucosa")],
                    inputs=[source],
                    outputs=[buccal_sample])
                investigation.studies[0].process_sequence.append(buccal_collection_process)
            else:
                # buccal sampling process already exists for the source
                # add buccal sample to outputs of existing process
                buccal_collection_process.outputs.append(buccal_sample)

            # NOTE: adding extraction process at study level doesn't seem to work
            # causes sample to disappear from study file
            #if not pd.isna(row["XOmicsMethylID"]):
            #    dna_extraction_process = next(())
            #    assay_methylation.samples.append(buccal_sample) # first check if already added
            #    methylation_profiling_process = Process(
            #        name = "methylation_profiling_{0}".format(source.name),
            #        executes_protocol = methylation_profiling_protocol,
            #        inputs = [buccal_sample],
            ##       outputs = [buccal_dna])

# Genotype assay

# In[427]:

# add samples to genotype assay
for idx, row in IDs_df.iterrows():
    source_name = row["XOmicsPhenoID"]
    if not pd.isna(row["XOmicsGenoID"]):
        buccal_sample_name = "buccal_mucosa_{0}".format(source_name)
        buccal_sample = _find_by_name(investigation.studies[0].samples, buccal_sample_name)
        genotype_sample = _find_by_name(assay_genotype.samples, buccal_sample_name)
        # only add a sample (and its processes) once per assay
        if not genotype_sample:
            assay_genotype.samples.append(buccal_sample)

            # define DNA as material extracted from buccal mucosa sample
            # on study level, because the same DNA is used for genotyping AND DNA methylation profiling
            # TODO: check if this works; could be that extraction has to be on assay level
            # but Study object has process_sequence, i.e. this is technically possible

            # define DNA material for this sample
            # now trying on assay level
            buccal_dna = Material(
                name="buccal_DNA_{0}".format(row["XOmicsGenoID"]),
                type_="Extract Name")
            # define extraction process for buccal DNA
            dna_extraction_process = Process(
                name="DNA_extraction_{0}".format(row["XOmicsGenoID"]),
                executes_protocol=dna_extraction_protocol,
                inputs=[buccal_sample],
                outputs=[buccal_dna])
            #if not pd.isna(row["XOmicsMethylID"]):
            #    dna_extraction_process = next(())
            genotype_profiling_process = Process(
                name="genotype_profiling_{0}".format(row["XOmicsGenoID"]),
                executes_protocol=genotype_profiling_protocol,
                inputs=[buccal_dna])
            plink(dna_extraction_process, genotype_profiling_process)
            assay_genotype.process_sequence.append(dna_extraction_process)
            assay_genotype.process_sequence.append(genotype_profiling_process)

# In[428]:

# add samples to methylation assay
for idx, row in IDs_df.iterrows():
    source_name = row["XOmicsPhenoID"]
    if not pd.isna(row["XOmicsMethylID"]):
        buccal_sample_name = "buccal_mucosa_{0}".format(source_name)
        buccal_sample = _find_by_name(investigation.studies[0].samples, buccal_sample_name)
        methylation_sample = _find_by_name(assay_methylation.samples, buccal_sample_name)
        # only add a sample (and its processes) once per assay
        if not methylation_sample:
            assay_methylation.samples.append(buccal_sample)

            # define DNA as material extracted from buccal mucosa sample
            # on study level, because the same DNA is used for genotyping AND DNA methylation profiling
            # TODO: check if this works; could be that extraction has to be on assay level
            # but Study object has process_sequence, i.e. this is technically possible

            # define DNA material for this sample
            # now trying on assay level
            buccal_dna = Material(
                name="buccal_DNA_{0}".format(row["XOmicsMethylID"]),
                type_="Extract Name")
            # define extraction process for buccal DNA
            dna_extraction_process = Process(
                name="DNA_extraction_{0}".format(row["XOmicsMethylID"]),
                executes_protocol=dna_extraction_protocol,
                inputs=[buccal_sample],
                outputs=[buccal_dna])
            #if not pd.isna(row["XOmicsMethylID"]):
            #    dna_extraction_process = next(())
            methylation_profiling_process = Process(
                name="methylation_profiling_{0}".format(row["XOmicsMethylID"]),
                executes_protocol=methylation_profiling_protocol,
                inputs=[buccal_dna])
            plink(dna_extraction_process, methylation_profiling_process)
            assay_methylation.process_sequence.append(dna_extraction_process)
            assay_methylation.process_sequence.append(methylation_profiling_process)

# Metabolomics assays

# In[429]:

# add samples, processes and datafiles to metabolomics Amines assay
# NOTE(review): rebinding `Assay` hides the `Assay` class used earlier in this
# file to construct the assays; the name is kept because code further down
# refers to it.
Assay = assay_metabolomics_amines

# Define datafiles (not all may be relevant)
# These are placeholder files; the same objects are used for every sample.
raw_datafile = DataFile(filename="link/to/raw/data", label="Raw Spectral Data File")
normalized_datafile = DataFile(filename="link/to/normalized_data", label="Normalization Name")
derived_spectral_data_file = DataFile(filename="link/to/spectral_file", label="Derived Spectral Data File")
Data_Transformation_Name = DataFile(filename="link/to/data_transformation_name", label="Data Transformation Name")
MAF = DataFile(filename="link/to/MAF", label="Metabolite Assignment File")

# Loop over samples and add process to samples
for idx, row in IDs_df.iterrows():
    source_name = row["XOmicsPhenoID"]
    if not pd.isna(row["XOmicsmetaboID"]):
        urine_sample_name = "urine_{0}".format(source_name)
        urine_sample = _find_by_name(investigation.studies[0].samples, urine_sample_name)
        metabolomics_sample = _find_by_name(Assay.samples, urine_sample_name)
        # only add a sample (and its processes) once per assay
        if not metabolomics_sample:
            ## Extraction
            Post_extraction = _param_value(
                "Post Extraction",
                OntologyAnnotation(term="1 uL borate buffer (pH 8.8) with AQC reagent"))
            Derivatization = _param_value("Derivatization", "AQC")
            material_extract = Material(
                name="extract_{0}".format(row["XOmicsmetaboID"]),
                type_="Extract Name")
            extraction_process = Process(
                executes_protocol=extraction_metabolomics,
                parameter_values=[Post_extraction, Derivatization],
                inputs=[urine_sample],
                outputs=[material_extract])

            ## Labelling
            material_label = Material(
                name="labeled_{0}".format(row["XOmicsmetaboID"]),
                type_="Labeled Extract Name")
            labelling_process = Process(
                executes_protocol=labelling_metabolites,
                inputs=[extraction_process.outputs[0]],
                outputs=[material_label])

            ## Chromatography
            # separated_molecules = Material(
            #     name = "separated_molecules_{0}".format(row["XOmicsmetaboID"],
            #     type_ ="Labeled Extract Name")
            #     )
            instrument = _param_value("Chromatography Instrument", "Agilent 1290 Infinity II")
            column_model = _param_value("Column model", "Accq-Tag Ultra column (waters + FURHTER SPECS?)")
            column_type = _param_value("Column type", "reverse phase")
            chromatography_process = Process(
                name="chromatography_{0}".format(row["XOmicsmetaboID"]),
                executes_protocol=chromatography,
                parameter_values=[instrument, column_model, column_type],
                inputs=[labelling_process.outputs[0]],
                outputs=[]
                # outputs = [separated_molecules]
            )

            ## Mass spectrometry
            scan_polarity = _param_value("Scan polarity", "positive")
            scan_range = _param_value("Scan m/z range", "5-2000?")
            instrument = _param_value("Instrument", "AB SCIEX Qtrap 6500")
            ion_source = _param_value("Ion source", "ESI")
            # spelled like the parameter declared on the protocol ("Mass analyzer")
            mass_analyzer = _param_value("Mass analyzer", "triple quadrupole linear ion trap")
            mass_spectrometry_process = Process(
                name="mass_spectrometry_{0}".format(row["XOmicsmetaboID"]),
                executes_protocol=mass_spectrometry,
                parameter_values=[scan_polarity, scan_range, instrument, ion_source, mass_analyzer],
                # inputs = [separated_molecules],
                inputs=[],
                outputs=[raw_datafile]
            )

            ## Data transformation
            data_transformation_process = Process(
                name="data_transformation_{0}".format(row["XOmicsmetaboID"]),
                executes_protocol=data_transformation,
                inputs=[raw_datafile],
                outputs=[normalized_datafile, derived_spectral_data_file]
            )

            ## Metabolite identification
            metabolite_identification_process = Process(
                name="metabolite_identification_{0}".format(row["XOmicsmetaboID"]),
                executes_protocol=metabolite_identification,
                inputs=[normalized_datafile],
                outputs=[Data_Transformation_Name, MAF]
            )

            # Link processes
            plink(extraction_process, labelling_process)
            plink(labelling_process, chromatography_process)
            plink(chromatography_process, mass_spectrometry_process)
            plink(mass_spectrometry_process, data_transformation_process)
            plink(data_transformation_process, metabolite_identification_process)

            # Add samples, materials and data files to the amines assay
            Assay.samples.append(urine_sample)
            Assay.other_material.append(material_extract)
            Assay.other_material.append(material_label)
            # Assay.other_material.append(separated_molecules)
            Assay.data_files.append(raw_datafile)
            Assay.data_files.append(normalized_datafile)
            Assay.data_files.append(derived_spectral_data_file)
            Assay.data_files.append(Data_Transformation_Name)
            Assay.data_files.append(MAF)

            ## Add processes to the amines assay
            Assay.process_sequence.append(extraction_process)
            Assay.process_sequence.append(labelling_process)
            Assay.process_sequence.append(chromatography_process)
            Assay.process_sequence.append(mass_spectrometry_process)
            Assay.process_sequence.append(data_transformation_process)
            Assay.process_sequence.append(metabolite_identification_process)

# In[430]:

# add samples, processes and datafiles to metabolomics OA assay
Assay = assay_metabolomics_OA

# Define datafiles (not all may be relevant)
raw_datafile = DataFile(filename="link/to/raw/data", label="Raw Spectral Data File")
normalized_datafile = DataFile(filename="link/to/normalized_data", label="Normalization Name")
derived_spectral_data_file = DataFile(filename="link/to/spectral_file", label="Derived Spectral Data File")
Data_Transformation_Name = DataFile(filename="link/to/data_transformation_name", label="Data Transformation Name")
MAF = DataFile(filename="link/to/MAF", label="Metabolite Assignment File")

# Loop over samples and add process to samples
for idx, row in IDs_df.iterrows():
    source_name = row["XOmicsPhenoID"]
    if not pd.isna(row["XOmicsmetaboID"]):
        urine_sample_name = "urine_{0}".format(source_name)
        urine_sample = _find_by_name(investigation.studies[0].samples, urine_sample_name)
        metabolomics_sample = _find_by_name(Assay.samples, urine_sample_name)
        # only add a sample (and its processes) once per assay
        if not metabolomics_sample:
            Assay.samples.append(urine_sample)

            ## Extraction
            Post_extraction = _param_value("Post Extraction", "1 uL pyridine")
            Derivatization = _param_value("Derivatization", "oximation followed by silylation")
            material_extract = Material(
                name="extract_{0}".format(row["XOmicsmetaboID"]),
                type_="Extract Name")
            extraction_process = Process(
                executes_protocol=extraction_metabolomics,
                parameter_values=[Post_extraction, Derivatization],
                inputs=[urine_sample],
                outputs=[material_extract])

            ## Labelling
            material_label = Material(
                name="labeled_{0}".format(row["XOmicsmetaboID"]),
                type_="Labeled Extract Name")
            labelling_process = Process(
                executes_protocol=labelling_metabolites,
                inputs=[extraction_process.outputs[0]],
                outputs=[material_label])

            ## Chromatography
            # separated_molecules = Material(
            #     name = "separated_molecules_{0}".format(row["XOmicsmetaboID"],
            #     type_ ="Labeled Extract Name")
            #     )
            instrument = _param_value("Chromatography Instrument", "Agilent Technologies 7890A")
            column_model = _param_value(
                "Column model",
                "HP-5MS UI (5% Phenyl Methyl Silox), 30 m x 0.25 m ID column with a film thickness of 25 um")
            column_type = _param_value("Column type", "low polarity")
            chromatography_process = Process(
                name="chromatography_{0}".format(row["XOmicsmetaboID"]),
                executes_protocol=chromatography,
                parameter_values=[instrument, column_model, column_type],
                inputs=[labelling_process.outputs[0]],
                outputs=[],
                # outputs = [separated_molecules]
            )

            ## Mass spectrometry
            scan_polarity = _param_value("Scan polarity", "positive")
            scan_range = _param_value("Scan m/z range", "50-500")
            instrument = _param_value(
                "Instrument",
                "Agilent Technologies mass selective detector (MSD 5975C) and MultiPurpose Sampler (MPS, MXY016-02A, GERSTEL)")
            ion_source = _param_value("Ion source", "EI (70 eV)")
            # spelled like the parameter declared on the protocol ("Mass analyzer")
            mass_analyzer = _param_value("Mass analyzer", "single-quadrupole")
            mass_spectrometry_process = Process(
                name="mass_spectrometry_{0}".format(row["XOmicsmetaboID"]),
                executes_protocol=mass_spectrometry,
                parameter_values=[scan_polarity, scan_range, instrument, ion_source, mass_analyzer],
                inputs=[],
                # inputs = [separated_molecules],
                outputs=[raw_datafile]
            )

            ## Data transformation
            data_transformation_process = Process(
                name="data_transformation_{0}".format(row["XOmicsmetaboID"]),
                executes_protocol=data_transformation,
                inputs=[raw_datafile],
                outputs=[normalized_datafile, derived_spectral_data_file]
            )

            ## Metabolite identification
            metabolite_identification_process = Process(
                name="metabolite_identification_{0}".format(row["XOmicsmetaboID"]),
                executes_protocol=metabolite_identification,
                inputs=[normalized_datafile],
                outputs=[Data_Transformation_Name, MAF]
            )

            # ## Link processes
            plink(extraction_process, labelling_process)
            plink(labelling_process, chromatography_process)
            plink(chromatography_process, mass_spectrometry_process)
            plink(mass_spectrometry_process, data_transformation_process)
            plink(data_transformation_process, metabolite_identification_process)

            # ## Add samples, materials and data files to the OA assay
            Assay.other_material.append(material_extract)
            Assay.other_material.append(material_label)
            # Assay.other_material.append(separated_molecules)
            Assay.data_files.append(raw_datafile)
            Assay.data_files.append(normalized_datafile)
            Assay.data_files.append(derived_spectral_data_file)
            Assay.data_files.append(Data_Transformation_Name)
Assay.data_files.append(MAF) # ## Add processes to the OA assay Assay.process_sequence.append(extraction_process) Assay.process_sequence.append(labelling_process) Assay.process_sequence.append(chromatography_process) Assay.process_sequence.append(mass_spectrometry_process) Assay.process_sequence.append(data_transformation_process) Assay.process_sequence.append(metabolite_identification_process)
# NOTE(review): the line above is the unmodified tail of the preceding cell
# (its loop header is outside this section), so it is left exactly as found.

# In[432]:

# Add samples, processes and data files to the metabolomics steroids assay.
#
# For every row of IDs_df that carries an "XOmicsmetaboID", the matching urine
# sample of the study is attached to the assay and a six-step process chain is
# built for it: extraction -> labelling -> chromatography -> mass spectrometry
# -> data transformation -> metabolite identification.


def _parameter_value(term, value):
    """Return a ParameterValue whose category is a parameter named *term*."""
    return ParameterValue(
        category=ProtocolParameter(parameter_name=OntologyAnnotation(term=term)),
        value=value)


# NOTE(review): this rebinds the name `Assay`, which presumably also refers to
# the isatools class pulled in by `from isatools.model import *`. The name is
# kept because the other assay cells use the same convention.
Assay = assay_metabolomics_steroids

# Define datafiles (not all may be relevant).
# These are placeholders shared by all samples of this assay.
raw_datafile = DataFile(filename="link/to/raw/data", label="Raw Spectral Data File")
normalized_datafile = DataFile(filename="link/to/normalized_data", label="Normalization Name")
derived_spectral_data_file = DataFile(filename="link/to/spectral_file", label="Derived Spectral Data File")
Data_Transformation_Name = DataFile(filename="link/to/data_transformation_name", label="Data Transformation Name")
MAF = DataFile(filename="link/to/MAF", label="Metabolite Assignment File")

shared_data_files = [
    raw_datafile,
    normalized_datafile,
    derived_spectral_data_file,
    Data_Transformation_Name,
    MAF,
]

# Loop over samples and add processes to samples
for _, row in IDs_df.iterrows():
    source_name = row["XOmicsPhenoID"]

    # Rows without a metabolomics identifier do not take part in this assay.
    if pd.isna(row["XOmicsmetaboID"]):
        continue
    metabo_id = row["XOmicsmetaboID"]

    urine_sample_name = "urine_{0}".format(source_name)
    urine_sample = next(
        (smpl for smpl in investigation.studies[0].samples
         if smpl.name == urine_sample_name),
        None)
    if urine_sample is None:
        # Fail here with a clear message instead of passing None on as an
        # assay sample and as a process input.
        raise ValueError(
            "No study sample named '{0}' found for XOmicsmetaboID '{1}'".format(
                urine_sample_name, metabo_id))

    # Attach the sample to the assay only once (matched by name).
    if not any(smpl.name == urine_sample_name for smpl in Assay.samples):
        Assay.samples.append(urine_sample)

    ## Extraction
    Post_extraction = _parameter_value("Post Extraction", "1 uL filtered urine")
    Derivatization = _parameter_value("Derivatization", "NA")
    material_extract = Material(
        name="extract_{0}".format(metabo_id),
        type_="Extract Name")
    extraction_process = Process(
        executes_protocol=extraction_metabolomics,
        parameter_values=[Post_extraction, Derivatization],
        inputs=[urine_sample],
        outputs=[material_extract])

    ## Labelling
    material_label = Material(
        name="labeled_{0}".format(metabo_id),
        type_="Labeled Extract Name")
    labelling_process = Process(
        executes_protocol=labelling_metabolites,
        inputs=[extraction_process.outputs[0]],
        outputs=[material_label])

    ## Chromatography
    # No intermediate "separated molecules" material is modelled, so this
    # process has no outputs and the mass spectrometry process has no inputs;
    # the two are connected through plink() below.
    instrument = _parameter_value("Chromatography Instrument", "Agilent 1290")
    column_model = _parameter_value("Column model", "Acquity UPLC CSH C18 column (Waters)")
    column_type = _parameter_value("Column type", "reverse phase")
    chromatography_process = Process(
        name="chromatography_{0}".format(metabo_id),
        executes_protocol=chromatography,
        parameter_values=[instrument, column_model, column_type],
        inputs=[labelling_process.outputs[0]],
        outputs=[])

    ## Mass spectrometry
    # TODO: the scan polarity and scan range values below still contain open
    # questions; they are written to the output as-is until they are settled.
    scan_polarity = _parameter_value(
        "Scan polarity",
        "switching positive and negative ion mode !! MAYBE SERPARATE INTO NEGATIVE AND POSITIVE ASSAY?")
    scan_range = _parameter_value("Scan m/z range", "5-3000?")
    # `instrument` is deliberately rebound: from here on it is the MS instrument.
    instrument = _parameter_value("Instrument", "Agilent 6460")
    ion_source = _parameter_value("Ion source", "ESI")
    mass_analyzer = _parameter_value("Mass Analyzer", "triple quadrupole")
    mass_spectrometry_process = Process(
        name="mass_spectrometry_{0}".format(metabo_id),
        executes_protocol=mass_spectrometry,
        parameter_values=[scan_polarity, scan_range, instrument, ion_source, mass_analyzer],
        inputs=[],
        outputs=[raw_datafile])

    ## Data transformation
    data_transformation_process = Process(
        name="data_transformation_{0}".format(metabo_id),
        executes_protocol=data_transformation,
        inputs=[raw_datafile],
        outputs=[normalized_datafile, derived_spectral_data_file])

    ## Metabolite identification
    metabolite_identification_process = Process(
        name="metabolite_identification_{0}".format(metabo_id),
        executes_protocol=metabolite_identification,
        inputs=[normalized_datafile],
        outputs=[Data_Transformation_Name, MAF])

    ## Link processes
    plink(extraction_process, labelling_process)
    plink(labelling_process, chromatography_process)
    plink(chromatography_process, mass_spectrometry_process)
    plink(mass_spectrometry_process, data_transformation_process)
    plink(data_transformation_process, metabolite_identification_process)

    ## Add materials and data files to the steroids assay
    Assay.other_material.append(material_extract)
    Assay.other_material.append(material_label)
    # The data file objects are shared by all samples, so register each of
    # them only once instead of once per sample.
    for data_file in shared_data_files:
        if not any(existing is data_file for existing in Assay.data_files):
            Assay.data_files.append(data_file)

    ## Add processes to the steroids assay
    Assay.process_sequence.extend([
        extraction_process,
        labelling_process,
        chromatography_process,
        mass_spectrometry_process,
        data_transformation_process,
        metabolite_identification_process,
    ])


# In[433]:

# add assays to study
investigation.studies[0].assays.extend([
    assay_genotype,
    assay_methylation,
    assay_metabolomics_amines,
    assay_metabolomics_OA,
    assay_metabolomics_steroids,
])


# # Write ISA-Tab files

# In[434]:

# create ISA files directory (no error if it already exists)
out_dir = "isa_template"
os.makedirs(out_dir, exist_ok=True)

# write to ISA-Tab
from isatools import isatab
isatab.dump(investigation, out_dir)
print()


# In[435]:

# print the whole investigation as ISA-JSON
import json
from isatools.isajson import ISAJSONEncoder
print(json.dumps(investigation, cls=ISAJSONEncoder, sort_keys=True, indent=4, separators=(',', ': ')))


# In[ ]: