This notebook documents a behavior of the ISA-Tab writer that causes declared but unused ISA Source objects to be left out of the serialized ISA-Tab file. The same ISA objects are serialized correctly when using the ISA-JSON writer. Future releases of the ISA-API will aim to address this issue.
# If executing the notebooks on `Google Colab`, uncomment the following command
# and run it to install the required Python libraries. Also, make the test datasets available.
# !pip install -r requirements.txt
import os
import json
import datetime
from isatools.model import (
Investigation,
Study,
Assay,
Person,
Material,
DataFile,
OntologySource,
OntologyAnnotation,
Sample,
Source,
Characteristic,
Protocol,
Process,
plink
)
from isatools import isatab
from isatools.isajson import ISAJSONEncoder
# Output directory used for the ISA-Tab dump and the validation step below.
final_dir = os.path.abspath(os.path.join('notebook-output', 'issue-brapi'))
# Create it up front so the later writes do not depend on a pre-existing folder.
os.makedirs(final_dir, exist_ok=True)
# Top-level ISA Investigation for this test case, populated at construction time.
investigation = Investigation(
    identifier="BRAPI-test-unused-source",
    title="BRAPI-test-unused-source",
    description="this is test to understand the conditions under which ISA-API will serialize or not serialize a Source entity declared but not used in a workflow. Note: while the python ISA-API does not serialize in the Tab format, the information is available from ISA-JSON.",
)

# The single study of the investigation; 's_prs_test.txt' is the name under
# which its table is looked up further down.
prs_test_study = Study(
    filename="s_prs_test.txt",
    identifier="PRS",
    title="Unused Sources",
    description="testing if the python ISA-API supports unusued Sources in ISA-Tab serialization",
)

# Study contact; the first name is reused later as the performer of the processes.
prs = Person(
    last_name="Rocca-Serra",
    first_name="Philippe",
    mid_initials="T",
    affiliation="OeRC",
    email="prs@hotmail.com",
)
prs_test_study.contacts.append(prs)
print(prs.mid_initials)
T
# Ontology source backing the organism annotation below.
ncbi_taxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
# Declare it on the investigation so that the 'Term Source REF' written for
# the characteristic points at an ontology source the investigation knows about.
investigation.ontology_source_references.append(ncbi_taxon)

# One organism characteristic, shared by all three sources.
human_characteristic = Characteristic(
    category=OntologyAnnotation(term="Organism"),
    value=OntologyAnnotation(
        term="Homo Sapiens",
        term_source=ncbi_taxon,
        term_accession="http://purl.bioontology.org/ontology/NCBITAXON/9606",
    ),
)

# Three human subjects: 'human individual-0' .. 'human individual-2'.
# Each gets its own characteristics list holding the shared characteristic.
subject_0, subject_1, subject_2 = (
    Source(name=f'human individual-{index}', characteristics=[human_characteristic])
    for index in range(3)
)
# Samples; the processes that derive them from the sources are wired up below.
sample_0 = Sample(name='SBJ0_sample1')
# subject_1 gives rise to two samples
sample_1 = Sample(name='SBJ1_sample1')
sample_2 = Sample(name='SBJ1_sample2')
# NOTE(review): the original note here was cut off ("no sample is generated
# from subject_"); in the code below prs_process3 derives sample_3 from subject_2.
sample_3 = Sample(name='SBJ2')

# Register the sources, then the samples, on the study (same order as before).
prs_test_study.sources.extend([subject_0, subject_1, subject_2])
# left disabled by the author:
#prs_test_study.samples.append(subject_2)
prs_test_study.samples.extend([sample_0, sample_1, sample_2, sample_3])
# Sample collection protocol executed by the four collection processes below.
prs_protocol = Protocol(
    name="sample collection",
    protocol_type=OntologyAnnotation(term="sample collection"),
)
prs_test_study.protocols.append(prs_protocol)

# prs_process0 carries neither performer nor date; the other three carry both.
prs_process0 = Process(executes_protocol=prs_protocol)
# Today's date formatted as YYYY-MM-DD.
now = datetime.datetime.now().strftime("%Y-%m-%d")
prs_process1, prs_process2, prs_process3 = (
    Process(executes_protocol=prs_protocol, performer=prs.first_name, date_=now)
    for _ in range(3)
)

# (process, input source, output sample); subject_1 feeds two processes.
collection_events = (
    (prs_process0, subject_0, sample_0),
    (prs_process1, subject_1, sample_1),
    (prs_process2, subject_1, sample_2),
    (prs_process3, subject_2, sample_3),
)
for collection, source, sample in collection_events:
    collection.inputs.append(source)
    collection.outputs.append(sample)

# Add the processes to the study in order 0, 1, 2, 3.
for collection, _, _ in collection_events:
    prs_test_study.process_sequence.append(collection)
# Assay declared on the study, with measurement type "phenotyping" and an
# empty technology type.
assay_on_source = Assay(
    measurement_type=OntologyAnnotation(term="phenotyping"),
    technology_type=OntologyAnnotation(term=""),
    filename="a_assay-test.txt",
)

# data acquisition
# The protocol type is wrapped in an OntologyAnnotation, matching how the
# "sample collection" protocol is declared.
assay_protocol = Protocol(
    name="assay-on-source",
    protocol_type=OntologyAnnotation(term="data acquisition"),
)
prs_test_study.protocols.append(assay_protocol)

assay_process = Process(executes_protocol=assay_protocol, performer=prs.first_name)
dummy_file = DataFile(filename="dummy.txt")
# NOTE(review): the assay process consumes sample_0, while the assay registers
# sample_3 and is chained after prs_process3. The recorded validation log
# reports 'SBJ0_sample1' as not declared in the study file. Confirm that this
# mismatch is part of the scenario being demonstrated before changing it.
assay_process.inputs.append(sample_0)
assay_process.outputs.append(dummy_file)

assay_on_source.data_files.append(dummy_file)
assay_on_source.process_sequence.append(prs_process3)
assay_on_source.process_sequence.append(assay_process)
assay_on_source.samples.append(sample_3)
#assay_on_source.other_material.append(subject_2)

# Link prs_process3 to assay_process with the ISA-API process-link helper.
plink(prs_process3, assay_process)
# The assay process is also appended to the study-level process sequence.
prs_test_study.process_sequence.append(assay_process)
prs_test_study.assays.append(assay_on_source)
investigation.studies.append(prs_test_study)
# Build the investigation's tables as in-memory dataframes, looked up by table
# file name (see the 's_prs_test.txt' lookup below).
dataframes = isatab.dump_tables_to_dataframes(investigation)
2021-07-21 17:44:03,166 [INFO]: isatab.py(_all_end_to_end_paths:1131) >> [0, 1, 2] 2021-07-21 17:44:03,166 [WARNING]: isatab.py(write_study_table_files:1194) >> [7, 3, 0, 8, 4, 1, 9, 5, 10, 6, 2, 11] 2021-07-21 17:44:03,167 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[1, 8, 4], [1, 9, 5], [2, 10, 6]] 2021-07-21 17:44:03,187 [INFO]: isatab.py(_all_end_to_end_paths:1131) >> [6, 3] 2021-07-21 17:44:03,189 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[3, 11]] 2021-07-21 17:44:03,189 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[3, 11]]
# Display the study table; the output recorded below has no row for
# 'human individual-0'.
dataframes['s_prs_test.txt']
| | Source Name | Characteristics[Organism] | Term Source REF | Term Accession Number | Protocol REF | Date | Performer | Sample Name |
---|---|---|---|---|---|---|---|---|
0 | human individual-1 | Homo Sapiens | NCBITaxon | http://purl.bioontology.org/ontology/NCBITAXON... | sample collection | 2021-07-21 | Philippe | SBJ1_sample1 |
1 | human individual-1 | Homo Sapiens | NCBITaxon | http://purl.bioontology.org/ontology/NCBITAXON... | sample collection | 2021-07-21 | Philippe | SBJ1_sample2 |
2 | human individual-2 | Homo Sapiens | NCBITaxon | http://purl.bioontology.org/ontology/NCBITAXON... | sample collection | 2021-07-21 | Philippe | SBJ2 |
# NOTE(review): the assay is declared with filename "a_assay-test.txt", so that
# is presumably the key to use if this lookup is re-enabled. Confirm.
#dataframes['assay-test.txt']
%%capture
# Write the investigation to final_dir; the validation step further down reads
# 'i_investigation.txt' from that same directory.
isatab.dump(isa_obj=investigation, output_path=final_dir)
2021-07-21 17:44:03,322 [INFO]: isatab.py(_all_end_to_end_paths:1131) >> [0, 1, 2] 2021-07-21 17:44:03,323 [WARNING]: isatab.py(write_study_table_files:1194) >> [7, 3, 0, 8, 4, 1, 9, 5, 10, 6, 2, 11] 2021-07-21 17:44:03,323 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[1, 8, 4], [1, 9, 5], [2, 10, 6]] 2021-07-21 17:44:03,340 [INFO]: isatab.py(_all_end_to_end_paths:1131) >> [6, 3] 2021-07-21 17:44:03,341 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[3, 11]] 2021-07-21 17:44:03,342 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[3, 11]]
# Serialize the same investigation as ISA-JSON (sorted keys, 4-space indent).
isa_j = json.dumps(investigation, cls=ISAJSONEncoder, sort_keys=True, indent=4, separators=(',', ': '))
# Write the string 'isa_j' to 'isa_as_json_from_dumps.json' in the output
# directory. The context manager closes (and therefore flushes) the file,
# which the bare open(...).write(...) call did not guarantee.
with open(os.path.join(final_dir, "isa_as_json_from_dumps.json"), "w") as json_file:
    json_file.write(isa_j)
14821
# Validate the investigation file found in final_dir. The handle is opened in
# a context manager so it is closed as soon as validation returns.
with open(os.path.join(final_dir, 'i_investigation.txt')) as investigation_fp:
    my_json_report = isatab.validate(investigation_fp)
2021-07-21 17:44:03,364 [INFO]: isatab.py(validate:4206) >> Loading... /Users/philippe/Documents/git/isa-api2/isa-api/isa-cookbook/content/notebooks/notebook-output/issue-brapi/i_investigation.txt 2021-07-21 17:44:03,484 [INFO]: isatab.py(validate:4208) >> Running prechecks... 2021-07-21 17:44:03,494 [ERROR]: isatab.py(check_samples_not_declared_in_study_used_in_assay:2403) >> (E) Some samples in an assay file a_assay-test.txt are not declared in the study file s_prs_test.txt: ['SBJ0_sample1'] 2021-07-21 17:44:03,521 [INFO]: isatab.py(validate:4229) >> Finished prechecks... 2021-07-21 17:44:03,522 [INFO]: isatab.py(validate:4230) >> Loading configurations found in /Users/philippe/.pyenv/versions/3.9.0/envs/isa-api-py39/lib/python3.9/site-packages/isatools/resources/config/xml 2021-07-21 17:44:03,552 [INFO]: isatab.py(validate:4235) >> Using configurations found in /Users/philippe/.pyenv/versions/3.9.0/envs/isa-api-py39/lib/python3.9/site-packages/isatools/resources/config/xml 2021-07-21 17:44:03,553 [ERROR]: isatab.py(check_measurement_technology_types:3278) >> (E) Could not load configuration for measurement type 'phenotyping' and technology type '' for STUDY ASSAY.0' 2021-07-21 17:44:03,554 [INFO]: isatab.py(validate:4237) >> Checking investigation file against configuration... 2021-07-21 17:44:03,556 [INFO]: isatab.py(validate:4240) >> Finished checking investigation file 2021-07-21 17:44:03,557 [INFO]: isatab.py(validate:4259) >> Loading... s_prs_test.txt 2021-07-21 17:44:03,560 [INFO]: isatab.py(validate:4265) >> Validating s_prs_test.txt against default study table configuration 2021-07-21 17:44:03,560 [INFO]: isatab.py(validate:4267) >> Checking Factor Value presence... 2021-07-21 17:44:03,561 [INFO]: isatab.py(validate:4270) >> Checking required fields... 2021-07-21 17:44:03,561 [INFO]: isatab.py(validate:4273) >> Checking generic fields... 2021-07-21 17:44:03,562 [INFO]: isatab.py(validate:4281) >> Checking unit fields... 
2021-07-21 17:44:03,563 [INFO]: isatab.py(validate:4288) >> Checking protocol fields... 2021-07-21 17:44:03,564 [INFO]: isatab.py(validate:4298) >> Checking ontology fields... 2021-07-21 17:44:03,564 [INFO]: isatab.py(validate:4308) >> Checking study group size... 2021-07-21 17:44:03,564 [INFO]: isatab.py(validate:4312) >> Finished validation on s_prs_test.txt 2021-07-21 17:44:03,565 [ERROR]: isatab.py(validate:4336) >> Could not load config matching (phenotyping, ) 2021-07-21 17:44:03,565 [WARNING]: isatab.py(validate:4340) >> Only have configs matching: 2021-07-21 17:44:03,566 [WARNING]: isatab.py(validate:4342) >> ('protein-dna binding site identification', 'dna microarray') 2021-07-21 17:44:03,566 [WARNING]: isatab.py(validate:4342) >> ('loss of heterozygosity profiling', 'dna microarray') 2021-07-21 17:44:03,567 [WARNING]: isatab.py(validate:4342) >> ('histone modification profiling', 'nucleotide sequencing') 2021-07-21 17:44:03,567 [WARNING]: isatab.py(validate:4342) >> ('dna methylation profiling', 'nucleotide sequencing') 2021-07-21 17:44:03,567 [WARNING]: isatab.py(validate:4342) >> ('protein identification', 'mass spectrometry') 2021-07-21 17:44:03,568 [WARNING]: isatab.py(validate:4342) >> ('protein-protein interaction detection', 'protein microarray') 2021-07-21 17:44:03,568 [WARNING]: isatab.py(validate:4342) >> ('snp analysis', 'dna microarray') 2021-07-21 17:44:03,569 [WARNING]: isatab.py(validate:4342) >> ('[sample]', '') 2021-07-21 17:44:03,569 [WARNING]: isatab.py(validate:4342) >> ('protein-dna binding site identification', 'nucleotide sequencing') 2021-07-21 17:44:03,570 [WARNING]: isatab.py(validate:4342) >> ('metabolite profiling', 'mass spectrometry') 2021-07-21 17:44:03,570 [WARNING]: isatab.py(validate:4342) >> ('metabolite profiling', 'nmr spectroscopy') 2021-07-21 17:44:03,571 [WARNING]: isatab.py(validate:4342) >> ('protein expression profiling', 'protein microarray') 2021-07-21 17:44:03,571 [WARNING]: isatab.py(validate:4342) >> ('cell 
sorting', 'flow cytometry') 2021-07-21 17:44:03,572 [WARNING]: isatab.py(validate:4342) >> ('[investigation]', '') 2021-07-21 17:44:03,573 [WARNING]: isatab.py(validate:4342) >> ('genome sequencing', 'nucleotide sequencing') 2021-07-21 17:44:03,574 [WARNING]: isatab.py(validate:4342) >> ('metagenome sequencing', 'nucleotide sequencing') 2021-07-21 17:44:03,574 [WARNING]: isatab.py(validate:4342) >> ('clinical chemistry analysis', '') 2021-07-21 17:44:03,575 [WARNING]: isatab.py(validate:4342) >> ('transcription factor binding site identification', 'nucleotide sequencing') 2021-07-21 17:44:03,575 [WARNING]: isatab.py(validate:4342) >> ('copy number variation profiling', 'dna microarray') 2021-07-21 17:44:03,576 [WARNING]: isatab.py(validate:4342) >> ('cell counting', 'flow cytometry') 2021-07-21 17:44:03,576 [WARNING]: isatab.py(validate:4342) >> ('transcription factor binding site identification', 'dna microarray') 2021-07-21 17:44:03,576 [WARNING]: isatab.py(validate:4342) >> ('protein expression profiling', 'mass spectrometry') 2021-07-21 17:44:03,577 [WARNING]: isatab.py(validate:4342) >> ('environmental gene survey', 'nucleotide sequencing') 2021-07-21 17:44:03,577 [WARNING]: isatab.py(validate:4342) >> ('transcription profiling', 'real time pcr') 2021-07-21 17:44:03,578 [WARNING]: isatab.py(validate:4342) >> ('histology', '') 2021-07-21 17:44:03,578 [WARNING]: isatab.py(validate:4342) >> ('dna methylation profiling', 'dna microarray') 2021-07-21 17:44:03,579 [WARNING]: isatab.py(validate:4342) >> ('transcription profiling', 'nucleotide sequencing') 2021-07-21 17:44:03,579 [WARNING]: isatab.py(validate:4342) >> ('protein expression profiling', 'gel electrophoresis') 2021-07-21 17:44:03,580 [WARNING]: isatab.py(validate:4342) >> ('transcription profiling', 'dna microarray') 2021-07-21 17:44:03,580 [WARNING]: isatab.py(validate:4342) >> ('hematology', '') 2021-07-21 17:44:03,581 [WARNING]: isatab.py(validate:4345) >> Skipping configuration validation as could not 
load config... 2021-07-21 17:44:03,581 [INFO]: isatab.py(validate:4426) >> Checking consistencies between study sample table and assay tables... 2021-07-21 17:44:03,581 [INFO]: isatab.py(validate:4431) >> Finished checking study sample table against assay tables... 2021-07-21 17:44:03,582 [INFO]: isatab.py(validate:4435) >> Skipping pooling test as there are outstanding errors 2021-07-21 17:44:03,582 [INFO]: isatab.py(validate:4444) >> Finished validation...
# Print the validation report; the recorded output below shows its 'errors',
# 'warnings', 'info' and 'validation_finished' entries.
print(my_json_report)
{'errors': [{'message': 'Measurement/technology type invalid', 'supplemental': 'Measurement phenotyping/technology , STUDY ASSAY.0', 'code': 4002}], 'warnings': [], 'info': [{'message': 'Found -1 study groups in s_prs_test.txt', 'supplemental': 'Found -1 study groups in s_prs_test.txt', 'code': 5001}], 'validation_finished': True}