From f6d0dca22101fec12dbf94eb6a4fa5352d4c3fbf Mon Sep 17 00:00:00 2001 From: oerc0042 Date: Mon, 1 Dec 2025 12:38:34 +0000 Subject: [PATCH 1/5] closes #605, #375 --- isatools/model/datafile.py | 74 +++++++++++++++++++ isatools/resources/config/xml/histology.xml | 51 ++++++++++++- .../config/xml/phenotyping_imaging.xml | 46 ++++++++++++ tests/isajson/test_isajson.py | 12 +++ tests/isatab/validate/test_core.py | 8 ++ tests/model/test_datafile.py | 2 + tests/utils/test_isatab_configurator.py | 2 +- 7 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 isatools/resources/config/xml/phenotyping_imaging.xml diff --git a/isatools/model/datafile.py b/isatools/model/datafile.py index 04aa8f742..ea0f35b4a 100644 --- a/isatools/model/datafile.py +++ b/isatools/model/datafile.py @@ -202,6 +202,42 @@ def __ne__(self, other): return not self == other +class ImageFile(DataFile): + """Represents an Image file in an experimental graph.""" + + def __init__(self, filename="", id_="", generated_from=None, comments=None): + super().__init__(filename=filename, id_=id_, generated_from=generated_from, comments=comments) + + self.label = "Image File" + + def __repr__(self): + return ( + "isatools.model.ImageFile(filename='{0.filename}', " + "generated_from={0.generated_from}, comments={0.comments})".format(self) + ) + + def __str__(self): + return """ImageFile( + filename={data_file.filename} + generated_from={num_generated_from} Sample objects + comments={num_comments} Comment objects +)""".format(data_file=self, num_generated_from=len(self.generated_from), num_comments=len(self.comments)) + + def __hash__(self): + return hash(repr(self)) + + def __eq__(self, other): + return ( + isinstance(other, ImageFile) + and self.filename == other.filename + and self.generated_from == other.generated_from + and self.comments == other.comments + ) + + def __ne__(self, other): + return not self == other + + class RawSpectralDataFile(DataFile): """Represents a raw spectral data file in an experimental graph.""" @@ -428,6 +464,44 @@ def __ne__(self, other): return not self == other +class MetaboliteAssignmentFile(DataFile): + """Represents a metabolite assignment file in an experimental graph.""" + + def __init__(self, filename="", id_="", generated_from=None, comments=None): + super().__init__(filename=filename, id_=id_, generated_from=generated_from, comments=comments) + + self.label = "Metabolite Assignment File" + + def __repr__(self): + return ( + "isatools.model.MetaboliteAssignmentFile(" + "filename='{data_file.filename}', " + "generated_from={data_file.generated_from}, " + "comments={data_file.comments})".format(data_file=self) + ) + + def __str__(self): + return """MetaboliteAssignmentFile( + filename={data_file.filename} + generated_from={num_generated_from} Sample objects + comments={num_comments} Comment objects +)""".format(data_file=self, num_generated_from=len(self.generated_from), num_comments=len(self.comments)) + + def __hash__(self): + return hash(repr(self)) + + def __eq__(self, other): + return ( + isinstance(other, MetaboliteAssignmentFile) + and self.filename == other.filename + and self.generated_from == other.generated_from + and self.comments == other.comments + ) + + def __ne__(self, other): + return not self == other + + class DerivedArrayDataMatrixFile(DataFile): """Represents a derived array data matrix file in an experimental graph.""" diff --git a/isatools/resources/config/xml/histology.xml b/isatools/resources/config/xml/histology.xml index 2255a4454..8a8ef0360 100644 --- a/isatools/resources/config/xml/histology.xml +++ b/isatools/resources/config/xml/histology.xml @@ -1 +1,50 @@ -[INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT].Extract-[EXTRACT_COUNT].LE-[LABEL_COUNT].ASSAY-[HYB_COUNT][INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT] \ No newline at end of file + + + + + + + + + + + + + + + [INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT].Extract-[EXTRACT_COUNT].LE-[LABEL_COUNT].ASSAY-[HYB_COUNT] + + + + + + + + + + [INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT] + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/isatools/resources/config/xml/phenotyping_imaging.xml b/isatools/resources/config/xml/phenotyping_imaging.xml new file mode 100644 index 000000000..b4de93680 --- /dev/null +++ b/isatools/resources/config/xml/phenotyping_imaging.xml @@ -0,0 +1,46 @@ + + + + + + + + + [INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT] + + + + + + + + [INSTITUTION].Group-[GROUP_NO].Subject-[SUBJECT_NO].[SAMPLE_EXTRACT].Extract-[EXTRACT_COUNT].LE-[LABEL_COUNT].ASSAY-[HYB_COUNT] + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/isajson/test_isajson.py b/tests/isajson/test_isajson.py index 6f8ea1a29..deec7dda8 100644 --- a/tests/isajson/test_isajson.py +++ b/tests/isajson/test_isajson.py @@ -460,6 +460,18 @@ def test_json_load_and_dump_bii_s_test(self): assays = [a for a in studies["assays"] if a["filename"] == "a_assay.txt"][0] self.assertEqual(assays["materials"]["otherMaterials"][1]["type"], "Extract Name") + + def test_json_load_and_dump_bii_s_test(self): + # Load into ISA objects + with open(os.path.join(utils.JSON_DATA_DIR, "ISA-Image", "isa-image.json")) as isajson_fp: + investigation = isajson.load(isajson_fp) + + # Dump into ISA JSON from ISA objects + investigation_reload = json.loads(json.dumps(investigation, cls=isajson.ISAJSONEncoder)) + studies = [s for s in investigation_reload["studies"] if s["filename"] == "s_study.txt"][0] + assays = [a for a in studies["assays"] if a["filename"] == "a_assay.txt"][0] + self.assertEqual(assays["dataFiles"][1]["type"], "Image File") + def test_json_load_and_dump_isa_labeled_extract(self): # Load into ISA objects with open(os.path.join(utils.JSON_DATA_DIR, "TEST-ISA-LabeledExtract1", "isa-test-le1.json")) as isajson_fp: diff --git a/tests/isatab/validate/test_core.py b/tests/isatab/validate/test_core.py index cfb5b9f8f..45cd333ca 100644 --- a/tests/isatab/validate/test_core.py +++ b/tests/isatab/validate/test_core.py @@ -43,6 +43,14 @@ def test_bii_s_7(self): report = validate(fp=data_file, config_dir=self.default_conf) self.assertEqual(len(report["warnings"]), 1) + def test_imaging(self): + data_path = path.join(path.dirname(path.abspath(__file__)), "..", "..", "data", "tab", "Imaging") + with open(path.join(data_path, "i_Investigation.txt"), "r") as data_file: + report = validate(fp=data_file, config_dir=self.default_conf) + print(report["errors"]) + self.assertEqual(len(report["errors"]), 0) + + def test_print_rule(self): raw_rule = INVESTIGATION_RULES_MAPPING[0] rule = Rule(**raw_rule) diff --git a/tests/model/test_datafile.py b/tests/model/test_datafile.py index 0d2a35f51..3ce831293 100644 --- a/tests/model/test_datafile.py +++ b/tests/model/test_datafile.py @@ -81,6 +81,7 @@ class TestSubDataFile(TestCase): def setUp(self): self.types = { "RawDataFile": RawDataFile, + "ImageFile": ImageFile, "DerivedDataFile": DerivedDataFile, "RawSpectralDataFile": RawSpectralDataFile, "DerivedArrayDataFile": DerivedArrayDataFile, @@ -92,6 +93,7 @@ def setUp(self): "PostTranslationalModificationAssignmentFile": PostTranslationalModificationAssignmentFile, "AcquisitionParameterDataFile": AcquisitionParameterDataFile, "FreeInductionDecayDataFile": FreeInductionDecayDataFile, + "MetaboliteAssignmentFile": MetaboliteAssignmentFile } self.classes = {} for filetype in self.types: diff --git a/tests/utils/test_isatab_configurator.py b/tests/utils/test_isatab_configurator.py index 2e172541e..434e5550f 100644 --- a/tests/utils/test_isatab_configurator.py +++ b/tests/utils/test_isatab_configurator.py @@ -50,7 +50,7 @@ def test_load_config_metagenome_seq(self): from isatools.io import isatab_configurator as configurator config_dict = configurator.load(self._config_dir) - self.assertEqual(len(config_dict), 30) + self.assertEqual(len(config_dict), 31) self.assertEqual( config_dict[("metagenome sequencing", "nucleotide sequencing")].isatab_configuration[0].table_name, "metagenome_seq", From 25a20c062d41f0dc9cfca86456cdb08abc82d20c Mon Sep 17 00:00:00 2001 From: oerc0042 Date: Mon, 1 Dec 2025 14:39:12 +0000 Subject: [PATCH 2/5] closes #605 fix to test --- tests/isajson/test_isajson.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/isajson/test_isajson.py b/tests/isajson/test_isajson.py index deec7dda8..91b36019e 100644 --- a/tests/isajson/test_isajson.py +++ b/tests/isajson/test_isajson.py @@ -461,7 +461,7 @@ def test_json_load_and_dump_bii_s_test(self): self.assertEqual(assays["materials"]["otherMaterials"][1]["type"], "Extract Name") - def test_json_load_and_dump_bii_s_test(self): + def test_json_load_and_dump_imagefile_test(self): # Load into ISA objects with open(os.path.join(utils.JSON_DATA_DIR, "ISA-Image", "isa-image.json")) as isajson_fp: investigation = isajson.load(isajson_fp) From bf664f63f329e754b1059fc0002c400d5496b352 Mon Sep 17 00:00:00 2001 From: Milo Thurston Date: Fri, 6 Feb 2026 13:25:14 +0000 Subject: [PATCH 3/5] Changed data_schema -> data_file_schema. --- isatools/resources/schemas/v1.0.1/assay_schema.json | 4 ++-- isatools/resources/schemas/v1.0.1/data_schema.json | 4 ++-- isatools/resources/schemas/v1.0.1/process_schema.json | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/isatools/resources/schemas/v1.0.1/assay_schema.json b/isatools/resources/schemas/v1.0.1/assay_schema.json index 29ed6f8e8..cfd947e38 100644 --- a/isatools/resources/schemas/v1.0.1/assay_schema.json +++ b/isatools/resources/schemas/v1.0.1/assay_schema.json @@ -20,7 +20,7 @@ "dataFiles" : { "type": "array", "items" : { - "$ref": "data_schema.json#" + "$ref": "data_file_schema.json#" } }, "materials": { @@ -68,4 +68,4 @@ } }, "additionalProperties": false -} \ No newline at end of file +} diff --git a/isatools/resources/schemas/v1.0.1/data_schema.json b/isatools/resources/schemas/v1.0.1/data_schema.json index 09d99929c..cf38d3433 100644 --- a/isatools/resources/schemas/v1.0.1/data_schema.json +++ b/isatools/resources/schemas/v1.0.1/data_schema.json @@ -1,5 +1,5 @@ { - "id": "https://raw.githubusercontent.com/ISA-tools/isa-api/master/isatools/resources/schemas/v1.0.1/data_schema.json", + "id": "https://raw.githubusercontent.com/ISA-tools/isa-api/master/isatools/resources/schemas/v1.0.1/data_file_schema.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "ISA Data schema", "name" : "ISA Data schema", @@ -40,4 +40,4 @@ } }, "additionalProperties": false -} \ No newline at end of file +} diff --git a/isatools/resources/schemas/v1.0.1/process_schema.json b/isatools/resources/schemas/v1.0.1/process_schema.json index 341860d16..c40919ad8 100644 --- a/isatools/resources/schemas/v1.0.1/process_schema.json +++ b/isatools/resources/schemas/v1.0.1/process_schema.json @@ -47,7 +47,7 @@ "$ref": "sample_schema.json#" }, { - "$ref": "data_schema.json#" + "$ref": "data_file_schema.json#" }, { "$ref": "material_schema.json#" @@ -63,7 +63,7 @@ "$ref": "sample_schema.json#" }, { - "$ref": "data_schema.json#" + "$ref": "data_file_schema.json#" }, { "$ref": "material_schema.json#" @@ -79,4 +79,4 @@ } }, "additionalProperties": false -} \ No newline at end of file +} From f4ad449689b96cbf54976d0fa112217b5757d43c Mon Sep 17 00:00:00 2001 From: Milo Thurston Date: Fri, 6 Feb 2026 13:26:29 +0000 Subject: [PATCH 4/5] Renamed data_schema -> data_file_schema. --- .../schemas/v1.0.1/{data_schema.json => data_file_schema.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename isatools/resources/schemas/v1.0.1/{data_schema.json => data_file_schema.json} (100%) diff --git a/isatools/resources/schemas/v1.0.1/data_schema.json b/isatools/resources/schemas/v1.0.1/data_file_schema.json similarity index 100% rename from isatools/resources/schemas/v1.0.1/data_schema.json rename to isatools/resources/schemas/v1.0.1/data_file_schema.json From 832ddafb6938b9ddaaa71b2af01f0e381feb9dd5 Mon Sep 17 00:00:00 2001 From: Milo Thurston Date: Mon, 9 Feb 2026 13:37:39 +0000 Subject: [PATCH 5/5] Changed tests to reflect changed table headers. #609 --- isatools/constants.py | 1 + isatools/isatab/validate/rules/rules_40xx.py | 1 + tests/convert/test_mzml2isa.py | 6 +++--- tests/isatab/validate/test_core.py | 4 ++-- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/isatools/constants.py b/isatools/constants.py index 7a56c2c4c..15d061f0f 100644 --- a/isatools/constants.py +++ b/isatools/constants.py @@ -23,6 +23,7 @@ "Acquisition Parameter Data File", "Metabolite Assignment File", "Metabolite Identification File", + "Normalization Name" ] _LABELS_DATA_NODES = [ diff --git a/isatools/isatab/validate/rules/rules_40xx.py b/isatools/isatab/validate/rules/rules_40xx.py index 7850114d6..de95b4d6d 100644 --- a/isatools/isatab/validate/rules/rules_40xx.py +++ b/isatools/isatab/validate/rules/rules_40xx.py @@ -448,6 +448,7 @@ def load_table_checks(df, filename): "Hybridization Assay Name", "Array Design REF", "Scan Name", + "Normalization Name", "Data Transformation Name", ] and not _RX_PARAMETER_VALUE.match(col) diff --git a/tests/convert/test_mzml2isa.py b/tests/convert/test_mzml2isa.py index 9d1a61439..7ac853436 100644 --- a/tests/convert/test_mzml2isa.py +++ b/tests/convert/test_mzml2isa.py @@ -24,7 +24,7 @@ def test_mzml2isa_convert_investigation(self): os.path.join(self._mzml_data_dir, study_id + "-partial"), self._tmp_dir, study_id, validate_output=True ) self.assertEqual(len(report["warnings"]), 8) - self.assertEqual(len(report["errors"]), 3) + self.assertEqual(len(report["errors"]), 2) # Strip out the line with Comment[Created With Tool] to avoid changes in version number generated by mzml2isa with open(os.path.join(self._tmp_dir, "i_Investigation.txt")) as in_fp, StringIO() as stripped_actual_file: @@ -45,7 +45,7 @@ def test_mzml2isa_convert_study_table(self): os.path.join(self._mzml_data_dir, study_id + "-partial"), self._tmp_dir, study_id, validate_output=True ) self.assertEqual(len(report["warnings"]), 8) - self.assertEqual(len(report["errors"]), 3) + self.assertEqual(len(report["errors"]), 2) with open(os.path.join(self._tmp_dir, "s_{}.txt".format(study_id))) as out_fp: with open( os.path.join(self._tab_data_dir, study_id + "-partial", "s_{}.txt".format(study_id)) @@ -59,7 +59,7 @@ def test_mzml2isa_convert_assay_table(self): ) self.assertTrue(report["validation_finished"]) self.assertEqual(len(report["warnings"]), 8) - self.assertEqual(len(report["errors"]), 3) + self.assertEqual(len(report["errors"]), 2) with open( os.path.join(self._tmp_dir, "a_{}_metabolite_profiling_mass_spectrometry.txt".format(study_id)) ) as out_fp: diff --git a/tests/isatab/validate/test_core.py b/tests/isatab/validate/test_core.py index 45cd333ca..6f3f796b1 100644 --- a/tests/isatab/validate/test_core.py +++ b/tests/isatab/validate/test_core.py @@ -23,13 +23,13 @@ def test_mtbls267(self): with open(path.join(data_path, "i_Investigation.txt"), "r") as data_file: r = validate(fp=data_file, config_dir=self.default_conf, origin="mzml2isa") print(r["warnings"]) - self.assertEqual(len(r["errors"]), 4) + self.assertEqual(len(r["errors"]), 3) def test_mtbls_1846(self): data_path = path.join(path.dirname(path.abspath(__file__)), "..", "..", "data", "mtbls", "MTBLS1846") with open(path.join(data_path, "i_Investigation.txt"), "r") as data_file: r = validate(fp=data_file, config_dir=self.default_conf) - self.assertEqual(len(r["errors"]), 20) + self.assertEqual(len(r["errors"]), 18) def test_bii_i_1(self): data_path = path.join(path.dirname(path.abspath(__file__)), "..", "..", "data", "tab", "BII-I-1")