diff --git a/CHANGELOG.md b/CHANGELOG.md index 78b318ca..759c779a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,18 @@ +# Unreleased + +## Enhancements + +### Prerelease library schemas can now partner with prerelease standard schemas + +Loading a library schema with `withStandard` pointing to a prerelease version of the standard schema (e.g. `withStandard="8.5.0"`) previously failed with a `BAD_WITH_STANDARD` error because the `withStandard` partner lookup was always restricted to released schemas, with no way to opt in to prerelease partner resolution. + +**Changes:** + +- `load_schema()`, `from_string()`, and `from_dataframes()` in `hed_schema_io.py` now accept a `check_prerelease=False` parameter. When `True`, the `withStandard` partner schema is also searched for in the prerelease cache. +- `SchemaLoader` (base class) and all subclasses (`SchemaLoaderXML`, `SchemaLoaderWiki`, `SchemaLoaderJSON`, `SchemaLoaderDF`) accept and forward `check_prerelease`. +- `check_schema_loading.py` (`hed_check_schema_loading` script and `run_loading_check()`) now automatically passes `check_prerelease=True` when loading schemas from a prerelease directory, so `test_all_prerelease_schemas` in `spec_tests` works correctly for library prereleases partnered with a prerelease standard. +- `run_loading_check()` now raises `ValueError` immediately for mutually exclusive flag combinations (`prerelease_only` + `exclude_prereleases`, or `library_filter` + `standard_only`), consistent with the existing CLI-level validation. + # Release 0.9.0 January 22, 2026 The main purpose of this release is to clean up the CLI for the hedtools and to improve the documentation in preparation for release of 1.0.0, which will be a breaking release. 
diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index 0557bc76..48c30d92 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -63,7 +63,7 @@ def load_schema_version(xml_version=None, xml_folder=None, check_prerelease=Fals return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder, check_prerelease=check_prerelease) -def load_schema(hed_path, schema_namespace=None, schema=None, name=None) -> "HedSchema": +def load_schema(hed_path, schema_namespace=None, schema=None, name=None, check_prerelease=False) -> "HedSchema": """Load a schema from the given file or URL path. Parameters: @@ -75,6 +75,7 @@ def load_schema(hed_path, schema_namespace=None, schema=None, name=None) -> "Hed schema (HedSchema or None): A HED schema to merge this new file into It must be a with-standard schema with the same value. name (str or None): User supplied identifier for this schema + check_prerelease (bool): If True, allow the partnered standard schema (withStandard) to be a prerelease version. Returns: HedSchema: The loaded schema. 
@@ -94,13 +95,15 @@ def load_schema(hed_path, schema_namespace=None, schema=None, name=None) -> "Hed file_as_string = schema_util.url_to_string(hed_path) except URLError as e: raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_path) from e - hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1], name=name) + hed_schema = from_string( + file_as_string, schema_format=os.path.splitext(hed_path.lower())[1], name=name, check_prerelease=check_prerelease + ) elif hed_path.lower().endswith(".xml"): - hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name) + hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name, check_prerelease=check_prerelease) elif hed_path.lower().endswith(".mediawiki"): - hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema, name=name) + hed_schema = SchemaLoaderWiki.load(hed_path, schema=schema, name=name, check_prerelease=check_prerelease) elif hed_path.lower().endswith(".json"): - hed_schema = SchemaLoaderJSON.load(hed_path, schema=schema, name=name) + hed_schema = SchemaLoaderJSON.load(hed_path, schema=schema, name=name, check_prerelease=check_prerelease) elif hed_path.lower().endswith(".tsv") or os.path.isdir(hed_path): if schema is not None: raise HedFileError( @@ -108,7 +111,7 @@ def load_schema(hed_path, schema_namespace=None, schema=None, name=None) -> "Hed "Cannot pass a schema to merge into spreadsheet loading currently.", filename=name, ) - hed_schema = SchemaLoaderDF.load_spreadsheet(filenames=hed_path, name=name) + hed_schema = SchemaLoaderDF.load_spreadsheet(filenames=hed_path, name=name, check_prerelease=check_prerelease) else: raise HedFileError(HedExceptions.INVALID_EXTENSION, "Unknown schema extension", filename=hed_path) @@ -118,7 +121,9 @@ def load_schema(hed_path, schema_namespace=None, schema=None, name=None) -> "Hed return hed_schema -def from_string(schema_string, schema_format=".xml", schema_namespace=None, schema=None, name=None) -> "HedSchema": 
+def from_string( + schema_string, schema_format=".xml", schema_namespace=None, schema=None, name=None, check_prerelease=False +) -> "HedSchema": """Create a schema from the given string. Parameters: @@ -129,6 +134,7 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche schema (HedSchema or None): A HED schema to merge this new file into It must be a with-standard schema with the same value. name (str or None): User supplied identifier for this schema + check_prerelease (bool): If True, allow the partnered standard schema (withStandard) to be a prerelease version. Returns: HedSchema: The loaded schema. @@ -149,11 +155,17 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche schema_string = schema_string.replace("\r\n", "\n") if schema_format.endswith(".xml"): - hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema, name=name) + hed_schema = SchemaLoaderXML.load( + schema_as_string=schema_string, schema=schema, name=name, check_prerelease=check_prerelease + ) elif schema_format.endswith(".mediawiki"): - hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema, name=name) + hed_schema = SchemaLoaderWiki.load( + schema_as_string=schema_string, schema=schema, name=name, check_prerelease=check_prerelease + ) elif schema_format.endswith(".json"): - hed_schema = SchemaLoaderJSON.load(schema_as_string=schema_string, schema=schema, name=name) + hed_schema = SchemaLoaderJSON.load( + schema_as_string=schema_string, schema=schema, name=name, check_prerelease=check_prerelease + ) else: raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name) @@ -162,7 +174,7 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche return hed_schema -def from_dataframes(schema_data, schema_namespace=None, name=None) -> "HedSchema": +def from_dataframes(schema_data, schema_namespace=None, name=None, 
check_prerelease=False) -> "HedSchema": """Create a schema from the given string. Parameters: @@ -170,6 +182,7 @@ def from_dataframes(schema_data, schema_namespace=None, name=None) -> "HedSchema Should have an entry for all values of DF_SUFFIXES. schema_namespace (str, None): The name_prefix all tags in this schema will accept. name (str or None): User supplied identifier for this schema + check_prerelease (bool): If True, allow the partnered standard schema (withStandard) to be a prerelease version. Returns: HedSchema: The loaded schema. @@ -187,7 +200,9 @@ def from_dataframes(schema_data, schema_namespace=None, name=None) -> "HedSchema HedExceptions.BAD_PARAMETERS, "Empty or non dict value passed to HedSchema.from_dataframes", filename=name ) - hed_schema = SchemaLoaderDF.load_spreadsheet(schema_as_strings_or_df=schema_data, name=name) + hed_schema = SchemaLoaderDF.load_spreadsheet( + schema_as_strings_or_df=schema_data, name=name, check_prerelease=check_prerelease + ) if schema_namespace: hed_schema.set_schema_prefix(schema_namespace=schema_namespace) diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py index c9ce8d10..6e0f4131 100644 --- a/hed/schema/schema_io/base2schema.py +++ b/hed/schema/schema_io/base2schema.py @@ -20,7 +20,7 @@ class SchemaLoader(ABC): SchemaLoaderXML(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name="", check_prerelease=False): """Loads the given schema from one of the two parameters. Parameters: @@ -30,6 +30,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non It must be a with-standard schema with the same value. 
file_format(str or None): The format of this file if needed(only for owl currently) name(str or None): Optional user supplied identifier, by default uses filename + check_prerelease(bool): If True, allow the partnered standard schema to be a prerelease version. """ if schema_as_string and filename: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Invalid parameters to schema creation.", filename) @@ -38,6 +39,7 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non self.name = name if name else filename self.schema_as_string = schema_as_string self.appending_to_schema = False + self.check_prerelease = check_prerelease try: self.input_data = self._open_file() except OSError as e: @@ -87,7 +89,7 @@ def schema(self): return self._schema @classmethod - def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None, name=""): + def load(cls, filename=None, schema_as_string=None, schema=None, file_format=None, name="", check_prerelease=False): """Loads and returns the schema, including partnered schema if applicable. Parameters: @@ -98,11 +100,12 @@ def load(cls, filename=None, schema_as_string=None, schema=None, file_format=Non file_format(str or None): If this is an owl file being loaded, this is the format. Allowed values include: turtle, json-ld, and owl(xml) name(str or None): Optional user supplied identifier, by default uses filename + check_prerelease(bool): If True, allow the partnered standard schema to be a prerelease version. 
Returns: HedSchema: The new schema """ - loader = cls(filename, schema_as_string, schema, file_format, name) + loader = cls(filename, schema_as_string, schema, file_format, name, check_prerelease) return loader._load() def _load(self): @@ -119,7 +122,7 @@ def _load(self): saved_attr = self._schema.header_attributes saved_format = self._schema.source_format try: - base_version = load_schema_version(self._schema.with_standard) + base_version = load_schema_version(self._schema.with_standard, check_prerelease=self.check_prerelease) except HedFileError as e: raise HedFileError( HedExceptions.BAD_WITH_STANDARD, diff --git a/hed/schema/schema_io/df2schema.py b/hed/schema/schema_io/df2schema.py index 15adf115..5e0d6c35 100644 --- a/hed/schema/schema_io/df2schema.py +++ b/hed/schema/schema_io/df2schema.py @@ -22,18 +22,18 @@ class SchemaLoaderDF(SchemaLoader): Note: due to supporting multiple files, this one differs from the other schema loaders """ - def __init__(self, filenames, schema_as_strings_or_df, name=""): + def __init__(self, filenames, schema_as_strings_or_df, name="", check_prerelease=False): self.filenames = df_util.convert_filenames_to_dict(filenames) self.schema_as_strings_or_df = schema_as_strings_or_df if self.filenames: reported_filename = self.filenames.get(constants.STRUCT_KEY) else: reported_filename = "from_strings" - super().__init__(reported_filename, None, None, None, name) + super().__init__(reported_filename, None, None, None, name, check_prerelease) self._schema.source_format = "spreadsheet" @classmethod - def load_spreadsheet(cls, filenames=None, schema_as_strings_or_df=None, name=""): + def load_spreadsheet(cls, filenames=None, schema_as_strings_or_df=None, name="", check_prerelease=False): """Loads and returns the schema, including partnered schema if applicable. Parameters: @@ -41,11 +41,12 @@ def load_spreadsheet(cls, filenames=None, schema_as_strings_or_df=None, name="") If a single filename string, assumes the standard filename suffixes. 
schema_as_strings_or_df(None or dict of str): A valid set of schema spreadsheet files(tsv as strings) name (str): what to identify this schema as. + check_prerelease(bool): If True, allow the partnered standard schema to be a prerelease version. Returns: HedSchema: The new schema """ - loader = cls(filenames, schema_as_strings_or_df=schema_as_strings_or_df, name=name) + loader = cls(filenames, schema_as_strings_or_df=schema_as_strings_or_df, name=name, check_prerelease=check_prerelease) hed_schema = loader._load() return hed_schema diff --git a/hed/schema/schema_io/json2schema.py b/hed/schema/schema_io/json2schema.py index 8a2839b1..5fbbf73f 100644 --- a/hed/schema/schema_io/json2schema.py +++ b/hed/schema/schema_io/json2schema.py @@ -17,7 +17,7 @@ class SchemaLoaderJSON(SchemaLoader): SchemaLoaderJSON(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name="", check_prerelease=False): """Initialize the JSON schema loader. Parameters: @@ -26,8 +26,9 @@ def __init__(self, filename, schema_as_string=None, schema=None, file_format=Non schema (HedSchema or None): A HED schema to merge this new file into file_format (str or None): Not used for JSON name (str or None): Optional user supplied identifier, by default uses filename + check_prerelease (bool): If True, allow the partnered standard schema to be a prerelease version. 
""" - super().__init__(filename, schema_as_string, schema, file_format, name) + super().__init__(filename, schema_as_string, schema, file_format, name, check_prerelease) self._json_data = None self._schema.source_format = ".json" diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py index 92b20f62..442bbf82 100644 --- a/hed/schema/schema_io/wiki2schema.py +++ b/hed/schema/schema_io/wiki2schema.py @@ -45,8 +45,18 @@ class SchemaLoaderWiki(SchemaLoader): SchemaLoaderWiki(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): - super().__init__(filename, schema_as_string, schema, file_format, name) + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name="", check_prerelease=False): + """Initialize the MediaWiki schema loader. + + Parameters: + filename (str or None): A valid filepath or None + schema_as_string (str or None): A full schema as text or None + schema (HedSchema or None): A HED schema to merge this new file into + file_format (str or None): Not used for MediaWiki + name (str or None): Optional user supplied identifier, by default uses filename + check_prerelease (bool): If True, allow the partnered standard schema to be a prerelease version. 
+ """ + super().__init__(filename, schema_as_string, schema, file_format, name, check_prerelease) self._schema.source_format = ".mediawiki" def _open_file(self): diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py index 5a3b84a4..fe0eac4e 100644 --- a/hed/schema/schema_io/xml2schema.py +++ b/hed/schema/schema_io/xml2schema.py @@ -21,8 +21,18 @@ class SchemaLoaderXML(SchemaLoader): SchemaLoaderXML(filename) will load just the header_attributes """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): - super().__init__(filename, schema_as_string, schema, file_format, name) + def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name="", check_prerelease=False): + """Initialize the XML schema loader. + + Parameters: + filename (str or None): A valid filepath or None + schema_as_string (str or None): A full schema as text or None + schema (HedSchema or None): A HED schema to merge this new file into + file_format (str or None): Not used for XML + name (str or None): Optional user supplied identifier, by default uses filename + check_prerelease (bool): If True, allow the partnered standard schema to be a prerelease version. + """ + super().__init__(filename, schema_as_string, schema, file_format, name, check_prerelease) self._root_element = None self._parent_map = {} self._schema.source_format = ".xml" diff --git a/hed/scripts/check_schema_loading.py b/hed/scripts/check_schema_loading.py index 576cdac9..3765f073 100644 --- a/hed/scripts/check_schema_loading.py +++ b/hed/scripts/check_schema_loading.py @@ -131,18 +131,19 @@ def get_schema_files(self, root_dir, format_dir, prerelease=False): return sorted(schema_files) - def try_load_schema(self, schema_path, relative_path): + def try_load_schema(self, schema_path, relative_path, check_prerelease=False): """Try to load a single schema file. Parameters: schema_path (Path): Path to schema file/directory. 
relative_path (Path): Relative path for display purposes. + check_prerelease (bool): If True, allow the partnered standard schema to be a prerelease version. Returns: tuple: (success: bool, error_message: str or None) """ try: - schema = load_schema(str(schema_path)) + schema = load_schema(str(schema_path), check_prerelease=check_prerelease) if schema is None: return False, "Schema loaded as None" @@ -156,7 +157,7 @@ def try_load_schema(self, schema_path, relative_path): error_msg = f"{type(e).__name__}: {str(e)}" return False, error_msg - def _test_format_group(self, root_dir, format_name, prerelease=False, indent=""): + def _test_format_group(self, root_dir, format_name, prerelease=False, indent="", schema_files=None): """Test loading schemas for a single format in a directory. Parameters: @@ -164,12 +165,14 @@ def _test_format_group(self, root_dir, format_name, prerelease=False, indent="") format_name (str): Format to test (xml, mediawiki, json, tsv). prerelease (bool): If True, look in prerelease/ subdirectory. indent (str): Indentation prefix for output. + schema_files (list or None): Pre-fetched list of schema Paths. If None, fetched automatically. Returns: bool: True if any schemas were found and tested. 
""" - format_dir = FORMAT_DIR_MAP.get(format_name.lower(), format_name) - schema_files = self.get_schema_files(root_dir, format_dir, prerelease=prerelease) + if schema_files is None: + format_dir = FORMAT_DIR_MAP.get(format_name.lower(), format_name) + schema_files = self.get_schema_files(root_dir, format_dir, prerelease=prerelease) if not schema_files: return False @@ -181,7 +184,7 @@ def _test_format_group(self, root_dir, format_name, prerelease=False, indent="") relative_path = schema_path.relative_to(self.hed_schemas_root) self.results["total"] += 1 - success, error = self.try_load_schema(schema_path, relative_path) + success, error = self.try_load_schema(schema_path, relative_path, check_prerelease=prerelease) if success: self.results["passed"] += 1 @@ -299,7 +302,7 @@ def test_library_prereleases(self, format_filter=None, library_filter=None): library_has_schemas = True found_any = True - self._test_format_group(library_dir, format_name, prerelease=True, indent=" ") + self._test_format_group(library_dir, format_name, prerelease=True, indent=" ", schema_files=schema_files) if not found_any: print("[INFO] No prerelease schemas found") @@ -377,7 +380,16 @@ def run_loading_check( Returns: dict: Results dictionary with keys 'total', 'passed', 'failed', and 'failures' (list of dicts with 'path' and 'error'). + + Raises: + ValueError: If mutually exclusive flags are combined (e.g., --exclude-prereleases and + --prerelease-only, or --library and --standard-only). 
""" + if prerelease_only and exclude_prereleases: + raise ValueError("--exclude-prereleases and --prerelease-only are mutually exclusive") + if library_filter and standard_only: + raise ValueError("--library and --standard-only are mutually exclusive") + tester = SchemaLoadTester(hed_schemas_root, verbose=verbose) print("\n" + "=" * 80) diff --git a/hed/scripts/schema_script_util.py b/hed/scripts/schema_script_util.py index 1f2a6f25..45027ca8 100644 --- a/hed/scripts/schema_script_util.py +++ b/hed/scripts/schema_script_util.py @@ -1,6 +1,7 @@ import os.path from collections import defaultdict from hed.schema import from_string, load_schema, from_dataframes +from hed.schema import hed_cache from hed.errors import get_printable_issue_string, HedFileError from hed.errors.error_types import ErrorSeverity from hed.schema.schema_comparer import SchemaComparer @@ -8,6 +9,29 @@ all_extensions = [".tsv", ".mediawiki", ".xml", ".json"] +def _is_prerelease_partner(base_schema) -> bool: + """Return True if base_schema's withStandard partner is only resolvable from the prerelease cache. + + When a library schema serialised with ``save_merged=False`` is reloaded, the loader + re-fetches the standard schema named in the ``withStandard`` header attribute. If + that version lives only in the prerelease subdirectory of the cache, the reload will + fail unless ``check_prerelease=True`` is forwarded. This helper detects that + condition by asking the cache whether the version is found without the prerelease + flag (not found → prerelease required). + + Parameters: + base_schema (HedSchema): The schema to inspect. + + Returns: + bool: True if ``withStandard`` is set and the version is absent from the + regular (non-prerelease) cache directory. 
+ """ + with_standard = base_schema.with_standard + if not with_standard: + return False + return hed_cache.get_hed_version_path(with_standard, check_prerelease=False) is None + + def validate_schema_object(base_schema, schema_name): """Validate a schema object by checking compliance and roundtrip conversion. @@ -30,25 +54,16 @@ def validate_schema_object(base_schema, schema_name): validation_issues.append(error_message) return validation_issues - mediawiki_string = base_schema.get_as_mediawiki_string(save_merged=True) - reloaded_schema = from_string(mediawiki_string, schema_format=".mediawiki") - - validation_issues += _get_schema_comparison(base_schema, reloaded_schema, schema_name, "mediawiki") - - xml_string = base_schema.get_as_xml_string(save_merged=True) - reloaded_schema = from_string(xml_string, schema_format=".xml") - - validation_issues += _get_schema_comparison(base_schema, reloaded_schema, schema_name, "xml") - - json_string = base_schema.get_as_json_string(save_merged=True) - reloaded_schema = from_string(json_string, schema_format=".json") - - validation_issues += _get_schema_comparison(base_schema, reloaded_schema, schema_name, "json") - - tsv_dataframes = base_schema.get_as_dataframes(save_merged=True) - reloaded_schema = from_dataframes(tsv_dataframes) + # If the withStandard partner only exists in the prerelease cache, all unmerged + # reloads must pass check_prerelease=True or they will fail partner resolution. 
+ check_prerelease = _is_prerelease_partner(base_schema) - validation_issues += _get_schema_comparison(base_schema, reloaded_schema, schema_name, "tsv") + for save_merged in (True, False): + label = "merged" if save_merged else "unmerged" + tagged_name = f"{schema_name} ({label})" + validation_issues += _roundtrip_all_formats( + base_schema, tagged_name, save_merged=save_merged, check_prerelease=check_prerelease + ) except HedFileError as e: print(f"Saving/loading error: {schema_name} {e.message}") error_text = e.message @@ -306,6 +321,48 @@ def get_prerelease_path(repo_path, schema_name, schema_version): return os.path.join(base_path, "hedtsv", schema_filename) +def _roundtrip_all_formats(base_schema, schema_name, save_merged=True, check_prerelease=False): + """Roundtrip a schema through all four formats and compare to the original. + + Serializes the schema to mediawiki, XML, JSON, and TSV, reloads each, and + verifies the reloaded schema matches the original. + + Parameters: + base_schema (HedSchema): The schema object to roundtrip. + schema_name (str): Label for error reporting (should include merge context). + save_merged (bool): If True, save the merged (with-standard) form. + If False, save only the library-specific content. + check_prerelease (bool): If True, pass check_prerelease=True to all reload + calls. Required when the schema's withStandard partner exists only in + the prerelease cache directory; otherwise unmerged reloads will fail + partner resolution. Has no effect when save_merged=True because the + merged serialisation embeds the full standard content and no partner + lookup is performed on reload. + + Returns: + list: A list of validation issue strings. Empty if no issues found. 
+ """ + issues = [] + + mediawiki_string = base_schema.get_as_mediawiki_string(save_merged=save_merged) + reloaded_schema = from_string(mediawiki_string, schema_format=".mediawiki", check_prerelease=check_prerelease) + issues += _get_schema_comparison(base_schema, reloaded_schema, schema_name, "mediawiki") + + xml_string = base_schema.get_as_xml_string(save_merged=save_merged) + reloaded_schema = from_string(xml_string, schema_format=".xml", check_prerelease=check_prerelease) + issues += _get_schema_comparison(base_schema, reloaded_schema, schema_name, "xml") + + json_string = base_schema.get_as_json_string(save_merged=save_merged) + reloaded_schema = from_string(json_string, schema_format=".json", check_prerelease=check_prerelease) + issues += _get_schema_comparison(base_schema, reloaded_schema, schema_name, "json") + + tsv_dataframes = base_schema.get_as_dataframes(save_merged=save_merged) + reloaded_schema = from_dataframes(tsv_dataframes, check_prerelease=check_prerelease) + issues += _get_schema_comparison(base_schema, reloaded_schema, schema_name, "tsv") + + return issues + + def _get_schema_comparison(schema, schema_reload, file_path, file_format): """Compare two schema objects and generate error message if they differ. 
diff --git a/spec_tests/test_loading_schemas.py b/spec_tests/test_loading_schemas.py index cf7d9716..c5fd4009 100644 --- a/spec_tests/test_loading_schemas.py +++ b/spec_tests/test_loading_schemas.py @@ -38,5 +38,19 @@ def test_all_prerelease_schemas(self): # Prereleases may or may not exist — no assertGreater here +class TestRunLoadingCheckFlags(unittest.TestCase): + """Test that run_loading_check rejects mutually exclusive flag combinations.""" + + def test_prerelease_only_and_exclude_prereleases_raises(self): + """prerelease_only and exclude_prereleases together should raise ValueError.""" + with self.assertRaises(ValueError): + run_loading_check(HED_SCHEMAS_DIR, prerelease_only=True, exclude_prereleases=True) + + def test_library_filter_and_standard_only_raises(self): + """library_filter and standard_only together should raise ValueError.""" + with self.assertRaises(ValueError): + run_loading_check(HED_SCHEMAS_DIR, library_filter="score", standard_only=True) + + if __name__ == "__main__": unittest.main() diff --git a/tests/data/schema_tests/prerelease/HED9.9.9.mediawiki b/tests/data/schema_tests/prerelease/HED9.9.9.mediawiki new file mode 100644 index 00000000..4f7ad4da --- /dev/null +++ b/tests/data/schema_tests/prerelease/HED9.9.9.mediawiki @@ -0,0 +1,27 @@ +HED version="9.9.9" + +'''Prologue''' +Minimal prerelease-only standard schema for unit testing withStandard partner +resolution via load_schema/from_string. + +!# start schema + +'''Event''' [Something that happens at a given time and place.] + +'''Property''' [A characteristic of something.] 
+ +!# end schema + +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + +'''Epilogue''' + +!# end hed diff --git a/tests/data/schema_tests/prerelease/HED_testpre_1.0.0.mediawiki b/tests/data/schema_tests/prerelease/HED_testpre_1.0.0.mediawiki new file mode 100644 index 00000000..c2d1ca1d --- /dev/null +++ b/tests/data/schema_tests/prerelease/HED_testpre_1.0.0.mediawiki @@ -0,0 +1,25 @@ +HED library="testpre" version="1.0.0" withStandard="9.9.9" unmerged="True" + +'''Prologue''' +Library schema whose withStandard partner (9.9.9) exists only in the prerelease +directory. Used to test check_prerelease propagation through load_schema/from_string. + +!# start schema + +'''Prerelease-partner-only-item''' [A test item in the testpre library schema.] + +!# end schema + +'''Unit classes''' + +'''Unit modifiers''' + +'''Value classes''' + +'''Schema attributes''' + +'''Properties''' + +'''Epilogue''' + +!# end hed diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index 5e79646c..4b678212 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -1,12 +1,14 @@ import unittest +from unittest.mock import patch from hed.errors import HedFileError from hed.errors.error_types import SchemaErrors from hed.schema import load_schema, HedSchemaGroup, load_schema_version, HedSchema -from hed.schema.hed_schema_io import parse_version_list, _load_schema_version +from hed.schema.hed_schema_io import parse_version_list, _load_schema_version, from_string from tests.schema.schema_test_helpers import with_temp_file, get_temp_filename import os +import tempfile from hed.errors import HedExceptions from hed.schema import HedKey from hed.schema import hed_cache @@ -672,3 +674,199 @@ def test_mixed_regular_and_prerelease_schemas(self): self.assertEqual(len(schemas._schemas), 2, "Should have two schemas") self.assertIn("base:", schemas._schemas, "Should have base 
class TestLoadSchemaWithPrereleasePartner(unittest.TestCase):
    """Test that check_prerelease propagates correctly through load_schema() and from_string()
    all the way into withStandard partner resolution.

    Background
    ----------
    A library schema with ``withStandard="X.Y.Z"`` and ``unmerged="True"`` is a *partnered*
    schema: when it is loaded, the loader (base2schema.SchemaLoader._load) automatically calls
    ``load_schema_version("X.Y.Z")`` to fetch the standard schema and merges the library's tags
    on top of it.  If version X.Y.Z only exists in the *prerelease* subdirectory of the cache
    (not in the regular cache root), that ``load_schema_version`` call will fail with
    ``BAD_WITH_STANDARD`` unless ``check_prerelease=True`` is forwarded along the entire call
    chain:

        load_schema / from_string
          → SchemaLoaderWiki / SchemaLoaderXML   (check_prerelease stored on loader)
            → SchemaLoader._load
              → load_schema_version(with_standard, check_prerelease=self.check_prerelease)
                → hed_cache.get_hed_version_path(..., check_prerelease=...)
                  → looks in {cache}/prerelease/ when True

    These tests exercise both the success path (flag=True → schema loads and is merged
    correctly) and the default/False failure path (flag omitted or False →
    BAD_WITH_STANDARD is raised before any merge happens).

    Fixture design
    --------------
    Source files are kept as human-editable MediaWiki so they are easy to update:

    tests/data/schema_tests/prerelease/HED9.9.9.mediawiki
        A minimal standard schema at version 9.9.9.  Version 9.9.9 is deliberately
        chosen to be impossible to appear in any real HED release, so this fixture
        can never collide with a legitimate cached schema.  It exists *only* in the
        prerelease subdirectory — there is no HED9.9.9.xml in the regular cache root
        — which is exactly the condition under test.

    tests/data/schema_tests/prerelease/HED_testpre_1.0.0.mediawiki
        A minimal library schema with ``library="testpre"``, ``version="1.0.0"``,
        ``withStandard="9.9.9"``, and ``unmerged="True"``.  Kept in the same
        prerelease/ directory as HED9.9.9.mediawiki to make the association clear.
        It declares one tag (Prerelease-partner-only-item) so the merged result
        can be asserted.

    Cache isolation
    ---------------
    The real user cache lives at ``~/.hedtools/hed_cache/`` and is controlled by
    ``hed_cache.HED_CACHE_DIRECTORY``.  To prevent any interaction with it:

    1. ``setUpClass`` loads HED9.9.9.mediawiki, converts it to an XML string via
       ``get_as_xml_string()`` (XML is the only format the cache scanner recognises),
       and writes ``{tmpdir}/prerelease/HED9.9.9.xml`` into a fresh
       ``tempfile.TemporaryDirectory``.  The ``tmpdir`` root has no HED9.9.9.xml,
       only the ``prerelease/`` subdirectory does, which is precisely the layout that
       requires ``check_prerelease=True`` to succeed.

    2. Each test patches ``hed_cache.HED_CACHE_DIRECTORY`` to ``_cache_dir`` (the
       temp dir root) for the duration of that test only.  Outside the ``with``
       block the real constant is restored automatically by ``patch.object``.

    3. ``_load_schema_version`` is an LRU-cached function.  The cache is cleared in
       ``setUp`` and ``tearDown`` so no patched-path entries can leak into subsequent
       tests (or be inherited from earlier ones).

    4. ``tearDownClass`` clears the LRU cache one final time and deletes the temp dir.
       The temp dir removal is additionally registered as a class cleanup so it is
       still deleted when ``setUpClass`` fails part-way through.
    """

    # Lower-cased name of the single tag declared by the library fixture; finding it
    # in the loaded schema shows that the library content made it into the result.
    LIBRARY_ONLY_TAG = "prerelease-partner-only-item"

    @classmethod
    def setUpClass(cls):
        """Build the synthetic cache directory used by all tests in this class.

        Loads HED9.9.9.mediawiki (the human-editable source), converts it to XML,
        and writes it into {tmpdir}/prerelease/HED9.9.9.xml so that the cache
        scanner can find it only when check_prerelease=True.
        """
        fixture_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data/schema_tests")

        # Path to the library schema fixture loaded directly by filepath in each test.
        # Kept in prerelease/ alongside HED9.9.9.mediawiki to make the association clear.
        cls.lib_schema_path = os.path.join(fixture_dir, "prerelease", "HED_testpre_1.0.0.mediawiki")

        # Convert the mediawiki standard schema to XML — the cache scanner only
        # recognises .xml files, so this conversion is required even though the
        # human-editable source is mediawiki.
        standard_wiki_path = os.path.join(fixture_dir, "prerelease", "HED9.9.9.mediawiki")
        xml_string = load_schema(standard_wiki_path).get_as_xml_string()

        # Create an isolated temporary directory that serves as a fake cache root.
        # Structure written:
        #   {tmpdir}/                  ← patched as HED_CACHE_DIRECTORY
        #     prerelease/
        #       HED9.9.9.xml           ← only found when check_prerelease=True
        # Nothing is placed in {tmpdir}/ directly, so a lookup without
        # check_prerelease finds no 9.9.9 and raises BAD_WITH_STANDARD.
        cls._tmpdir = tempfile.TemporaryDirectory()
        # tearDownClass is skipped when setUpClass raises, so register the removal
        # right away; TemporaryDirectory.cleanup() is safe to call more than once.
        cls.addClassCleanup(cls._tmpdir.cleanup)
        cls._cache_dir = cls._tmpdir.name

        prerelease_dir = os.path.join(cls._cache_dir, "prerelease")
        os.makedirs(prerelease_dir)
        # Explicit UTF-8 so the written file does not depend on the platform's
        # default text encoding.
        with open(os.path.join(prerelease_dir, "HED9.9.9.xml"), "w", encoding="utf-8") as f:
            f.write(xml_string)

    @classmethod
    def tearDownClass(cls):
        """Remove the temp dir and ensure no patched cache entries remain."""
        _load_schema_version.cache_clear()
        cls._tmpdir.cleanup()

    def setUp(self):
        """Clear the LRU cache before each test so no result from a previous test
        (which may have used a patched HED_CACHE_DIRECTORY) is reused."""
        _load_schema_version.cache_clear()

    def tearDown(self):
        """Clear the LRU cache after each test so patched-path entries cannot
        bleed into tests that run after this class."""
        _load_schema_version.cache_clear()

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------

    def _patched_cache(self):
        """Return a context manager that points HED_CACHE_DIRECTORY at the temp cache root."""
        return patch.object(hed_cache, "HED_CACHE_DIRECTORY", self._cache_dir)

    def _read_lib_schema(self):
        """Return the library schema fixture as a string, decoded as UTF-8."""
        with open(self.lib_schema_path, encoding="utf-8") as f:
            return f.read()

    def _assert_bad_with_standard(self, loader, *args, **kwargs):
        """Assert that ``loader(*args, **kwargs)`` raises HedFileError with code
        BAD_WITH_STANDARD while the temp cache root is patched in."""
        with self._patched_cache():
            with self.assertRaises(HedFileError) as ctx:
                loader(*args, **kwargs)
        self.assertEqual(ctx.exception.code, HedExceptions.BAD_WITH_STANDARD)

    # ------------------------------------------------------------------
    # load_schema() tests
    # ------------------------------------------------------------------

    def test_load_schema_prerelease_partner_with_flag(self):
        """load_schema(..., check_prerelease=True) successfully resolves a withStandard
        partner that exists only in the prerelease subdirectory.

        Verifies that check_prerelease=True is forwarded from load_schema all the way
        through to hed_cache.get_hed_version_path, allowing the prerelease standard
        schema (9.9.9) to be found, merged, and the resulting HedSchema to contain
        both the standard tags and the library's own tag.
        """
        with self._patched_cache():
            result = load_schema(self.lib_schema_path, check_prerelease=True)
        self.assertIsInstance(result, HedSchema)
        self.assertEqual(result.library, "testpre")
        # The library-specific tag must be present in the merged schema.
        self.assertIn(self.LIBRARY_ONLY_TAG, result.tags.all_names)

    def test_load_schema_prerelease_partner_default_raises(self):
        """load_schema(...) with the default check_prerelease=False raises BAD_WITH_STANDARD
        when the withStandard partner exists only in the prerelease subdirectory.

        This confirms the default is safe: users must explicitly opt in to prerelease
        partner resolution; it does not happen silently.
        """
        self._assert_bad_with_standard(load_schema, self.lib_schema_path)

    def test_load_schema_prerelease_partner_explicit_false_raises(self):
        """load_schema(..., check_prerelease=False) raises BAD_WITH_STANDARD when the
        withStandard partner exists only in the prerelease subdirectory.

        Mirrors the default test but with the flag set explicitly, confirming that
        passing False has the same effect as omitting it.
        """
        self._assert_bad_with_standard(load_schema, self.lib_schema_path, check_prerelease=False)

    # ------------------------------------------------------------------
    # from_string() tests
    # ------------------------------------------------------------------

    def test_from_string_prerelease_partner_with_flag(self):
        """from_string(..., check_prerelease=True) successfully resolves a withStandard
        partner that exists only in the prerelease subdirectory.

        Reads the library schema as a string (simulating receipt from a URL or
        in-memory source) and confirms that check_prerelease=True propagates through
        from_string → SchemaLoaderWiki → SchemaLoader._load → load_schema_version.
        """
        schema_str = self._read_lib_schema()
        with self._patched_cache():
            result = from_string(schema_str, schema_format=".mediawiki", check_prerelease=True)
        self.assertIsInstance(result, HedSchema)
        self.assertEqual(result.library, "testpre")
        # Same merged-content check as the load_schema() success test.
        self.assertIn(self.LIBRARY_ONLY_TAG, result.tags.all_names)

    def test_from_string_prerelease_partner_default_raises(self):
        """from_string(...) with default check_prerelease=False raises BAD_WITH_STANDARD
        when the withStandard partner exists only in the prerelease subdirectory.

        Confirms the same safe default behaviour as load_schema when the schema
        content is supplied as a string rather than a filepath.
        """
        schema_str = self._read_lib_schema()
        self._assert_bad_with_standard(from_string, schema_str, schema_format=".mediawiki")