Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,19 @@
Python HEDTools
===============

Welcome to the Python HEDTools documentation! This package provides comprehensive tools for working with **Hierarchical Event Descriptors (HED)** - a standardized framework for annotating events and experimental metadata in neuroscience and beyond.

What is HED?
------------

HED is a standardized vocabulary and annotation framework designed to systematically describe events in experimental data, particularly neuroimaging and behavioral data. It's integrated into major neuroimaging standards:
Welcome to the Python HEDTools documentation!
This package provides comprehensive tools for working with
**Hierarchical Event Descriptors (HED)** - a standardized framework
for annotating events and experimental metadata in neuroscience and beyond.
HED is integrated into major neuroimaging standards:

* `BIDS <https://bids.neuroimaging.io/>`_ (Brain Imaging Data Structure)
* `NWB <https://www.nwb.org/>`_ (Neurodata Without Borders)

Key features
------------
This package enables you to validate, analyze, and manipulate HED annotations in various formats.

Python HEDTools features
------------------------

* **Validation**: Verify HED annotations against official schemas
* **Analysis**: Search, filter, and summarize HED-annotated data
Expand Down
11 changes: 10 additions & 1 deletion hed/schema/hed_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,16 @@ def get_hed_version_path(xml_version, library_name=None, local_hed_directory=Non
if not hed_versions or not xml_version:
return None
if xml_version in hed_versions:
return _create_xml_filename(xml_version, library_name, local_hed_directory, check_prerelease)
# Check regular directory first
regular_path = _create_xml_filename(xml_version, library_name, local_hed_directory, False)
if os.path.exists(regular_path):
return regular_path

# If check_prerelease is True, also check prerelease directory
if check_prerelease:
prerelease_path = _create_xml_filename(xml_version, library_name, local_hed_directory, True)
if os.path.exists(prerelease_path):
return prerelease_path
return None


Expand Down
30 changes: 21 additions & 9 deletions hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
MAX_MEMORY_CACHE = 40


def load_schema_version(xml_version=None, xml_folder=None) -> Union["HedSchema", "HedSchemaGroup"]:
def load_schema_version(xml_version=None, xml_folder=None, check_prerelease=False) -> Union["HedSchema", "HedSchemaGroup"]:
"""Return a HedSchema or HedSchemaGroup extracted from xml_version

Parameters:
Expand All @@ -31,6 +31,7 @@ def load_schema_version(xml_version=None, xml_folder=None) -> Union["HedSchema",
based on the output of HedSchema.get_formatted_version
Basic format: `[schema_namespace:][library_name_]X.Y.Z`.
xml_folder (str): Path to a folder containing schema.
check_prerelease (bool): If True, check the prerelease directory for schemas.

Returns:
Union[HedSchema, HedSchemaGroup]: The schema or schema group extracted.
Expand All @@ -49,14 +50,17 @@ def load_schema_version(xml_version=None, xml_folder=None) -> Union["HedSchema",
raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e
if xml_version and isinstance(xml_version, list):
xml_versions = parse_version_list(xml_version)
schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_versions.values()]
schemas = [
_load_schema_version(xml_version=version, xml_folder=xml_folder, check_prerelease=check_prerelease)
for version in xml_versions.values()
]
if len(schemas) == 1:
return schemas[0]

name = ",".join([schema.version for schema in schemas])
return HedSchemaGroup(schemas, name=name)
else:
return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder)
return _load_schema_version(xml_version=xml_version, xml_folder=xml_folder, check_prerelease=check_prerelease)


def load_schema(hed_path, schema_namespace=None, schema=None, name=None) -> "HedSchema":
Expand Down Expand Up @@ -246,7 +250,7 @@ def parse_version_list(xml_version_list) -> dict:


@functools.lru_cache(maxsize=MAX_MEMORY_CACHE)
def _load_schema_version(xml_version=None, xml_folder=None):
def _load_schema_version(xml_version=None, xml_folder=None, check_prerelease=False):
"""Return specified version

Parameters:
Expand All @@ -256,6 +260,7 @@ def _load_schema_version(xml_version=None, xml_folder=None):
The schema namespace must be the same and not repeated if loading multiple merged schemas.

xml_folder (str): Path to a folder containing schema.
check_prerelease (bool): If True, check the prerelease directory for schemas.

Returns:
Union[HedSchema, HedSchemaGroup]: The requested HedSchema object.
Expand All @@ -279,14 +284,18 @@ def _load_schema_version(xml_version=None, xml_folder=None):
else:
xml_versions = [""]

first_schema = _load_schema_version_sub(xml_versions[0], schema_namespace, xml_folder=xml_folder, name=name)
first_schema = _load_schema_version_sub(
xml_versions[0], schema_namespace, xml_folder=xml_folder, check_prerelease=check_prerelease, name=name
)
filenames = [os.path.basename(first_schema.filename)]

# Collect all duplicate issues for proper error reporting
all_duplicate_issues = []

for version in xml_versions[1:]:
_load_schema_version_sub(version, schema_namespace, xml_folder=xml_folder, schema=first_schema, name=name)
_load_schema_version_sub(
version, schema_namespace, xml_folder=xml_folder, check_prerelease=check_prerelease, schema=first_schema, name=name
)

# Collect duplicate errors when merging schemas in the same namespace
current_filename = os.path.basename(first_schema.filename)
Expand Down Expand Up @@ -319,13 +328,14 @@ def _load_schema_version(xml_version=None, xml_folder=None):
return first_schema


def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None, schema=None, name=""):
def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None, check_prerelease=False, schema=None, name=""):
"""Return specified version(single version only for this one)

Parameters:
xml_version (str): HED version format string. Expected format: '[library_name_]X.Y.Z'
schema_namespace (str): The prefix this will have
xml_folder (str): Path to a folder containing schema
check_prerelease (bool): If True, check the prerelease directory for schemas
schema (HedSchema or None): A HED schema to merge this new file into.
name (str): User supplied identifier for this schema

Expand Down Expand Up @@ -358,14 +368,16 @@ def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None,
)

hed_file_path = hed_cache.get_hed_version_path(
version_to_validate, library_name=library_name, local_hed_directory=xml_folder
version_to_validate, library_name=library_name, local_hed_directory=xml_folder, check_prerelease=check_prerelease
)

if hed_file_path:
hed_schema = load_schema(hed_file_path, schema_namespace=schema_namespace, schema=schema, name=name)
else:
library_string = f"for library '{library_name}'" if library_name else ""
known_versions = hed_cache.get_hed_versions(xml_folder, library_name=library_name if library_name else "all")
known_versions = hed_cache.get_hed_versions(
xml_folder, library_name=library_name if library_name else "all", check_prerelease=check_prerelease
)
raise HedFileError(
HedExceptions.FILE_NOT_FOUND,
f"HED version {library_string}: '{version_to_validate}' not found. Check {hed_cache.get_cache_directory(xml_folder)} for cache or https://github.com/hed-standard/hed-schemas/tree/main/library_schemas. "
Expand Down
19 changes: 19 additions & 0 deletions tests/data/schema_tests/prerelease/HED8.3.0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" ?>
<HED version="8.3.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd">
<prologue>Test prerelease schema for unit testing prerelease functionality.</prologue>
<schema>
<node>
<name>Event</name>
<description>Something that happens at a given time and place.</description>
</node>
<node>
<name>Property</name>
<description>A characteristic of something.</description>
</node>
</schema>
<unitClassDefinitions/>
<unitModifierDefinitions/>
<valueClassDefinitions/>
<schemaAttributeDefinitions/>
<propertyDefinitions/>
</HED>
15 changes: 15 additions & 0 deletions tests/data/schema_tests/prerelease/HED_testlib_2.1.0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" ?>
<HED library="testlib" version="2.1.0" withStandard="8.2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd">
<prologue>Test prerelease library schema for testing mixed regular and prerelease loading.</prologue>
<schema>
<node>
<name>Prerelease-item</name>
<description>A test item only in prerelease version.</description>
</node>
</schema>
<unitClassDefinitions/>
<unitModifierDefinitions/>
<valueClassDefinitions/>
<schemaAttributeDefinitions/>
<propertyDefinitions/>
</HED>
103 changes: 103 additions & 0 deletions tests/schema/test_hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,3 +569,106 @@ def test_triple_prefixes(self):
parse_version_list(["test:score", "ol:otherlib", "test:testlib", "abc:anotherlib"]),
{"test": "test:score,testlib", "ol": "ol:otherlib", "abc": "abc:anotherlib"},
)


class TestPrereleaseParameter(unittest.TestCase):
    """Exercise the ``check_prerelease`` option of ``load_schema_version``."""

    @classmethod
    def setUpClass(cls):
        """Resolve the schema fixture folder relative to this test module."""
        here = os.path.dirname(os.path.realpath(__file__))
        cls.schema_dir = os.path.join(here, "../data/schema_tests/")

    def test_check_prerelease_parameter_exists(self):
        """The keyword must be accepted even when the requested version is missing."""
        try:
            # The version is deliberately bogus; only the call signature is under test.
            load_schema_version("99.99.99", xml_folder=self.schema_dir, check_prerelease=True)
        except HedFileError:
            # A missing version is the expected outcome once the keyword is accepted.
            pass
        except TypeError as e:
            self.fail(f"check_prerelease parameter not accepted: {e}")

    def test_check_prerelease_default_false(self):
        """Omitting the keyword still loads a regular schema."""
        loaded = load_schema_version("8.2.0", xml_folder=self.schema_dir)
        self.assertIsInstance(loaded, HedSchema, "Should load regular schema without check_prerelease")
        self.assertEqual(loaded.version_number, "8.2.0", "Should have correct version")

    def test_check_prerelease_false_explicit(self):
        """Passing ``check_prerelease=False`` explicitly loads a regular schema."""
        loaded = load_schema_version("8.2.0", xml_folder=self.schema_dir, check_prerelease=False)
        self.assertIsInstance(loaded, HedSchema, "Regular schema should load with check_prerelease=False")
        self.assertEqual(loaded.version_number, "8.2.0", "Should have correct version")

    def test_check_prerelease_with_namespace(self):
        """A namespaced version string works together with the keyword."""
        loaded = load_schema_version("test:8.2.0", xml_folder=self.schema_dir, check_prerelease=False)
        self.assertIsInstance(loaded, HedSchema, "Should load with namespace")
        self.assertEqual(loaded._namespace, "test:", "Should have correct namespace")
        self.assertEqual(loaded.version_number, "8.2.0", "Should have correct version")

    def test_nonexistent_version_error_message(self):
        """A missing version reports "not found" for both values of the flag."""
        raised = []
        # Trigger both failures first, then inspect the messages.
        for flag in (False, True):
            with self.assertRaises(HedFileError) as ctx:
                load_schema_version("99.99.99", xml_folder=self.schema_dir, check_prerelease=flag)
            raised.append(ctx.exception)

        for error in raised:
            self.assertIn("not found", str(error).lower())

    def test_check_prerelease_parameter_in_signature(self):
        """The public signature exposes ``check_prerelease`` with a default of False."""
        import inspect

        params = inspect.signature(load_schema_version).parameters
        self.assertIn("check_prerelease", params, "check_prerelease should be in function signature")
        self.assertEqual(params["check_prerelease"].default, False, "check_prerelease should default to False")

    def test_check_prerelease_with_regular_schema(self):
        """Regular schemas remain loadable when ``check_prerelease=True``."""
        loaded = load_schema_version("8.2.0", xml_folder=self.schema_dir, check_prerelease=True)
        self.assertIsInstance(loaded, HedSchema, "Regular schema should load with check_prerelease=True")
        self.assertEqual(loaded.version_number, "8.2.0", "Should have correct version")

    def test_load_actual_prerelease_schema(self):
        """Version 8.3.0 is requested with the flag on and must load with its tags."""
        loaded = load_schema_version("8.3.0", xml_folder=self.schema_dir, check_prerelease=True)
        self.assertIsInstance(loaded, HedSchema, "Should load prerelease schema")
        self.assertEqual(loaded.version_number, "8.3.0", "Should have correct prerelease version")
        self.assertIn("event", loaded.tags.all_names, "Prerelease schema should have tags")

    def test_prerelease_not_found_without_flag(self):
        """Version 8.3.0 must not be found when the flag is off."""
        with self.assertRaises(HedFileError) as ctx:
            load_schema_version("8.3.0", xml_folder=self.schema_dir, check_prerelease=False)
        self.assertIn("not found", str(ctx.exception).lower())

    def test_load_prerelease_library(self):
        """Library schema testlib_2.1.0 loads with the flag on."""
        loaded = load_schema_version("testlib_2.1.0", xml_folder=self.schema_dir, check_prerelease=True)
        self.assertIsInstance(loaded, HedSchema, "Should load prerelease library")
        self.assertEqual(loaded.version_number, "2.1.0", "Should have correct version")
        self.assertEqual(loaded.library, "testlib", "Should have correct library name")
        self.assertIn("prerelease-item", loaded.tags.all_names, "Should have prerelease library tags")

    def test_mixed_regular_and_prerelease_schemas(self):
        """Two versions under different namespaces load together as a group."""
        requested = ["base:8.2.0", "test:testlib_2.1.0"]
        group = load_schema_version(requested, xml_folder=self.schema_dir, check_prerelease=True)
        self.assertIsInstance(group, HedSchemaGroup, "Should load as HedSchemaGroup")
        self.assertEqual(len(group._schemas), 2, "Should have two schemas")
        self.assertIn("base:", group._schemas, "Should have base namespace")
        self.assertIn("test:", group._schemas, "Should have test namespace")