Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions hed/schema/schema_attribute_validator_hed_id.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from hed.schema.hed_cache import get_library_data
from hed.schema.schema_io.df_util import remove_prefix
from semantic_version import Version
from hed.schema.hed_schema_io import load_schema_version
from hed.schema.hed_cache import get_hed_versions
Expand Down Expand Up @@ -87,13 +86,13 @@ def verify_tag_id(self, hed_schema, tag_entry, attribute_name):

if old_id:
try:
old_id = int(remove_prefix(old_id, "HED_"))
old_id = int(old_id.removeprefix("HED_"))
except ValueError:
# Silently ignore invalid old_id values (this shouldn't happen).
pass
if new_id:
try:
new_id = int(remove_prefix(new_id, "HED_"))
new_id = int(new_id.removeprefix("HED_"))
except ValueError:
return ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_HED_ID_INVALID, tag_entry.name, new_id)
# Nothing to verify
Expand Down
7 changes: 0 additions & 7 deletions hed/schema/schema_io/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,13 +233,6 @@ def get_library_name_and_id(schema):
return name.capitalize(), starting_id


# TODO: str.removeprefix is available from Python 3.9; callers that only
# ever pass strings can use it directly instead of this wrapper.
def remove_prefix(text, prefix):
    """Return text with a leading prefix removed, if it is present.

    Parameters:
        text: The value to strip. Non-string values (None, numbers, NaN)
            are returned unchanged instead of raising AttributeError.
        prefix (str): The prefix to remove from the start of text.

    Returns:
        The text without the leading prefix when text is a string that starts
        with it; otherwise text itself, unchanged.
    """
    # Cells read from a dataframe may hold NaN or numeric values, which have
    # no string methods, so only strings are processed.
    if isinstance(text, str):
        return text.removeprefix(prefix)
    return text


def calculate_attribute_type(attribute_entry):
"""Returns the type of this attribute(annotation, object, data)

Expand Down
5 changes: 2 additions & 3 deletions hed/schema/schema_io/hed_id_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from hed.schema.schema_io import schema_util
from hed.errors.exceptions import HedFileError
from hed.schema.hed_schema_constants import HedKey
from hed.schema.schema_io.df_util import remove_prefix
from hed.schema.hed_cache import get_library_data
from hed.schema.schema_io import df_constants as constants

Expand Down Expand Up @@ -67,7 +66,7 @@ def get_all_ids(df):
Union[Set, None]: None if this has no HED column, otherwise all unique numbers as a set.
"""
if constants.hed_id in df.columns:
modified_df = df[constants.hed_id].apply(lambda x: remove_prefix(x, "HED_"))
modified_df = df[constants.hed_id].apply(lambda x: x.removeprefix("HED_") if isinstance(x, str) else x)
modified_df = pd.to_numeric(modified_df, errors="coerce").dropna().astype(int)
return set(modified_df.unique())
return None
Expand Down Expand Up @@ -171,7 +170,7 @@ def _verify_hedid_matches(section, df, unused_tag_ids):
row_number, row, f"'{label}' has an improperly formatted hedID in dataframe."
)
continue
id_value = remove_prefix(df_id, "HED_")
id_value = df_id.removeprefix("HED_")
try:
id_int = int(id_value)
if id_int not in unused_tag_ids:
Expand Down
3 changes: 1 addition & 2 deletions hed/schema/schema_io/schema2df.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from hed.schema.schema_io.df_util import (
create_empty_dataframes,
get_library_name_and_id,
remove_prefix,
calculate_attribute_type,
)
from hed.schema.schema_io.schema2base import Schema2Base
Expand Down Expand Up @@ -49,7 +48,7 @@ def _get_object_name_and_id(self, object_name, include_prefix=False):
- The full formatted hed_id.
"""
prefix, obj_id = get_library_name_and_id(self._schema)
name = f"{prefix}{remove_prefix(object_name, 'Hed')}"
name = f"{prefix}{object_name.removeprefix('Hed')}"
full_hed_id = self._get_object_id(object_name, obj_id, include_prefix)
return name, full_hed_id

Expand Down
197 changes: 0 additions & 197 deletions hed/tools/analysis/sequence_map.py

This file was deleted.

2 changes: 1 addition & 1 deletion spec_tests/hed-examples
Submodule hed-examples updated 131 files
26 changes: 26 additions & 0 deletions tests/schema/test_hed_id_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,32 @@ def test_get_all_ids_mixed_invalid(self):
result = get_all_ids(df)
self.assertEqual(result, {1, 3}) # Should ignore non-numeric and malformed IDs

def test_get_all_ids_with_nan(self):
    """get_all_ids skips null entries (pd.NA and None) in the hedId column."""
    frame = pd.DataFrame({"hedId": ["HED_0000001", pd.NA, "HED_0000003", None]})
    # Only the two well-formed ids are expected in the result.
    expected_ids = {1, 3}
    self.assertEqual(get_all_ids(frame), expected_ids)

def test_get_all_ids_with_numeric_types(self):
    """get_all_ids keeps plain numeric cells alongside HED_-prefixed strings."""
    # Edge case: the hedId column mixes prefixed strings with raw integers.
    mixed_values = ["HED_0000001", 123, "HED_0000003", 456]
    frame = pd.DataFrame({"hedId": mixed_values})
    # Expect the ids parsed from the prefixed strings plus the numeric
    # values taken as-is.
    expected_ids = {1, 3, 123, 456}
    self.assertEqual(get_all_ids(frame), expected_ids)

def test_get_all_ids_empty_strings(self):
    """get_all_ids ignores empty-string cells in the hedId column."""
    column = ["HED_0000001", "", "HED_0000003", ""]
    frame = pd.DataFrame({"hedId": column})
    found_ids = get_all_ids(frame)
    # The empty strings contribute nothing; only ids 1 and 3 remain.
    self.assertEqual(found_ids, {1, 3})

def test_get_all_ids_with_none(self):
    """get_all_ids ignores None cells in the hedId column."""
    column = ["HED_0000001", None, "HED_0000003", None]
    frame = pd.DataFrame({"hedId": column})
    found_ids = get_all_ids(frame)
    # The None entries are dropped; only ids 1 and 3 remain.
    self.assertEqual(found_ids, {1, 3})

def test_assign_hed_ids_section(self):
df = pd.DataFrame(
{
Expand Down
63 changes: 0 additions & 63 deletions tests/tools/analysis/test_sequence_map.py

This file was deleted.