From 2f9474df7bfe9703e9c9f4806fec024572319eac Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 23 Jan 2026 18:04:49 +0900 Subject: [PATCH 1/4] fix: align to the tier4_perception_dataset document Signed-off-by: ktro2828 --- docs/schema/table.md | 20 +++++-- t4_devkit/schema/tables/autolabel_metadata.py | 58 ++++++++++++++++--- t4_devkit/schema/tables/sample_data.py | 12 ++-- tests/schema/conftest.py | 20 ++++--- .../schema/tables/test_autolabel_metadata.py | 43 +++++++++++--- 5 files changed, 118 insertions(+), 35 deletions(-) diff --git a/docs/schema/table.md b/docs/schema/table.md index 7deb593c..3a40312d 100644 --- a/docs/schema/table.md +++ b/docs/schema/table.md @@ -36,12 +36,22 @@ The `AutolabelModel` type used in `autolabel_metadata` fields has the following ```json AutolabelModel { - "name": -- Name of the model used for annotation. Can include version information. + "name": -- Name of the model used for auto-labeling. Can include version information. "score": -- Label score for the annotation from this model (range: 0.0–1.0). "uncertainty": -- Model-reported uncertainty for the annotation (range: 0.0–1.0). Lower values imply higher confidence. } ``` +#### `AutolabelMetadata` + +The `AutolabelMetadata` type consists of a list of `AutolabelModel`, and it has the following structure: + +```json +AutolabelMetadata { + "models": -- List of models used for auto-labeling. +} +``` + #### `Indicators` The `Indicators` represents the status of vehicle indicators: @@ -231,7 +241,7 @@ sample_annotation { "next": -- Foreign key to the `SampleAnnotation` table associated with the next annotation in the sequence. Empty string `""` if this is the last annotation. "prev": -- Foreign key to the `SampleAnnotation` table associated with the previous annotation in the sequence. Empty string `""` if this is the first annotation. "automatic_annotation": -- Indicates whether the annotation was automatically generated. Defaults to `false`. 
- "autolabel_metadata": -- List of models used for autolabeling. Required if `automatic_annotation` is `true`. + "autolabel_metadata": -- Metadata of models used in auto-labeling. Required if `automatic_annotation` is `true`. } ``` @@ -258,7 +268,7 @@ sample_data { "prev": -- Foreign key to the `SampleData` table associated with the previous data in the sequence. Empty string `""` if this is the first data. "is_valid": -- Indicates whether this data is valid. Defaults to `true`. "info_filename": -- Relative path to metadata-blob file. - "autolabel_metadata": -- List of models used for autolabeling applied to this entire sample_data item (e.g., image or scan). + "autolabel_metadata": -- Metadata of models used for auto-labeling applied to this entire sample_data item (e.g., image or scan). } ``` @@ -367,7 +377,7 @@ object_ann { "orientation": -- Orientation of the arrow shape within the bounding box, in radians. Present only for categories where `has_orientation` is true (e.g., traffic light arrows). "number": -- The digit displayed within the bounding box. Present only for categories where `has_number` is true (e.g., numeric traffic lights). "automatic_annotation": -- Whether the annotation was automatically generated. Defaults to `false`. - "autolabel_metadata": -- List of models used for autolabeling. Required if `automatic_annotation` is `true`. + "autolabel_metadata": -- Metadata of models used in auto-labeling. Required if `automatic_annotation` is `true`. } ``` @@ -384,7 +394,7 @@ surface_ann { "category_token": -- Foreign key to the `Category` table associated with the category of the surface. "mask": -- Run length encoding of instance mask. "automatic_annotation": -- Whether the annotation was automatically generated. Defaults to `false`. - "autolabel_metadata": -- List of models used for autolabeling. Required if `automatic_annotation` is `true`. + "autolabel_metadata": -- Metadata of models used in auto-labeling. 
Required if `automatic_annotation` is `true`. } ``` diff --git a/t4_devkit/schema/tables/autolabel_metadata.py b/t4_devkit/schema/tables/autolabel_metadata.py index c328e84e..b1e1b576 100644 --- a/t4_devkit/schema/tables/autolabel_metadata.py +++ b/t4_devkit/schema/tables/autolabel_metadata.py @@ -32,7 +32,7 @@ def to_autolabel_model(x: list[dict | AutolabelModel] | None) -> list[AutolabelM """Convert input to a list of AutolabelModel instances. Args: - x (list[dict | AutolabelModel] | None): Input to convert. Can be None, a list of dicts, or a list of AutolabelModel instances. + x (list[dict | AutolabelModel] | None): Can be None or a list of [dicts or AutolabelModel] instances. Returns: list[AutolabelModel] | None: Converted list of AutolabelModel instances or None. @@ -44,19 +44,63 @@ def to_autolabel_model(x: list[dict | AutolabelModel] | None) -> list[AutolabelM raise TypeError("Input must be None or a list of [dicts or AutolabelModel] instances.") +@define +class AutolabelMetadata: + """A dataclass to represent metadata of models used in auto-labeling. + + Attributes: + models (list[AutolabelModel]): List of AutolabelModel instances. + """ + + models: list[AutolabelModel] | None = field( + converter=AutolabelModel.to_autolabel_model, + validator=validators.optional( + validators.deep_iterable(validators.instance_of(AutolabelModel)) + ), + ) + + @staticmethod + def to_autolabel_metadata( + x: dict | AutolabelMetadata | list[dict | AutolabelModel] | None, + ) -> AutolabelMetadata | None: + """Convert input to an AutolabelMetadata instance. + + Args: + x (dict | AutolabelMetadata | list[dict | AutolabelModel] | None): + Input to convert. Can be None, a dict, or an AutolabelMetadata instance. + + Returns: + AutolabelMetadata | None: Converted AutolabelMetadata instance or None. 
+ """ + if x is None: + return None + if isinstance(x, AutolabelMetadata): + return x + if isinstance(x, list): + return AutolabelMetadata(x) + if isinstance(x, dict): + return AutolabelMetadata(x.get("models", None)) + raise TypeError( + "Input must be None, a dict, an AutolabelMetadata instance, or a list of [dicts or AutolabelModel] instances." + ) + + @define(slots=False) class AutolabelMixin: - """Mixin class for schema tables that use autolabel metadata with automatic annotation.""" + """Mixin class for schema tables that use autolabel metadata with automatic annotation. + + Attributes: + automatic_annotation (bool, optional): Indicates whether the annotation is generated automatically. + autolabel_metadata (AutolabelMetadata | None, optional): Metadata of models used in auto-labeling. + """ automatic_annotation: bool = field( default=False, validator=validators.instance_of(bool), kw_only=True ) - autolabel_metadata: list[AutolabelModel] | None = field( + autolabel_metadata: AutolabelMetadata | None = field( default=None, - converter=AutolabelModel.to_autolabel_model, - validator=validators.optional( - validators.deep_iterable(validators.instance_of(AutolabelModel)) - ), + converter=AutolabelMetadata.to_autolabel_metadata, + validator=validators.optional(validators.instance_of(AutolabelMetadata)), kw_only=True, ) diff --git a/t4_devkit/schema/tables/sample_data.py b/t4_devkit/schema/tables/sample_data.py index 5675fc36..163d9c41 100644 --- a/t4_devkit/schema/tables/sample_data.py +++ b/t4_devkit/schema/tables/sample_data.py @@ -6,7 +6,7 @@ from attrs import define, field, validators from ..name import SchemaName -from .autolabel_metadata import AutolabelModel +from .autolabel_metadata import AutolabelMetadata from .base import SchemaBase, impossible_empty from .registry import SCHEMAS @@ -86,7 +86,7 @@ class SampleData(SchemaBase): Empty if start of scene. is_valid (bool): True if this data is valid, else False. Invalid data should be ignored. 
info_filename (str): Relative path to metainfo data-blob on disk. - autolabel_metadata (list[AutolabelModel] | None, optional): List of models used for autolabeling applied to this entire sample_data item (e.g., image or scan). + autolabel_metadata (AutolabelMetadata | None, optional): Metadata of models used for autolabeling applied to this entire sample_data item (e.g., image or scan). Shortcuts: --------- @@ -111,12 +111,10 @@ class SampleData(SchemaBase): info_filename: str | None = field( default=None, validator=validators.optional(validators.instance_of(str)) ) - autolabel_metadata: list[AutolabelModel] | None = field( + autolabel_metadata: AutolabelMetadata | None = field( default=None, - converter=AutolabelModel.to_autolabel_model, - validator=validators.optional( - validators.deep_iterable(validators.instance_of(AutolabelModel)) - ), + converter=AutolabelMetadata.to_autolabel_metadata, + validator=validators.optional(validators.instance_of(AutolabelMetadata)), ) # shortcuts diff --git a/tests/schema/conftest.py b/tests/schema/conftest.py index 7511c473..06e3ae20 100644 --- a/tests/schema/conftest.py +++ b/tests/schema/conftest.py @@ -181,7 +181,9 @@ def sample_annotation_dict() -> dict: "next": "7b0ae1dae7531b7b917f403cb22259e6", "prev": "", "automatic_annotation": True, - "autolabel_metadata": [{"name": "centerpoint_v1.2", "score": 0.95, "uncertainty": 0.1}], + "autolabel_metadata": { + "models": [{"name": "centerpoint_v1.2", "score": 0.95, "uncertainty": 0.1}] + }, } @@ -233,9 +235,9 @@ def sample_data_dict() -> dict: "is_valid": True, "next": "efe096cc01a610af846c29aaf4decc9a", "prev": "", - "autolabel_metadata": [ - {"name": "image_preprocessor_v2.1", "score": 0.99, "uncertainty": None} - ], + "autolabel_metadata": { + "models": [{"name": "image_preprocessor_v2.1", "score": 0.99, "uncertainty": None}] + }, } @@ -340,10 +342,12 @@ def object_ann_dict() -> dict: "bbox": [0, 408.0529355733727, 1920, 728.1832152454293], "mask": {"size": [1920, 1280], 
"counts": "UFBQWzI='"}, "automatic_annotation": True, - "autolabel_metadata": [ - {"name": "yolo_v8_segmentation", "score": 0.87, "uncertainty": 0.15}, - {"name": "mask_rcnn_v3.0", "score": 0.92, "uncertainty": None}, - ], + "autolabel_metadata": { + "models": [ + {"name": "yolo_v8_segmentation", "score": 0.87, "uncertainty": 0.15}, + {"name": "mask_rcnn_v3.0", "score": 0.92, "uncertainty": None}, + ] + }, "number": None, "orientation": None, } diff --git a/tests/schema/tables/test_autolabel_metadata.py b/tests/schema/tables/test_autolabel_metadata.py index ba3299c3..f3823e6f 100644 --- a/tests/schema/tables/test_autolabel_metadata.py +++ b/tests/schema/tables/test_autolabel_metadata.py @@ -1,5 +1,12 @@ +from __future__ import annotations + import pytest -from t4_devkit.schema.tables.autolabel_metadata import AutolabelModel, AutolabelMixin + +from t4_devkit.schema.tables.autolabel_metadata import ( + AutolabelMetadata, + AutolabelMixin, + AutolabelModel, +) class TestAutolabelModel: @@ -9,7 +16,7 @@ def test_to_autolabel_model_type_error(self): """Test to_autolabel_model raises TypeError for invalid input.""" with pytest.raises( TypeError, - match="Input must be None or a list of \\[dicts or AutolabelModel\\] instances.", + match="Input must be None or a list of \\[dicts or AutolabelModel\\] instances", ): AutolabelModel.to_autolabel_model("invalid_input") @@ -17,7 +24,7 @@ def test_to_autolabel_model_type_error_with_dict(self): """Test to_autolabel_model raises TypeError when input is a dict instead of list.""" with pytest.raises( TypeError, - match="Input must be None or a list of \\[dicts or AutolabelModel\\] instances.", + match="Input must be None or a list of \\[dicts or AutolabelModel\\] instances", ): AutolabelModel.to_autolabel_model({"name": "model1", "score": 0.8}) @@ -25,11 +32,31 @@ def test_to_autolabel_model_type_error_with_number(self): """Test to_autolabel_model raises TypeError when input is a number.""" with pytest.raises( TypeError, - 
match="Input must be None or a list of \\[dicts or AutolabelModel\\] instances.", + match="Input must be None or a list of \\[dicts or AutolabelModel\\] instances", ): AutolabelModel.to_autolabel_model(123) +class TestAutolabelMetadata: + """Test cases for AutolabelMetadata class that are not covered elsewhere.""" + + def test_to_autolabel_metadata_type_error(self): + """Test to_autolabel_metadata raises TypeError for invalid input.""" + with pytest.raises( + TypeError, + match="Input must be None, a dict, an AutolabelMetadata instance, or a list of \\[dicts or AutolabelModel\\] instances.", + ): + AutolabelMetadata.to_autolabel_metadata("invalid_input") + + def test_to_autolabel_metadata_type_error_with_number(self): + """Test to_autolabel_metadata raises TypeError when input is a number.""" + with pytest.raises( + TypeError, + match="Input must be None, a dict, an AutolabelMetadata instance, or a list of \\[dicts or AutolabelModel\\] instances.", + ): + AutolabelMetadata.to_autolabel_metadata(123) + + class TestAutolabelMixin: """Test cases for AutolabelMixin class that are not covered elsewhere.""" @@ -42,16 +69,16 @@ def test_autolabel_mixin_error_automatic_true_no_metadata(self): def test_autolabel_mixin_error_automatic_false_with_metadata(self): """Test AutolabelMixin raises TypeError when automatic_annotation=False but autolabel_metadata is provided.""" - models = [AutolabelModel(name="test_model", score=0.8)] + metadata = AutolabelMetadata(models=[AutolabelModel(name="test_model", score=0.8)]) with pytest.raises( TypeError, match="autolabel_metadata must be None when automatic_annotation is False" ): - AutolabelMixin(automatic_annotation=False, autolabel_metadata=models) + AutolabelMixin(automatic_annotation=False, autolabel_metadata=metadata) def test_autolabel_mixin_error_default_automatic_with_metadata(self): """Test AutolabelMixin raises TypeError when default automatic_annotation=False but autolabel_metadata is provided.""" - models = 
[AutolabelModel(name="test_model", score=0.8)] + metadata = AutolabelMetadata(models=[AutolabelModel(name="test_model", score=0.8)]) with pytest.raises( TypeError, match="autolabel_metadata must be None when automatic_annotation is False" ): - AutolabelMixin(autolabel_metadata=models) + AutolabelMixin(autolabel_metadata=metadata) From 0f3901d450e3dd22992b406daf748bb632260a52 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Sat, 24 Jan 2026 21:36:55 +0900 Subject: [PATCH 2/4] fix: update schema of surface_ann.json Signed-off-by: ktro2828 --- t4_devkit/schema/tables/surface_ann.py | 16 +++++++++++++++- tests/schema/conftest.py | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/t4_devkit/schema/tables/surface_ann.py b/t4_devkit/schema/tables/surface_ann.py index eb19dc13..428d02f3 100644 --- a/t4_devkit/schema/tables/surface_ann.py +++ b/t4_devkit/schema/tables/surface_ann.py @@ -23,7 +23,9 @@ class SurfaceAnn(SchemaBase, AutolabelMixin): token (str): Unique record identifier. sample_data_token (str): Foreign key pointing to the sample data, which must be a keyframe image. category_token (str): Foreign key pointing to the surface category. - mask (RLEMask): Segmentation mask using the COCO format compressed by RLE. + instance_token (str | None, optional): Foreign key pointing to the instance category. + attribute_tokens (list[str], optional): Foreign keys pointing to the attribute categories. + mask (RLEMask | None, optional): Segmentation mask using the COCO format compressed by RLE. Inherited from AutolabelMixin: automatic_annotation (bool, optional): Indicates if the annotation is fully generated by an ML model. 
@@ -37,6 +39,18 @@ class SurfaceAnn(SchemaBase, AutolabelMixin): sample_data_token: str = field(validator=(validators.instance_of(str), impossible_empty())) category_token: str = field(validator=(validators.instance_of(str), impossible_empty())) + instance_token: str | None = field( + default=None, + validator=validators.optional( + validators.and_(validators.instance_of(str), impossible_empty()) + ), + ) + attribute_tokens: list[str] = field( + factory=list, + validator=validators.deep_iterable( + validators.and_(validators.instance_of(str), impossible_empty()) + ), + ) mask: RLEMask | None = field( default=None, converter=lambda x: RLEMask(**x) if isinstance(x, dict) else x, diff --git a/tests/schema/conftest.py b/tests/schema/conftest.py index 06e3ae20..24983a9b 100644 --- a/tests/schema/conftest.py +++ b/tests/schema/conftest.py @@ -369,6 +369,8 @@ def surface_ann_dict() -> dict: "token": "4230e00708fb3f404d246ea97716f848", "sample_data_token": "a1d3257e11ec9d4a587ea7053b33f1c1", "category_token": "7864884179fb37bf9e973016b13a332c", + "instance_token": None, + "attribute_tokens": [], "mask": {"size": [1920, 1280], "counts": "UFBQWzI='"}, "automatic_annotation": False, "autolabel_metadata": None, From e246a95d0cc4dcafe17c91a4d1da77ba35580b61 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Sun, 25 Jan 2026 14:07:29 +0900 Subject: [PATCH 3/4] docs: update document Signed-off-by: ktro2828 --- docs/schema/table.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/schema/table.md b/docs/schema/table.md index 3a40312d..e4ede611 100644 --- a/docs/schema/table.md +++ b/docs/schema/table.md @@ -13,7 +13,7 @@ | `enum[X,Y,...]` | Enumerated type with possible values X, Y, ... 
| | `[T;N]` | Array of N elements of type T | | `[T;N,M,...]` | Array of type T with N, M, or other specified number of elements | -| `option[T]` | Optional value of type T | +| `option[T]` | Optional value of type T; defaults to `None` | ### Special Types @@ -392,7 +392,9 @@ surface_ann { "token": -- Unique record identifier. "sample_data_token": -- Foreign key to the `SampleData` table associated with the sample data. "category_token": -- Foreign key to the `Category` table associated with the category of the surface. - "mask": -- Run length encoding of instance mask. + "instance_token": -- Foreign key to the `Instance` table associated with the instance of the surface. + "attribute_tokens": -- Foreign keys to the `Attribute` table associated with the attributes of the surface. Defaults to `[]`. + "mask": -- Run length encoding of instance mask. "automatic_annotation": -- Whether the annotation was automatically generated. Defaults to `false`. "autolabel_metadata": -- Metadata of models used in auto-labeling. Required if `automatic_annotation` is `true`. 
} From b7ec0409f954bedf733fc945d5bd40d6904710c3 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 13 Feb 2026 15:29:23 +0900 Subject: [PATCH 4/4] tmp: add uncertainty field to sample_data Signed-off-by: ktro2828 --- t4_devkit/schema/tables/sample_data.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/t4_devkit/schema/tables/sample_data.py b/t4_devkit/schema/tables/sample_data.py index 163d9c41..844c7a48 100644 --- a/t4_devkit/schema/tables/sample_data.py +++ b/t4_devkit/schema/tables/sample_data.py @@ -64,6 +64,11 @@ def as_ext(self) -> str: return f".{self.value}" +@define +class Uncertainty: + instance: float = field(validator=validators.instance_of(float)) + + @define(slots=False) @SCHEMAS.register(SchemaName.SAMPLE_DATA) class SampleData(SchemaBase): @@ -116,6 +121,12 @@ class SampleData(SchemaBase): converter=AutolabelMetadata.to_autolabel_metadata, validator=validators.optional(validators.instance_of(AutolabelMetadata)), ) + # NOTE: uncertainty may be unused + uncertainty: Uncertainty | None = field( + default=None, + converter=lambda x: Uncertainty(**x) if x is not None else None, + validator=validators.optional(validators.instance_of(Uncertainty)), + ) # shortcuts modality: SensorModality | None = field(init=False, default=None)