trilogy-group · kumanday · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026
diff --git a/configs/harnesses/openai-cli.yaml b/configs/harnesses/openai-cli.yaml
@@ -0,0 +1,10 @@
+name: openai-cli
+protocol_surface: openai_responses
+base_url_env: OPENAI_BASE_URL
+api_key_env: OPENAI_API_KEY
+model_env: OPENAI_MODEL
+extra_env: {}
+render_format: shell
+launch_checks:
+  - base URL points to local LiteLLM
+  - session API key is present
diff --git a/configs/harnesses/test-harness.yaml b/configs/harnesses/test-harness.yaml
@@ -0,0 +1,10 @@
+name: test-harness
+protocol_surface: openai_responses
+base_url_env: OPENAI_API_BASE
+api_key_env: OPENAI_API_KEY
+model_env: OPENAI_MODEL
+extra_env: {}
+render_format: shell
+launch_checks:
+  - base URL points to local LiteLLM
+  - session API key is present
diff --git a/configs/variants/openai-gpt-5.4-cli.yaml b/configs/variants/openai-gpt-5.4-cli.yaml
@@ -0,0 +1,11 @@
+name: openai-gpt-5.4-cli
+provider: openai
+provider_route: openai-main
+model_alias: gpt-5.4
+harness_profile: openai-cli
+harness_env_overrides: {}
+benchmark_tags:
+  harness: openai-cli
+  provider: openai
+  model: gpt-5.4
+  config: default
diff --git a/src/__init__.py b/src/__init__.py
@@ -0,0 +1,3 @@
+"""StackPerf benchmarking system."""
+
+__version__ = "0.1.0"
diff --git a/src/benchmark_core/repositories/artifact_repository.py b/src/benchmark_core/repositories/artifact_repository.py
@@ -115,7 +115,7 @@ async def delete(self, id: UUID) -> bool:
         Returns:
             True if deleted, False if not found.
         """
-        return await super().delete(id)
+        return await super().delete(id)  # type: ignore[no-any-return]
 
     async def list_by_session(
         self, session_id: UUID, limit: int = 100, offset: int = 0

diff --git a/src/benchmark_core/repositories/experiment_repository.py b/src/benchmark_core/repositories/experiment_repository.py
@@ -192,7 +192,7 @@ async def delete(self, id: UUID) -> bool:
             ReferentialIntegrityError: If the experiment is referenced by existing sessions.
         """
         try:
-            return await super().delete(id)
+            return await super().delete(id)  # type: ignore[no-any-return]
         except IntegrityError as e:
             self._session.rollback()
             if "FOREIGN KEY constraint failed" in str(e) or "sessions" in str(e):

diff --git a/src/benchmark_core/repositories/harness_profile_repository.py b/src/benchmark_core/repositories/harness_profile_repository.py
@@ -85,7 +85,7 @@ async def delete(self, id: UUID) -> bool:
         Returns:
             True if deleted, False if not found.
         """
-        return await super().delete(id)
+        return await super().delete(id)  # type: ignore[no-any-return]
 
     async def list_all(self, limit: int = 100, offset: int = 0) -> list[HarnessProfileORM]:
         """List all harness profiles with pagination.
@@ -97,7 +97,7 @@ async def list_all(self, limit: int = 100, offset: int = 0) -> list[HarnessProfi
         Returns:
             List of harness profiles.
         """
-        return await super().list_all(limit, offset)
+        return await super().list_all(limit, offset)  # type: ignore[no-any-return]
 
     async def list_by_protocol(self, protocol: str, limit: int = 100) -> list[HarnessProfileORM]:
         """List all harness profiles for a specific protocol surface.

diff --git a/src/benchmark_core/repositories/provider_repository.py b/src/benchmark_core/repositories/provider_repository.py
@@ -124,7 +124,7 @@ async def delete(self, id: UUID) -> bool:
             True if deleted, False if not found.
         """
         # Cascading delete is handled by the ORM relationship
-        return await super().delete(id)
+        return await super().delete(id)  # type: ignore[no-any-return]
 
     async def list_all(self, limit: int = 100, offset: int = 0) -> list[ProviderORM]:
         """List all providers with their models loaded.

diff --git a/src/benchmark_core/repositories/request_repository.py b/src/benchmark_core/repositories/request_repository.py
@@ -252,7 +252,7 @@ async def delete(self, id: UUID) -> bool:
         Returns:
             True if deleted, False if not found.
         """
-        return await super().delete(id)
+        return await super().delete(id)  # type: ignore[no-any-return]
 
     async def delete_by_session(self, session_id: UUID) -> int:
         """Delete all requests for a session.

diff --git a/src/benchmark_core/repositories/rollup_repository.py b/src/benchmark_core/repositories/rollup_repository.py
@@ -197,4 +197,4 @@ def delete_by_dimension(
             MetricRollupORM.dimension_id == dimension_id,
         )
         result = self._session.execute(stmt)
-        return result.rowcount  # type: ignore[attr-defined, no-any-return]
+        return result.rowcount  # type: ignore[no-any-return]
diff --git a/src/benchmark_core/repositories/session_repository.py b/src/benchmark_core/repositories/session_repository.py
@@ -256,7 +256,7 @@ async def delete(self, id: UUID) -> bool:
         Returns:
             True if deleted, False if not found.
         """
-        return await super().delete(id)
+        return await super().delete(id)  # type: ignore[no-any-return]
 
     async def exists_by_harness_session_id(self, harness_session_id: str) -> bool:
         """Check if a session exists with the given harness session identifier.

diff --git a/src/benchmark_core/repositories/task_card_repository.py b/src/benchmark_core/repositories/task_card_repository.py
@@ -93,7 +93,7 @@ async def delete(self, id: UUID) -> bool:
             ReferentialIntegrityError: If the task card is referenced by existing sessions.
         """
         try:
-            return await super().delete(id)
+            return await super().delete(id)  # type: ignore[no-any-return]
         except IntegrityError as e:
             self._session.rollback()
             if "FOREIGN KEY constraint failed" in str(e) or "sessions" in str(e):
@@ -112,7 +112,7 @@ async def list_all(self, limit: int = 100, offset: int = 0) -> list[TaskCardORM]
         Returns:
             List of task cards.
         """
-        return await super().list_all(limit, offset)
+        return await super().list_all(limit, offset)  # type: ignore[no-any-return]
 
     async def search_by_goal(self, query: str, limit: int = 20) -> list[TaskCardORM]:
         """Search task cards by goal text.

diff --git a/src/benchmark_core/repositories/variant_repository.py b/src/benchmark_core/repositories/variant_repository.py
@@ -94,7 +94,7 @@ async def delete(self, id: UUID) -> bool:
             ReferentialIntegrityError: If the variant is referenced by existing sessions.
         """
         try:
-            return await super().delete(id)
+            return await super().delete(id)  # type: ignore[no-any-return]
         except IntegrityError as e:
             self._session.rollback()
             if "FOREIGN KEY constraint failed" in str(e) or "sessions" in str(e):

diff --git a/src/benchmark_core/retention/__init__.py b/src/benchmark_core/retention/__init__.py
@@ -0,0 +1,151 @@
+"""Retention policy management for benchmark data.
+
+This module provides retention controls for managing the lifecycle
+of benchmark data, ensuring compliance with data governance requirements.
+"""
+
+from dataclasses import dataclass
+from datetime import UTC, datetime, timedelta
+from enum import StrEnum
+from typing import Any
+
+
+class DataType(StrEnum):
+    """Types of benchmark data with retention policies."""
+
+    RAW_INGESTION = "raw_ingestion"
+    NORMALIZED_REQUESTS = "normalized_requests"
+    SESSION_CREDENTIALS = "session_credentials"
+    ARTIFACTS = "artifacts"
+    ROLLUPS = "rollups"
+
+
+@dataclass
+class RetentionPolicy:
+    """Retention policy for a specific data type.
+
+    Attributes:
+        data_type: Type of data this policy applies to.
+        retention_days: Number of days to retain data.
+        delete_after_retention: Whether to delete data after retention period.
+        archive_before_delete: Whether to archive data before deletion.
+    """
+
+    data_type: DataType
+    retention_days: int
+    delete_after_retention: bool = True
+    archive_before_delete: bool = False
+
+    def is_expired(self, created_at: datetime) -> bool:
+        """Check if data with the given creation timestamp is expired.
+
+        Args:
+            created_at: Creation timestamp of the data.
+
+        Returns:
+            True if the data is past its retention period.
+        """
+        # Ensure both datetimes are timezone-aware for comparison
+        expiration = created_at + timedelta(days=self.retention_days)
+        now = datetime.now(UTC)
+        if created_at.tzinfo is None:
+            # If created_at is naive, assume UTC
+            expiration = expiration.replace(tzinfo=UTC)
+        return now > expiration
+
+    def get_expiration_date(self, created_at: datetime) -> datetime:
+        """Get the expiration date for data with the given creation timestamp.
+
+        Args:
+            created_at: Creation timestamp of the data.
+
+        Returns:
+            Expiration datetime.
+        """
+        return created_at + timedelta(days=self.retention_days)
+
+
+@dataclass
+class RetentionSettings:
+    """Complete retention settings for all benchmark data types.
+
+    This class defines default retention policies that can be customized
+    per deployment. Default values are designed for typical benchmarking
+    workflows while maintaining auditability.
+    """
+
+    policies: dict[DataType, RetentionPolicy]
+
+    @classmethod
+    def defaults(cls) -> "RetentionSettings":
+        """Create retention settings with default policies.
+
+        Default retention periods:
+        - Raw ingestion: 7 days (short-lived, high volume)
+        - Normalized requests: 30 days (queryable for recent sessions)
+        - Session credentials: 1 day (security best practice)
+        - Artifacts: 90 days (exported reports may be needed for audits)
+        - Rollups: 365 days (aggregated data for long-term trends)
+        """
+        return cls(
+            policies={
+                DataType.RAW_INGESTION: RetentionPolicy(
+                    data_type=DataType.RAW_INGESTION,
+                    retention_days=7,
+                    delete_after_retention=True,
+                ),
+                DataType.NORMALIZED_REQUESTS: RetentionPolicy(
+                    data_type=DataType.NORMALIZED_REQUESTS,
+                    retention_days=30,
+                    delete_after_retention=True,
+                ),
+                DataType.SESSION_CREDENTIALS: RetentionPolicy(
+                    data_type=DataType.SESSION_CREDENTIALS,
+                    retention_days=1,
+                    delete_after_retention=True,
+                ),
+                DataType.ARTIFACTS: RetentionPolicy(
+                    data_type=DataType.ARTIFACTS,
+                    retention_days=90,
+                    delete_after_retention=False,
+                    archive_before_delete=True,
+                ),
+                DataType.ROLLUPS: RetentionPolicy(
+                    data_type=DataType.ROLLUPS,
+                    retention_days=365,
+                    delete_after_retention=False,
+                ),
+            }
+        )
+
+    def get_policy(self, data_type: DataType) -> RetentionPolicy:
+        """Get retention policy for a specific data type.
+
+        Args:
+            data_type: Type of data.
+
+        Returns:
+            Retention policy for the data type.
+        """
+        return self.policies.get(
+            data_type,
+            RetentionPolicy(data_type=data_type, retention_days=30),
+        )
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert retention settings to a dictionary.
+
+        Returns:
+            Dictionary representation of retention settings.
+        """
+        return {
+            "policies": {
+                dt.value: {
+                    "data_type": policy.data_type.value,
+                    "retention_days": policy.retention_days,
+                    "delete_after_retention": policy.delete_after_retention,
+                    "archive_before_delete": policy.archive_before_delete,
+                }
+                for dt, policy in self.policies.items()
+            }
+        }
diff --git a/src/benchmark_core/security/__init__.py b/src/benchmark_core/security/__init__.py
@@ -0,0 +1,75 @@
+"""Security utilities for redaction, secret handling, and audit controls.
+
+This package provides security utilities for redaction, secret detection,
+content capture, and retention management.
+"""
+
+# Package submodule exports (package security module interface)
+# Import directly from module file to avoid circular import
+import importlib.util
+import sys
+from pathlib import Path
+
+from .redaction import (
+    REDACTION_PATTERNS,
+    RedactionConfig,
+    redact_dict,
+    redact_string,
+    redact_value,
+)
+from .secrets import (
+    SecretDetector,
+    detect_secrets,
+    is_likely_secret,
+    scan_dict_for_secrets,
+)
+
+# Load legacy security.py module for backward compatibility
+_security_spec = importlib.util.spec_from_file_location(
+    "_legacy_security", str(Path(__file__).parent.parent / "security.py")
+)
+assert _security_spec is not None, "Failed to load legacy security module spec"
+_legacy_security = importlib.util.module_from_spec(_security_spec)
+sys.modules["_legacy_security"] = _legacy_security
+if _security_spec.loader is not None:
+    _security_spec.loader.exec_module(_legacy_security)
+
+# Re-export legacy module classes (for backward compatibility with existing tests/code)
+# These override the package exports for legacy compatibility
+ContentCaptureConfig = _legacy_security.ContentCaptureConfig
+DEFAULT_CONTENT_CAPTURE_CONFIG = _legacy_security.DEFAULT_CONTENT_CAPTURE_CONFIG
+DEFAULT_REDACTION_CONFIG = _legacy_security.DEFAULT_REDACTION_CONFIG
+DEFAULT_RETENTION_SETTINGS = _legacy_security.DEFAULT_RETENTION_SETTINGS
+RedactionConfig = _legacy_security.RedactionConfig  # type: ignore[misc]  # noqa: F811
+RedactionFilter = _legacy_security.RedactionFilter
+RetentionPolicy = _legacy_security.RetentionPolicy
+RetentionSettings = _legacy_security.RetentionSettings
+SecretPattern = _legacy_security.SecretPattern
+get_redaction_filter = _legacy_security.get_redaction_filter
+redact_for_logging = _legacy_security.redact_for_logging
+should_capture_content = _legacy_security.should_capture_content
+
+__all__ = [
+    # Legacy module exports (primary interface for backward compatibility)
+    "ContentCaptureConfig",
+    "DEFAULT_CONTENT_CAPTURE_CONFIG",
+    "DEFAULT_REDACTION_CONFIG",
+    "DEFAULT_RETENTION_SETTINGS",
+    "RedactionConfig",
+    "RedactionFilter",
+    "RetentionPolicy",
+    "RetentionSettings",
+    "SecretPattern",
+    "get_redaction_filter",
+    "redact_for_logging",
+    "should_capture_content",
+    # Package submodule exports (package security module interface)
+    "REDACTION_PATTERNS",
+    "redact_dict",
+    "redact_string",
+    "redact_value",
+    "SecretDetector",
+    "detect_secrets",
+    "is_likely_secret",
+    "scan_dict_for_secrets",
+]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		"""StackPerf benchmarking system."""

		__version__ = "0.1.0"