From 60d48162da9e6736ceedd7e038f08fe72c021f18 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Fri, 24 Apr 2026 16:25:56 +0530 Subject: [PATCH 01/27] api deployment notification init --- backend/api_v2/notification.py | 31 +- backend/notification_v2/enums.py | 12 + .../migrations/0002_notification_notify_on.py | 30 ++ backend/notification_v2/models.py | 13 +- backend/notification_v2/serializers.py | 7 +- backend/notification_v2/tests/__init__.py | 0 .../tests/test_notification_filter.py | 321 ++++++++++++++++++ backend/pipeline_v2/notification.py | 31 +- .../notification-modal/CreateNotification.jsx | 13 + 9 files changed, 448 insertions(+), 10 deletions(-) create mode 100644 backend/notification_v2/migrations/0002_notification_notify_on.py create mode 100644 backend/notification_v2/tests/__init__.py create mode 100644 backend/notification_v2/tests/test_notification_filter.py diff --git a/backend/api_v2/notification.py b/backend/api_v2/notification.py index 733084c637..a41ebc3000 100644 --- a/backend/api_v2/notification.py +++ b/backend/api_v2/notification.py @@ -1,8 +1,10 @@ import logging +from notification_v2.enums import NotificationTrigger from notification_v2.helper import NotificationHelper from notification_v2.models import Notification from pipeline_v2.dto import PipelineStatusPayload +from workflow_manager.workflow_v2.enums import ExecutionStatus from workflow_manager.workflow_v2.models.execution import WorkflowExecution from api_v2.models import APIDeployment @@ -16,11 +18,32 @@ def __init__(self, api: APIDeployment, workflow_execution: WorkflowExecution) -> self.api = api self.workflow_execution = workflow_execution - def send(self): - if not self.notifications.count(): - logger.info(f"No notifications found for api {self.api}") + def send(self) -> None: + # Partition notifications by the run outcome so each row's notify_on + # preference is honored. STOPPED and any other non-terminal status + # fire only for ALL — explicit opt-ins to FAILURES/SUCCESS shouldn't. + status = self.workflow_execution.status + if status == ExecutionStatus.ERROR.value: + self.notifications = self.notifications.exclude( + notify_on=NotificationTrigger.SUCCESS_ONLY.value + ) + elif status == ExecutionStatus.COMPLETED.value: + self.notifications = self.notifications.exclude( + notify_on=NotificationTrigger.FAILURES_ONLY.value + ) + else: + self.notifications = self.notifications.filter( + notify_on=NotificationTrigger.ALL.value + ) + + if not self.notifications.exists(): + logger.info( + "No notifications to dispatch for api %s (status=%s)", + self.api, + status, + ) return - logger.info(f"Sending api status notification for api {self.api}") + logger.info("Sending api status notification for api %s", self.api) payload_dto = PipelineStatusPayload( type="API", diff --git a/backend/notification_v2/enums.py b/backend/notification_v2/enums.py index 991b08cac9..516b34074b 100644 --- a/backend/notification_v2/enums.py +++ b/backend/notification_v2/enums.py @@ -36,3 +36,15 @@ class PlatformType(Enum): @classmethod def choices(cls): return [(e.value, e.name.replace("_", " ").capitalize()) for e in cls] + + +class NotificationTrigger(Enum): + """Controls which run outcomes fire a notification.""" + + ALL = "ALL" + FAILURES_ONLY = "FAILURES_ONLY" + SUCCESS_ONLY = "SUCCESS_ONLY" + + @classmethod + def choices(cls): + return [(e.value, e.name.replace("_", " ").capitalize()) for e in cls] diff --git a/backend/notification_v2/migrations/0002_notification_notify_on.py b/backend/notification_v2/migrations/0002_notification_notify_on.py new file mode 100644 index 0000000000..53c1180126 --- /dev/null +++ b/backend/notification_v2/migrations/0002_notification_notify_on.py @@ -0,0 +1,30 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("notification_v2", "0001_initial"), + ] + + operations = [ + migrations.AddField( + model_name="notification", + name="notify_on", + field=models.CharField( + max_length=50, + choices=[ + ("ALL", "All"), + ("FAILURES_ONLY", "Failures only"), + ("SUCCESS_ONLY", "Success only"), + ], + default="ALL", + db_comment=( + "Controls which run outcomes trigger this notification. ALL " + "(default) preserves the historical 'notify on every " + "completion' behavior; FAILURES_ONLY fires only on failed " + "runs (ERROR for API deployments, FAILURE for ETL " + "pipelines); SUCCESS_ONLY fires only on successful runs." + ), + ), + ), + ] diff --git a/backend/notification_v2/models.py b/backend/notification_v2/models.py index 489a8c827e..4fe21cd6c4 100644 --- a/backend/notification_v2/models.py +++ b/backend/notification_v2/models.py @@ -5,7 +5,7 @@ from pipeline_v2.models import Pipeline from utils.models.base_model import BaseModel -from .enums import AuthorizationType, NotificationType, PlatformType +from .enums import AuthorizationType, NotificationTrigger, NotificationType, PlatformType NOTIFICATION_NAME_MAX_LENGTH = 255 @@ -47,6 +47,17 @@ class Notification(BaseModel): default=True, db_comment="Flag indicating whether the notification is active or not.", ) + notify_on = models.CharField( + max_length=50, + choices=NotificationTrigger.choices(), + default=NotificationTrigger.ALL.value, + db_comment=( + "Controls which run outcomes trigger this notification. ALL (default) " + "preserves the historical 'notify on every completion' behavior; " + "FAILURES_ONLY fires only on failed runs (ERROR for API deployments, " + "FAILURE for ETL pipelines); SUCCESS_ONLY fires only on successful runs." + ), + ) # Foreign keys to specific models pipeline = models.ForeignKey( Pipeline, diff --git a/backend/notification_v2/serializers.py b/backend/notification_v2/serializers.py index 115487c481..784ec75413 100644 --- a/backend/notification_v2/serializers.py +++ b/backend/notification_v2/serializers.py @@ -1,7 +1,7 @@ from rest_framework import serializers from utils.input_sanitizer import validate_name_field -from .enums import AuthorizationType, NotificationType, PlatformType +from .enums import AuthorizationType, NotificationTrigger, NotificationType, PlatformType from .models import Notification @@ -12,6 +12,11 @@ class NotificationSerializer(serializers.ModelSerializer): max_retries = serializers.IntegerField( max_value=4, min_value=0, default=0, required=False ) + notify_on = serializers.ChoiceField( + choices=NotificationTrigger.choices(), + default=NotificationTrigger.ALL.value, + required=False, + ) class Meta: model = Notification diff --git a/backend/notification_v2/tests/__init__.py b/backend/notification_v2/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/notification_v2/tests/test_notification_filter.py b/backend/notification_v2/tests/test_notification_filter.py new file mode 100644 index 0000000000..7f887be500 --- /dev/null +++ b/backend/notification_v2/tests/test_notification_filter.py @@ -0,0 +1,321 @@ +"""Unit tests for the ``notify_on`` dispatch partition on notifications. + +Covers both dispatch paths that fan out to ``NotificationHelper.send_notification``: + +* ``APINotification.send`` — keyed on ``ExecutionStatus`` (ERROR, COMPLETED, STOPPED) +* ``PipelineNotification.send`` — keyed on ``Pipeline.PipelineStatus`` + (FAILURE, SUCCESS, INPROGRESS) + +Follows the repo convention (see ``usage_v2/tests/test_helper.py``) of stubbing +Django-heavy modules at import time so the tests run without a live DB. +""" + +from __future__ import annotations + +import sys +import types +from unittest.mock import MagicMock, patch + + +# --------------------------------------------------------------------------- +# Module-level stubs — must be installed BEFORE importing the modules under +# test so Django's ORM imports resolve to our MagicMock-backed fakes. +# --------------------------------------------------------------------------- + + +def _ensure_mod(name: str) -> types.ModuleType: + """Force-install a fresh stub module in ``sys.modules``.""" + mod = types.ModuleType(name) + sys.modules[name] = mod + return mod + + +def _install_stubs() -> None: + # Only stub leaf modules that pull in Django ORM. Parent packages + # (api_v2, pipeline_v2, notification_v2, workflow_manager*) load normally. + + exec_enums = _ensure_mod("workflow_manager.workflow_v2.enums") + + class _ExecStatusNS: + class ERROR: + value = "ERROR" + + class COMPLETED: + value = "COMPLETED" + + class STOPPED: + value = "STOPPED" + + exec_enums.ExecutionStatus = _ExecStatusNS # type: ignore[attr-defined] + + exec_models = _ensure_mod("workflow_manager.workflow_v2.models.execution") + exec_models.WorkflowExecution = MagicMock(name="WorkflowExecution") # type: ignore[attr-defined] + + api_models = _ensure_mod("api_v2.models") + api_models.APIDeployment = MagicMock(name="APIDeployment") # type: ignore[attr-defined] + + # notification_v2.models.Notification with a patchable ``objects``. + notif_models = _ensure_mod("notification_v2.models") + + class _FakeNotification: + objects = MagicMock(name="Notification.objects") + + notif_models.Notification = _FakeNotification # type: ignore[attr-defined] + + # notification_v2.helper.NotificationHelper + notif_helper = _ensure_mod("notification_v2.helper") + + class _FakeHelper: + send_notification = MagicMock(name="NotificationHelper.send_notification") + + notif_helper.NotificationHelper = _FakeHelper # type: ignore[attr-defined] + + # pipeline_v2.dto.PipelineStatusPayload + pipeline_dto = _ensure_mod("pipeline_v2.dto") + pipeline_dto.PipelineStatusPayload = MagicMock(name="PipelineStatusPayload") # type: ignore[attr-defined] + + # pipeline_v2.models.Pipeline with a PipelineStatus text-choices surface. + pipeline_models = _ensure_mod("pipeline_v2.models") + + class _PipelineStatus: + SUCCESS = "SUCCESS" + FAILURE = "FAILURE" + INPROGRESS = "INPROGRESS" + + class _FakePipeline: + PipelineStatus = _PipelineStatus + + pipeline_models.Pipeline = _FakePipeline # type: ignore[attr-defined] + + +_install_stubs() + + +# Now safe to import the modules under test. +from api_v2 import notification as api_notification_mod # noqa: E402 +from notification_v2.enums import NotificationTrigger # noqa: E402 +from notification_v2.helper import NotificationHelper # noqa: E402 +from notification_v2.models import Notification # noqa: E402 +from pipeline_v2 import notification as pipeline_notification_mod # noqa: E402 +from pipeline_v2.models import Pipeline # noqa: E402 + + +# --------------------------------------------------------------------------- +# Test helpers +# --------------------------------------------------------------------------- + + +def _make_queryset(notifications: list[MagicMock]) -> MagicMock: + """Return a MagicMock that mimics the chained QuerySet surface we use. + + Supports: + qs.filter(notify_on=) -> qs with matching rows + qs.exclude(notify_on=) -> qs with non-matching rows + qs.exists() -> bool based on contents + iter(qs) -> notifications + """ + qs = MagicMock(name="qs") + qs.__iter__ = lambda self: iter(notifications) + + def _filter(**kwargs): + if "notify_on" in kwargs: + target = kwargs["notify_on"] + kept = [n for n in notifications if n.notify_on == target] + return _make_queryset(kept) + return _make_queryset(notifications) + + def _exclude(**kwargs): + if "notify_on" in kwargs: + target = kwargs["notify_on"] + kept = [n for n in notifications if n.notify_on != target] + return _make_queryset(kept) + return _make_queryset(notifications) + + qs.filter.side_effect = _filter + qs.exclude.side_effect = _exclude + qs.exists.return_value = bool(notifications) + qs.count.return_value = len(notifications) + return qs + + +def _make_notification(*, notify_on: str) -> MagicMock: + n = MagicMock(name="Notification") + n.notify_on = notify_on + return n + + +# --------------------------------------------------------------------------- +# APINotification — 3 modes × 3 statuses +# --------------------------------------------------------------------------- + + +class TestAPINotificationFilter: + def _setup(self, *, status: str, notifications: list[MagicMock]): + Notification.objects.filter.reset_mock() + Notification.objects.filter.side_effect = None + Notification.objects.filter.return_value = _make_queryset(notifications) + NotificationHelper.send_notification.reset_mock() + + api = MagicMock(name="APIDeployment") + api.api_name = "test-api" + api.id = "api-uuid" + + execution = MagicMock(name="WorkflowExecution") + execution.status = status + execution.id = "exec-uuid" + execution.error_message = "boom" if status == "ERROR" else None + + return api_notification_mod.APINotification(api=api, workflow_execution=execution) + + # --- ALL: fires on every status --- + def test_all_fires_on_completed(self): + n = _make_notification(notify_on=NotificationTrigger.ALL.value) + self._setup(status="COMPLETED", notifications=[n]).send() + assert NotificationHelper.send_notification.call_count == 1 + + def test_all_fires_on_error(self): + n = _make_notification(notify_on=NotificationTrigger.ALL.value) + self._setup(status="ERROR", notifications=[n]).send() + assert NotificationHelper.send_notification.call_count == 1 + + def test_all_fires_on_stopped(self): + n = _make_notification(notify_on=NotificationTrigger.ALL.value) + self._setup(status="STOPPED", notifications=[n]).send() + assert NotificationHelper.send_notification.call_count == 1 + + # --- FAILURES_ONLY: fires on ERROR only --- + def test_failures_only_suppressed_on_completed(self): + n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) + self._setup(status="COMPLETED", notifications=[n]).send() + NotificationHelper.send_notification.assert_not_called() + + def test_failures_only_fires_on_error(self): + n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) + self._setup(status="ERROR", notifications=[n]).send() + assert NotificationHelper.send_notification.call_count == 1 + + def test_failures_only_suppressed_on_stopped(self): + n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) + self._setup(status="STOPPED", notifications=[n]).send() + NotificationHelper.send_notification.assert_not_called() + + # --- SUCCESS_ONLY: fires on COMPLETED only --- + def test_success_only_fires_on_completed(self): + n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) + self._setup(status="COMPLETED", notifications=[n]).send() + assert NotificationHelper.send_notification.call_count == 1 + + def test_success_only_suppressed_on_error(self): + n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) + self._setup(status="ERROR", notifications=[n]).send() + NotificationHelper.send_notification.assert_not_called() + + def test_success_only_suppressed_on_stopped(self): + n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) + self._setup(status="STOPPED", notifications=[n]).send() + NotificationHelper.send_notification.assert_not_called() + + # --- Mixed partition on a COMPLETED run: ALL + SUCCESS_ONLY fire, FAILURES_ONLY doesn't --- + def test_mixed_partition_on_completed(self): + all_mode = _make_notification(notify_on=NotificationTrigger.ALL.value) + failures_only = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) + success_only = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) + notifier = self._setup( + status="COMPLETED", notifications=[all_mode, failures_only, success_only] + ) + with patch.object(api_notification_mod, "PipelineStatusPayload") as payload_cls: + payload_cls.return_value.to_dict.return_value = {} + notifier.send() + + assert NotificationHelper.send_notification.call_count == 1 + kwargs = NotificationHelper.send_notification.call_args.kwargs + dispatched = sorted(n.notify_on for n in kwargs["notifications"]) + assert dispatched == ["ALL", "SUCCESS_ONLY"] + + +# --------------------------------------------------------------------------- +# PipelineNotification — 3 modes × 3 statuses +# --------------------------------------------------------------------------- + + +class TestPipelineNotificationFilter: + def _setup(self, *, last_run_status: str, notifications: list[MagicMock]): + Notification.objects.filter.reset_mock() + Notification.objects.filter.side_effect = None + Notification.objects.filter.return_value = _make_queryset(notifications) + NotificationHelper.send_notification.reset_mock() + + pipeline = MagicMock(name="Pipeline") + pipeline.id = "pipeline-uuid" + pipeline.pipeline_name = "test-pipeline" + pipeline.pipeline_type = "ETL" + pipeline.last_run_status = last_run_status + + return pipeline_notification_mod.PipelineNotification( + pipeline=pipeline, execution_id="exec-uuid", error_message=None + ) + + # --- ALL --- + def test_all_fires_on_success(self): + n = _make_notification(notify_on=NotificationTrigger.ALL.value) + self._setup( + last_run_status=Pipeline.PipelineStatus.SUCCESS, notifications=[n] + ).send() + assert NotificationHelper.send_notification.call_count == 1 + + def test_all_fires_on_failure(self): + n = _make_notification(notify_on=NotificationTrigger.ALL.value) + self._setup( + last_run_status=Pipeline.PipelineStatus.FAILURE, notifications=[n] + ).send() + assert NotificationHelper.send_notification.call_count == 1 + + # --- FAILURES_ONLY --- + def test_failures_only_suppressed_on_success(self): + n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) + self._setup( + last_run_status=Pipeline.PipelineStatus.SUCCESS, notifications=[n] + ).send() + NotificationHelper.send_notification.assert_not_called() + + def test_failures_only_fires_on_failure(self): + n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) + self._setup( + last_run_status=Pipeline.PipelineStatus.FAILURE, notifications=[n] + ).send() + assert NotificationHelper.send_notification.call_count == 1 + + # --- SUCCESS_ONLY --- + def test_success_only_fires_on_success(self): + n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) + self._setup( + last_run_status=Pipeline.PipelineStatus.SUCCESS, notifications=[n] + ).send() + assert NotificationHelper.send_notification.call_count == 1 + + def test_success_only_suppressed_on_failure(self): + n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) + self._setup( + last_run_status=Pipeline.PipelineStatus.FAILURE, notifications=[n] + ).send() + NotificationHelper.send_notification.assert_not_called() + + # --- Mixed partition on a SUCCESS run --- + def test_mixed_partition_on_success(self): + all_mode = _make_notification(notify_on=NotificationTrigger.ALL.value) + failures_only = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) + success_only = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) + notifier = self._setup( + last_run_status=Pipeline.PipelineStatus.SUCCESS, + notifications=[all_mode, failures_only, success_only], + ) + with patch.object( + pipeline_notification_mod, "PipelineStatusPayload" + ) as payload_cls: + payload_cls.return_value.to_dict.return_value = {} + notifier.send() + + assert NotificationHelper.send_notification.call_count == 1 + kwargs = NotificationHelper.send_notification.call_args.kwargs + dispatched = sorted(n.notify_on for n in kwargs["notifications"]) + assert dispatched == ["ALL", "SUCCESS_ONLY"] diff --git a/backend/pipeline_v2/notification.py b/backend/pipeline_v2/notification.py index dbfc0dea52..64df6f1126 100644 --- a/backend/pipeline_v2/notification.py +++ b/backend/pipeline_v2/notification.py @@ -1,5 +1,6 @@ import logging +from notification_v2.enums import NotificationTrigger from notification_v2.helper import NotificationHelper from notification_v2.models import Notification @@ -23,11 +24,33 @@ def __init__( self.error_message = error_message self.execution_id = execution_id - def send(self): - if not self.notifications.count(): - logger.info(f"No notifications found for pipeline {self.pipeline}") + def send(self) -> None: + # Partition notifications by the run outcome so each row's notify_on + # preference is honored. PipelineUtils.update_pipeline_status collapses + # both ERROR and STOPPED execution statuses into PipelineStatus.FAILURE, + # so FAILURES_ONLY subscribers get alerts for both on the pipeline side. + status = self.pipeline.last_run_status + if status == Pipeline.PipelineStatus.FAILURE: + self.notifications = self.notifications.exclude( + notify_on=NotificationTrigger.SUCCESS_ONLY.value + ) + elif status == Pipeline.PipelineStatus.SUCCESS: + self.notifications = self.notifications.exclude( + notify_on=NotificationTrigger.FAILURES_ONLY.value + ) + else: + self.notifications = self.notifications.filter( + notify_on=NotificationTrigger.ALL.value + ) + + if not self.notifications.exists(): + logger.info( + "No notifications to dispatch for pipeline %s (status=%s)", + self.pipeline, + status, + ) return - logger.info(f"Sending pipeline status notification for pipeline {self.pipeline}") + logger.info("Sending pipeline status notification for pipeline %s", self.pipeline) payload_dto = PipelineStatusPayload( type=self.pipeline.pipeline_type, pipeline_id=str(self.pipeline.id), diff --git a/frontend/src/components/pipelines-or-deployments/notification-modal/CreateNotification.jsx b/frontend/src/components/pipelines-or-deployments/notification-modal/CreateNotification.jsx index 9c207bd1a9..9302f9f1dc 100644 --- a/frontend/src/components/pipelines-or-deployments/notification-modal/CreateNotification.jsx +++ b/frontend/src/components/pipelines-or-deployments/notification-modal/CreateNotification.jsx @@ -12,6 +12,7 @@ const DEFAULT_FORM_DETAILS = { authorization_key: "", is_active: false, max_retries: 0, + notify_on: "ALL", pipeline: "", api: "", url: "", @@ -54,6 +55,12 @@ const AUTHORIZATION_TYPES = [ }, ]; +const NOTIFY_ON_OPTIONS = [ + { value: "ALL", label: "On every completion" }, + { value: "FAILURES_ONLY", label: "On failures only" }, + { value: "SUCCESS_ONLY", label: "On success only" }, +]; + function CreateNotification({ setIsForm, type, @@ -192,6 +199,12 @@ function CreateNotification({ tooltip: "Specify the maximum number of times the notification should be retried if it fails.", }, + { + label: "Notify on", + name: "notify_on", + component: , - tooltip: "Choose which run outcomes should trigger this webhook.", - }, ]; return ( @@ -233,6 +221,13 @@ function CreateNotification({ ), )} + + Notify on failures only + diff --git a/unstract/core/src/unstract/core/data_models.py b/unstract/core/src/unstract/core/data_models.py index 7e8e984a04..82da16a72e 100644 --- a/unstract/core/src/unstract/core/data_models.py +++ b/unstract/core/src/unstract/core/data_models.py @@ -510,6 +510,12 @@ class NotificationPayload: error_message: str | None = None organization_id: str | None = None + # Per-run file aggregates surfaced into webhook payloads. + # Default 0 lets receivers switch on a numeric value without None-checks. + total_files: int = 0 + successful_files: int = 0 + failed_files: int = 0 + # Metadata timestamp: datetime = field(default_factory=lambda: datetime.now(UTC)) additional_data: dict[str, Any] = field(default_factory=dict) @@ -565,6 +571,9 @@ def from_execution_status( error_message: str | None = None, organization_id: str | None = None, additional_data: dict[str, Any] | None = None, + total_files: int = 0, + successful_files: int = 0, + failed_files: int = 0, ) -> "NotificationPayload": """Create notification payload from execution status. @@ -607,6 +616,9 @@ def from_execution_status( execution_id=execution_id, error_message=error_message, organization_id=organization_id, + total_files=total_files, + successful_files=successful_files, + failed_files=failed_files, additional_data=additional_data or {}, _source=source, ) diff --git a/workers/callback/tasks.py b/workers/callback/tasks.py index 42599f0659..eee5b97da3 100644 --- a/workers/callback/tasks.py +++ b/workers/callback/tasks.py @@ -381,12 +381,16 @@ def _update_execution_status_unified( try: # Consistent workflow execution status update across all callback types total_files = aggregated_results.get("total_files", 0) + successful_files = aggregated_results.get("successful_files", 0) + failed_files = aggregated_results.get("failed_files", 0) # Make the unified API call api_client.update_workflow_execution_status( execution_id=execution_id, status=final_status, total_files=total_files, + successful_files=successful_files, + failed_files=failed_files, organization_id=organization_id, error_message=error_message, ) diff --git a/workers/scheduler/tasks.py b/workers/scheduler/tasks.py index 2e22946a58..65e5bd7af1 100644 --- a/workers/scheduler/tasks.py +++ b/workers/scheduler/tasks.py @@ -81,6 +81,7 @@ def _send_pipeline_status_notification( pipeline_id=pipeline_id, pipeline_name=pipeline_name, notification_payload=notification, + execution_id=execution_id, ) logger.info(f"Notification sent successfully for {pipeline_type} {pipeline_id}") except Exception as notification_error: diff --git a/workers/shared/api/internal_client.py b/workers/shared/api/internal_client.py index def90bd86a..4964a2b377 100644 --- a/workers/shared/api/internal_client.py +++ b/workers/shared/api/internal_client.py @@ -603,6 +603,8 @@ def update_workflow_execution_status( status: str, error_message: str | None = None, total_files: int | None = None, + successful_files: int | None = None, + failed_files: int | None = None, attempts: int | None = None, execution_time: float | None = None, organization_id: str | None = None, @@ -613,6 +615,8 @@ def update_workflow_execution_status( status, error_message, total_files, + successful_files, + failed_files, attempts, execution_time, organization_id, diff --git a/workers/shared/clients/execution_client.py b/workers/shared/clients/execution_client.py index 80e9ca6568..e1373eb9f5 100644 --- a/workers/shared/clients/execution_client.py +++ b/workers/shared/clients/execution_client.py @@ -265,6 +265,8 @@ def update_workflow_execution_status( status: str | TaskStatus, error_message: str | None = None, total_files: int | None = None, + successful_files: int | None = None, + failed_files: int | None = None, attempts: int | None = None, execution_time: float | None = None, organization_id: str | None = None, @@ -276,6 +278,8 @@ def update_workflow_execution_status( status: New status (TaskStatus enum or string) error_message: Optional error message total_files: Optional total files count + successful_files: Optional count of files that completed successfully + failed_files: Optional count of files that errored attempts: Optional attempts count execution_time: Optional execution time organization_id: Optional organization ID override @@ -292,6 +296,10 @@ def update_workflow_execution_status( data["error_message"] = error_message if total_files is not None: data["total_files"] = total_files + if successful_files is not None: + data["successful_files"] = successful_files + if failed_files is not None: + data["failed_files"] = failed_files if attempts is not None: data["attempts"] = attempts if execution_time is not None: diff --git a/workers/shared/patterns/notification/helper.py b/workers/shared/patterns/notification/helper.py index 977f5a875f..c4f4c5d518 100644 --- a/workers/shared/patterns/notification/helper.py +++ b/workers/shared/patterns/notification/helper.py @@ -104,6 +104,7 @@ def trigger_notification( pipeline_id: str, pipeline_name: str, notification_payload: NotificationPayload, + execution_id: str | None = None, ) -> None: """Trigger notifications for pipeline status updates. @@ -111,10 +112,13 @@ def trigger_notification( Uses API client to fetch notification configuration. """ try: - # Fetch pipeline notifications via API + # Pass execution_id so the backend filter respects notify_on_failures + # (see trigger_pipeline_notifications for the rationale). + params = {"execution_id": execution_id} if execution_id else None response_data = api_client._make_request( method="GET", endpoint=f"v1/webhook/pipeline/{pipeline_id}/notifications/", + params=params, timeout=10, ) @@ -176,10 +180,14 @@ def trigger_pipeline_notifications( return try: - # Fetch pipeline notifications via API + # Pass execution_id so the backend can drop notify_on_failures=True rows + # on success runs. Without it the endpoint is a no-op and we'd fire on + # every active row regardless of trigger preference. + params = {"execution_id": execution_id} if execution_id else None response_data = api_client._make_request( method="GET", endpoint=f"v1/webhook/pipeline/{pipeline_id}/notifications/", + params=params, timeout=10, ) @@ -204,7 +212,9 @@ def trigger_pipeline_notifications( else: workflow_type = WorkflowType.ETL # Default fallback - # Create notification payload using dataclass + # File counts come from WorkflowExecution via the same endpoint so + # webhook receivers (Slack, raw API) see partial-success breakdowns. + counts = response_data.get("execution_counts") or {} payload = NotificationPayload.from_execution_status( pipeline_id=pipeline_id, pipeline_name=pipeline_name, @@ -213,6 +223,9 @@ def trigger_pipeline_notifications( source=NotificationSource.CALLBACK_WORKER, execution_id=execution_id, error_message=error_message, + total_files=counts.get("total_files", 0), + successful_files=counts.get("successful_files", 0), + failed_files=counts.get("failed_files", 0), ) logger.info( @@ -261,9 +274,14 @@ def trigger_api_notifications( return try: - # Fetch API notifications via API + # See trigger_pipeline_notifications: execution_id powers the backend + # filter that respects notify_on_failures. + params = {"execution_id": execution_id} if execution_id else None response_data = api_client._make_request( - method="GET", endpoint=f"v1/webhook/api/{api_id}/notifications/", timeout=10 + method="GET", + endpoint=f"v1/webhook/api/{api_id}/notifications/", + params=params, + timeout=10, ) # _make_request already handles status codes and returns parsed data @@ -277,7 +295,7 @@ def trigger_api_notifications( logger.info(f"No active notifications found for API {api_id}") return - # Create notification payload using dataclass + counts = response_data.get("execution_counts") or {} payload = NotificationPayload.from_execution_status( pipeline_id=api_id, pipeline_name=api_name, @@ -286,6 +304,9 @@ def trigger_api_notifications( source=NotificationSource.CALLBACK_WORKER, execution_id=execution_id, error_message=error_message, + total_files=counts.get("total_files", 0), + successful_files=counts.get("successful_files", 0), + failed_files=counts.get("failed_files", 0), ) logger.info( From 9cd8eb1984891231e9cb1e673c0225be24534a55 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Tue, 5 May 2026 22:39:48 +0530 Subject: [PATCH 07/27] slack webhook payload --- .../tests/test_notification_filter.py | 321 ------------------ backend/pipeline_v2/dto.py | 14 +- .../core/src/unstract/core/data_models.py | 24 +- .../notification/providers/slack_webhook.py | 9 +- .../shared/patterns/notification/helper.py | 3 +- 5 files changed, 30 insertions(+), 341 deletions(-) delete mode 100644 backend/notification_v2/tests/test_notification_filter.py diff --git a/backend/notification_v2/tests/test_notification_filter.py b/backend/notification_v2/tests/test_notification_filter.py deleted file mode 100644 index 7f887be500..0000000000 --- a/backend/notification_v2/tests/test_notification_filter.py +++ /dev/null @@ -1,321 +0,0 @@ -"""Unit tests for the ``notify_on`` dispatch partition on notifications. - -Covers both dispatch paths that fan out to ``NotificationHelper.send_notification``: - -* ``APINotification.send`` — keyed on ``ExecutionStatus`` (ERROR, COMPLETED, STOPPED) -* ``PipelineNotification.send`` — keyed on ``Pipeline.PipelineStatus`` - (FAILURE, SUCCESS, INPROGRESS) - -Follows the repo convention (see ``usage_v2/tests/test_helper.py``) of stubbing -Django-heavy modules at import time so the tests run without a live DB. -""" - -from __future__ import annotations - -import sys -import types -from unittest.mock import MagicMock, patch - - -# --------------------------------------------------------------------------- -# Module-level stubs — must be installed BEFORE importing the modules under -# test so Django's ORM imports resolve to our MagicMock-backed fakes. -# --------------------------------------------------------------------------- - - -def _ensure_mod(name: str) -> types.ModuleType: - """Force-install a fresh stub module in ``sys.modules``.""" - mod = types.ModuleType(name) - sys.modules[name] = mod - return mod - - -def _install_stubs() -> None: - # Only stub leaf modules that pull in Django ORM. Parent packages - # (api_v2, pipeline_v2, notification_v2, workflow_manager*) load normally. - - exec_enums = _ensure_mod("workflow_manager.workflow_v2.enums") - - class _ExecStatusNS: - class ERROR: - value = "ERROR" - - class COMPLETED: - value = "COMPLETED" - - class STOPPED: - value = "STOPPED" - - exec_enums.ExecutionStatus = _ExecStatusNS # type: ignore[attr-defined] - - exec_models = _ensure_mod("workflow_manager.workflow_v2.models.execution") - exec_models.WorkflowExecution = MagicMock(name="WorkflowExecution") # type: ignore[attr-defined] - - api_models = _ensure_mod("api_v2.models") - api_models.APIDeployment = MagicMock(name="APIDeployment") # type: ignore[attr-defined] - - # notification_v2.models.Notification with a patchable ``objects``. - notif_models = _ensure_mod("notification_v2.models") - - class _FakeNotification: - objects = MagicMock(name="Notification.objects") - - notif_models.Notification = _FakeNotification # type: ignore[attr-defined] - - # notification_v2.helper.NotificationHelper - notif_helper = _ensure_mod("notification_v2.helper") - - class _FakeHelper: - send_notification = MagicMock(name="NotificationHelper.send_notification") - - notif_helper.NotificationHelper = _FakeHelper # type: ignore[attr-defined] - - # pipeline_v2.dto.PipelineStatusPayload - pipeline_dto = _ensure_mod("pipeline_v2.dto") - pipeline_dto.PipelineStatusPayload = MagicMock(name="PipelineStatusPayload") # type: ignore[attr-defined] - - # pipeline_v2.models.Pipeline with a PipelineStatus text-choices surface. - pipeline_models = _ensure_mod("pipeline_v2.models") - - class _PipelineStatus: - SUCCESS = "SUCCESS" - FAILURE = "FAILURE" - INPROGRESS = "INPROGRESS" - - class _FakePipeline: - PipelineStatus = _PipelineStatus - - pipeline_models.Pipeline = _FakePipeline # type: ignore[attr-defined] - - -_install_stubs() - - -# Now safe to import the modules under test. -from api_v2 import notification as api_notification_mod # noqa: E402 -from notification_v2.enums import NotificationTrigger # noqa: E402 -from notification_v2.helper import NotificationHelper # noqa: E402 -from notification_v2.models import Notification # noqa: E402 -from pipeline_v2 import notification as pipeline_notification_mod # noqa: E402 -from pipeline_v2.models import Pipeline # noqa: E402 - - -# --------------------------------------------------------------------------- -# Test helpers -# --------------------------------------------------------------------------- - - -def _make_queryset(notifications: list[MagicMock]) -> MagicMock: - """Return a MagicMock that mimics the chained QuerySet surface we use. - - Supports: - qs.filter(notify_on=) -> qs with matching rows - qs.exclude(notify_on=) -> qs with non-matching rows - qs.exists() -> bool based on contents - iter(qs) -> notifications - """ - qs = MagicMock(name="qs") - qs.__iter__ = lambda self: iter(notifications) - - def _filter(**kwargs): - if "notify_on" in kwargs: - target = kwargs["notify_on"] - kept = [n for n in notifications if n.notify_on == target] - return _make_queryset(kept) - return _make_queryset(notifications) - - def _exclude(**kwargs): - if "notify_on" in kwargs: - target = kwargs["notify_on"] - kept = [n for n in notifications if n.notify_on != target] - return _make_queryset(kept) - return _make_queryset(notifications) - - qs.filter.side_effect = _filter - qs.exclude.side_effect = _exclude - qs.exists.return_value = bool(notifications) - qs.count.return_value = len(notifications) - return qs - - -def _make_notification(*, notify_on: str) -> MagicMock: - n = MagicMock(name="Notification") - n.notify_on = notify_on - return n - - -# --------------------------------------------------------------------------- -# APINotification — 3 modes × 3 statuses -# --------------------------------------------------------------------------- - - -class TestAPINotificationFilter: - def _setup(self, *, status: str, notifications: list[MagicMock]): - Notification.objects.filter.reset_mock() - Notification.objects.filter.side_effect = None - Notification.objects.filter.return_value = _make_queryset(notifications) - NotificationHelper.send_notification.reset_mock() - - api = MagicMock(name="APIDeployment") - api.api_name = "test-api" - api.id = "api-uuid" - - execution = MagicMock(name="WorkflowExecution") - execution.status = status - execution.id = "exec-uuid" - execution.error_message = "boom" if status == "ERROR" else None - - return api_notification_mod.APINotification(api=api, workflow_execution=execution) - - # --- ALL: fires on every status --- - def test_all_fires_on_completed(self): - n = _make_notification(notify_on=NotificationTrigger.ALL.value) - self._setup(status="COMPLETED", notifications=[n]).send() - assert NotificationHelper.send_notification.call_count == 1 - - def test_all_fires_on_error(self): - n = _make_notification(notify_on=NotificationTrigger.ALL.value) - self._setup(status="ERROR", notifications=[n]).send() - assert NotificationHelper.send_notification.call_count == 1 - - def test_all_fires_on_stopped(self): - n = _make_notification(notify_on=NotificationTrigger.ALL.value) - self._setup(status="STOPPED", notifications=[n]).send() - assert NotificationHelper.send_notification.call_count == 1 - - # --- FAILURES_ONLY: fires on ERROR only --- - def test_failures_only_suppressed_on_completed(self): - n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) - self._setup(status="COMPLETED", notifications=[n]).send() - NotificationHelper.send_notification.assert_not_called() - - def test_failures_only_fires_on_error(self): - n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) - self._setup(status="ERROR", notifications=[n]).send() - assert NotificationHelper.send_notification.call_count == 1 - - def test_failures_only_suppressed_on_stopped(self): - n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) - self._setup(status="STOPPED", notifications=[n]).send() - NotificationHelper.send_notification.assert_not_called() - - # --- SUCCESS_ONLY: fires on COMPLETED only --- - def test_success_only_fires_on_completed(self): - n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) - self._setup(status="COMPLETED", notifications=[n]).send() - assert NotificationHelper.send_notification.call_count == 1 - - def test_success_only_suppressed_on_error(self): - n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) - self._setup(status="ERROR", notifications=[n]).send() - NotificationHelper.send_notification.assert_not_called() - - def test_success_only_suppressed_on_stopped(self): - n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) - self._setup(status="STOPPED", notifications=[n]).send() - NotificationHelper.send_notification.assert_not_called() - - # --- Mixed partition on a COMPLETED run: ALL + SUCCESS_ONLY fire, FAILURES_ONLY doesn't --- - def test_mixed_partition_on_completed(self): - all_mode = _make_notification(notify_on=NotificationTrigger.ALL.value) - failures_only = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) - success_only = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) - notifier = self._setup( - status="COMPLETED", notifications=[all_mode, failures_only, success_only] - ) - with patch.object(api_notification_mod, "PipelineStatusPayload") as payload_cls: - payload_cls.return_value.to_dict.return_value = {} - notifier.send() - - assert NotificationHelper.send_notification.call_count == 1 - kwargs = NotificationHelper.send_notification.call_args.kwargs - dispatched = sorted(n.notify_on for n in kwargs["notifications"]) - assert dispatched == ["ALL", "SUCCESS_ONLY"] - - -# --------------------------------------------------------------------------- -# PipelineNotification — 3 modes × 3 statuses -# --------------------------------------------------------------------------- - - -class TestPipelineNotificationFilter: - def _setup(self, *, last_run_status: str, notifications: list[MagicMock]): - Notification.objects.filter.reset_mock() - Notification.objects.filter.side_effect = None - Notification.objects.filter.return_value = _make_queryset(notifications) - NotificationHelper.send_notification.reset_mock() - - pipeline = MagicMock(name="Pipeline") - pipeline.id = "pipeline-uuid" - pipeline.pipeline_name = "test-pipeline" - pipeline.pipeline_type = "ETL" - pipeline.last_run_status = last_run_status - - return pipeline_notification_mod.PipelineNotification( - pipeline=pipeline, execution_id="exec-uuid", error_message=None - ) - - # --- ALL --- - def test_all_fires_on_success(self): - n = _make_notification(notify_on=NotificationTrigger.ALL.value) - self._setup( - last_run_status=Pipeline.PipelineStatus.SUCCESS, notifications=[n] - ).send() - assert NotificationHelper.send_notification.call_count == 1 - - def test_all_fires_on_failure(self): - n = _make_notification(notify_on=NotificationTrigger.ALL.value) - self._setup( - last_run_status=Pipeline.PipelineStatus.FAILURE, notifications=[n] - ).send() - assert NotificationHelper.send_notification.call_count == 1 - - # --- FAILURES_ONLY --- - def test_failures_only_suppressed_on_success(self): - n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) - self._setup( - last_run_status=Pipeline.PipelineStatus.SUCCESS, notifications=[n] - ).send() - NotificationHelper.send_notification.assert_not_called() - - def test_failures_only_fires_on_failure(self): - n = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) - self._setup( - last_run_status=Pipeline.PipelineStatus.FAILURE, notifications=[n] - ).send() - assert NotificationHelper.send_notification.call_count == 1 - - # --- SUCCESS_ONLY --- - def test_success_only_fires_on_success(self): - n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) - self._setup( - last_run_status=Pipeline.PipelineStatus.SUCCESS, notifications=[n] - ).send() - assert NotificationHelper.send_notification.call_count == 1 - - def test_success_only_suppressed_on_failure(self): - n = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) - self._setup( - last_run_status=Pipeline.PipelineStatus.FAILURE, notifications=[n] - ).send() - NotificationHelper.send_notification.assert_not_called() - - # --- Mixed partition on a SUCCESS run --- - def test_mixed_partition_on_success(self): - all_mode = _make_notification(notify_on=NotificationTrigger.ALL.value) - failures_only = _make_notification(notify_on=NotificationTrigger.FAILURES_ONLY.value) - success_only = _make_notification(notify_on=NotificationTrigger.SUCCESS_ONLY.value) - notifier = self._setup( - last_run_status=Pipeline.PipelineStatus.SUCCESS, - notifications=[all_mode, failures_only, success_only], - ) - with patch.object( - pipeline_notification_mod, "PipelineStatusPayload" - ) as payload_cls: - payload_cls.return_value.to_dict.return_value = {} - notifier.send() - - assert NotificationHelper.send_notification.call_count == 1 - kwargs = NotificationHelper.send_notification.call_args.kwargs - dispatched = sorted(n.notify_on for n in kwargs["notifications"]) - assert dispatched == ["ALL", "SUCCESS_ONLY"] diff --git a/backend/pipeline_v2/dto.py b/backend/pipeline_v2/dto.py index b8ad7c8707..d0f27e7943 100644 --- a/backend/pipeline_v2/dto.py +++ b/backend/pipeline_v2/dto.py @@ -25,15 +25,21 @@ def __init__( self.failed_files = failed_files def to_dict(self) -> dict[str, Any]: - """Convert the payload DTO to a dictionary.""" + """Convert the payload DTO to a dictionary. + + File counts are nested in `additional_data` to match the worker-path + payload shape (NotificationPayload.from_execution_status). + """ payload: dict[str, Any] = { "type": self.type, "pipeline_id": str(self.pipeline_id), "pipeline_name": self.pipeline_name, "status": self.status, - "total_files": self.total_files or 0, - "successful_files": self.successful_files or 0, - "failed_files": self.failed_files or 0, + "additional_data": { + "total_files": self.total_files or 0, + "successful_files": self.successful_files or 0, + "failed_files": self.failed_files or 0, + }, } if self.execution_id: payload["execution_id"] = str(self.execution_id) diff --git a/unstract/core/src/unstract/core/data_models.py b/unstract/core/src/unstract/core/data_models.py index 82da16a72e..7642aa9ce1 100644 --- a/unstract/core/src/unstract/core/data_models.py +++ b/unstract/core/src/unstract/core/data_models.py @@ -510,14 +510,10 @@ class NotificationPayload: error_message: str | None = None organization_id: str | None = None - # Per-run file aggregates surfaced into webhook payloads. - # Default 0 lets receivers switch on a numeric value without None-checks. - total_files: int = 0 - successful_files: int = 0 - failed_files: int = 0 - # Metadata timestamp: datetime = field(default_factory=lambda: datetime.now(UTC)) + # Per-run file aggregates (total/successful/failed) are nested here + # so receivers see them grouped rather than as top-level keys. additional_data: dict[str, Any] = field(default_factory=dict) # Internal tracking (not sent to external webhooks) @@ -608,6 +604,17 @@ def from_execution_status( f"Cannot create notification for non-final status: {execution_status}" ) + # File counts are bundled inside additional_data so receivers see + # them grouped (e.g. Slack renders one "Additional Data" section). + # Caller-supplied additional_data takes precedence on key conflict. + merged_additional = { + "total_files": total_files, + "successful_files": successful_files, + "failed_files": failed_files, + } + if additional_data: + merged_additional.update(additional_data) + return cls( type=workflow_type, pipeline_id=pipeline_id, @@ -616,10 +623,7 @@ def from_execution_status( execution_id=execution_id, error_message=error_message, organization_id=organization_id, - total_files=total_files, - successful_files=successful_files, - failed_files=failed_files, - additional_data=additional_data or {}, + additional_data=merged_additional, _source=source, ) diff --git a/workers/notification/providers/slack_webhook.py b/workers/notification/providers/slack_webhook.py index 89206646a5..04e3532fa3 100644 --- a/workers/notification/providers/slack_webhook.py +++ b/workers/notification/providers/slack_webhook.py @@ -199,11 +199,10 @@ def _format_value(self, value: Any) -> str: elif isinstance(value, (list, tuple)): return "\n• " + "\n• ".join(str(item) for item in value) elif isinstance(value, dict): - # Format nested dictionary - items = [] - for k, v in value.items(): - items.append(f" • {self._format_key(k)}: {v}") - return "\n" + "\n".join(items) + # Inline {Key: Value, Key: Value} so the receiver sees the + # whole dict on one line instead of a bulleted block. + items = [f"{self._format_key(k)}: {v}" for k, v in value.items()] + return "{" + ", ".join(items) + "}" elif value is None: return "_Not specified_" else: diff --git a/workers/shared/patterns/notification/helper.py b/workers/shared/patterns/notification/helper.py index c4f4c5d518..19f78fbb99 100644 --- a/workers/shared/patterns/notification/helper.py +++ b/workers/shared/patterns/notification/helper.py @@ -5,6 +5,7 @@ """ import logging +from typing import Any from celery import current_app @@ -335,7 +336,7 @@ def trigger_api_notifications( def handle_status_notifications( - api_client, + api_client: Any, pipeline_id: str, status: str, execution_id: str | None = None, From aad0fa99bc5f1e3b6ad547f68207c381594f830f Mon Sep 17 00:00:00 2001 From: Kirtiman Mishra <110175055+kirtimanmishrazipstack@users.noreply.github.com> Date: Thu, 7 May 2026 23:49:29 +0530 Subject: [PATCH 08/27] Uns 611 clubbed notification dispatch (#1951) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * UN-3439 [FIX] Accept wildcard subdomain origins in SocketIO and Django CORS (#1938) * UN-3439 [FIX] Accept wildcard subdomain origins in SocketIO and Django CORS Production socket connections were failing for `*.env.us-central.unstract.com` because python-socketio does exact-string comparison on `cors_allowed_origins`, so a literal `*` pattern silently rejected every real subdomain. - Add `CORS_ALLOWED_ORIGIN_REGEXES` derived from `WEB_APP_ORIGIN_URL_WITH_WILD_CARD`. - Wire SocketIO via `_RegexOrigin` whose `__eq__` does the regex match — single list entry covers all wildcard subdomains, no library subclass needed. - Normalize `WEB_APP_ORIGIN_URL` through `urlparse` so trailing slashes / paths in env are stripped (also fixes the `…com//oauth-status/` double-slash). - Add startup guard for malformed env values. Resolves item #1 of UN-3439. Items #2/#3 (decoupling indexing from Socket.io, fallback) are owned separately. Co-Authored-By: Claude Opus 4.7 (1M context) * UN-3439 [FIX] Address PR review: canonical origin, fullmatch, unhashable RegexOrigin, tests Addresses five review comments on #1938: 1. coderabbitai (Major) — RFC 6454 canonicalization. Browsers serialize `Origin` headers with a lowercase host and no explicit default ports; `parsed_url.netloc` preserved both, so `https://APP.EXAMPLE.COM:443` would silently fail to match the browser's `https://app.example.com`. Switch to `parsed_url.hostname` + drop default ports, and reject non-http(s) schemes at startup. 2. greptile (P2) — `re.fullmatch` instead of `re.match`. With `re.match` plus `$`, a candidate ending in `\n` matches because `$` is allowed before an optional trailing newline. `fullmatch` removes the ambiguity. 3. self — `_RegexOrigin.__hash__` violated `a == b ⇒ hash(a) == hash(b)` (one fixed pattern hash vs. many matching strings). Today this is masked because python-socketio uses linear `__eq__` on a list, but if the allow-list is ever wrapped in a set, every legitimate subdomain would silently be rejected — exactly the failure mode UN-3439 closes. Make instances unhashable so the contract can't be broken. 4. self — No regression tests. Add `backend/utils/tests/test_cors_origin.py` (33 cases) covering: regex match/no-match, lookalike spoofing, scheme mismatch, trailing-newline rejection, non-string equality protocol, unhashability, ReDoS bounds, URL normalization (case, default ports, trailing slash, paths, queries), startup-guard rejections (empty, no-scheme, non-browser-scheme, no-host), and end-to-end via the same `RegexOrigin` path SocketIO uses. 5. self — Over-clever wildcard-to-regex builder. The `split('*').join(re.escape, ...)` construction generalised to N wildcards but the input has exactly one; replace with a direct rf-string that's self-evident on review. Refactor for testability: extract `RegexOrigin` and `normalize_web_app_origin` into `backend/utils/cors_origin.py` (Django-free, importable from settings and tests). Settings now delegates to one helper call; `log_events.py` imports `RegexOrigin`. No behavioural change beyond what each comment fixes. Co-Authored-By: Claude Opus 4.7 (1M context) * UN-3439 [FIX] Address SonarCloud quality gate The Sonar quality gate failed with C reliability + 5 security hotspots, all on the new test file: - S905 (Bug, Major) — `{ro}` flagged as no-side-effect statement (Sonar doesn't see the implicit `__hash__` call). Drove the C reliability rating. Fix: use `len({ro})` so the side effect is via an explicit function call; test still asserts the same `TypeError`. - S5727 (Code Smell, Critical) — `assert ro != None` is tautological and doesn't exercise `__eq__`. Switch to `(ro == None) is False` which directly tests that `NotImplemented` falls back to identity-equality. - S5332 × 5 (Hotspots) — `http://` and `ftp://` literals in test data. These are intentional inputs proving the rejection logic. Annotate with `# NOSONAR` and an explanatory comment so the hotspots can be marked reviewed. No production code changed; tests still 33/33 passing. Co-Authored-By: Claude Opus 4.7 (1M context) * UN-3439 [FIX] Remove last S5727 code smell — test __eq__ via dunder Sonar S5727 correctly inferred that ``ro == None`` is statically always False (NotImplemented falls back to identity), making the assertion look tautological. The intent is to lock the protocol contract: ``__eq__`` must return the ``NotImplemented`` sentinel for non-strings. Test that directly via ``ro.__eq__(None) is NotImplemented`` instead of going through ``==``. Co-Authored-By: Claude Opus 4.7 (1M context) * UN-3439 [FIX] Address remaining CodeRabbit nits — port validation, ReDoS bound Two minor follow-ups from the second CodeRabbit pass: - `parsed.port` is a property that raises ValueError on malformed/out-of-range inputs (e.g. `:abc`, `:99999`). That bypassed our normalized config-error message and surfaced as a stack trace. Wrap the access and re-raise with the same actionable text. Adds two test cases (`https://example.com:abc`, `https://example.com:99999`) to lock the new behaviour. - The 50ms ReDoS timing bound is too tight for noisy CI runners. Loosen to 500ms — still orders of magnitude below what catastrophic backtracking would produce. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) * ReverseMerge: V0.161.4 hotfix (#1943) * Change csp to report only * [HOTFIX] Bool-parse ENABLE_HIGHLIGHT_API_DEPLOYMENT env var (v0.161.4) (#1939) [HOTFIX] Bool-parse ENABLE_HIGHLIGHT_API_DEPLOYMENT env var (#1937) [FIX] Bool-parse ENABLE_HIGHLIGHT_API_DEPLOYMENT env var os.environ.get returns the raw string when the variable is set, so ENABLE_HIGHLIGHT_API_DEPLOYMENT="False" was truthy in Python (any non-empty string is truthy). Wrap in CommonUtils.str_to_bool so "False" / "false" / "0" actually evaluate to False. The setting is consumed by the cloud configuration plugin's spec default (ConfigSpec.default in plugins/configuration/cloud_config.py) on cloud and on-prem builds. With this fix, an admin who explicitly sets the env var to a falsy string sees highlight data stripped as expected. Co-authored-by: vishnuszipstack <117254672+vishnuszipstack@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) --------- Co-authored-by: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com> Co-authored-by: vishnuszipstack <117254672+vishnuszipstack@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) * UN-3448 [FIX] Remove vestigial `uv pip install` line in uv-lock-automation workflow (#1941) * UN-3448 [FIX] Add --system flag to uv pip install in uv-lock-automation workflow Modern uv requires uv pip install to run inside a virtual environment OR with the explicit --system flag. The workflow currently has neither, so it errors out: error: No virtual environment found for Python 3.12.9; run `uv venv` to create an environment, or pass `--system` to install into a non-virtual environment This breaks every PR that touches a pyproject.toml (the workflow's paths filter triggers on those). Last successful run was 2026-04-01, before a behaviour change in uv or astral-sh/setup-uv@v7. The --system flag is exactly what the error message suggests and is correct here — we install pip into the runner's system Python; the downstream uv-lock.sh script creates its own venvs as needed. Co-Authored-By: Claude Opus 4.7 (1M context) * UN-3448 [FIX] Remove vestigial `uv pip install` line per review Per @jaseemjaskp's review: the pre-step `uv pip install ... pip` does nothing useful for this workflow. The downstream uv-lock.sh script uses uv sync at line 74, which manages its own venvs internally and never invokes pip directly: $ grep -rn 'pip' docker/scripts/uv-lock-gen/ docker/scripts/uv-lock-gen/uv-lock.sh:2:set -o pipefail Only match is pipefail (shell option), no real pip references. Removing the line entirely is cleaner than papering over with --system. The line was likely copy-pasted from a sibling workflow that legitimately needed pip in the system Python. Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) * ReverseMerge: V0.163.2 hotfix (#1946) * [HOTFIX] Use importlib.util.find_spec for pluggable worker discovery (#1918) * [FIX] Use importlib.util.find_spec for pluggable worker discovery _verify_pluggable_worker_exists() previously checked for the literal file `pluggable_worker//worker.py` on disk, which breaks when the plugin has been compiled to a .so (Nuitka, Cython, or any C extension) — the module is perfectly importable but the pre-check rejects it because only the .py extension is considered. Replace the filesystem check with importlib.util.find_spec(), which is Python's standard way to ask "is this module resolvable by the import system?". It honors every registered finder — source .py, compiled .so, bytecode .pyc, namespace packages, zipimports — so the function now matches what its docstring claims: verifying the module can be loaded, not that a specific file extension is present. Behavior is preserved for existing deployments: - Images with no `pluggable_worker//` subpackage → find_spec raises ModuleNotFoundError (ImportError subclass) → returns False. - Images with source .py → find_spec resolves the .py → returns True. - Images with compiled .so → find_spec resolves the .so → returns True. Co-Authored-By: Claude Opus 4.7 (1M context) * [FIX] Handle ValueError from find_spec in pluggable worker verification Greptile-flagged edge case: importlib.util.find_spec() can raise ValueError (not just ImportError) when sys.modules has a partially initialised module entry with __spec__ = None from a prior failed import. Broaden the except to catch both. Co-Authored-By: Claude Opus 4.7 (1M context) * [FIX] Resolve api-deployment worker directory from enum import path worker.py:452 did worker_type.value.replace("-", "_") to derive the on-disk dir name. All WorkerType enum values already use underscores, so the replace was a no-op; for API_DEPLOYMENT whose dir is "api-deployment" (hyphen), it resolved to "api_deployment" and the os.path.exists() check failed. Boot then logged a spurious "❌ Worker directory not found: /app/api_deployment" at ERROR level. The task registration path (builder + celery autodiscover via to_import_path) is unaffected, so this was purely log noise — but noise at ERROR level that masks real failures in log scans. Fix: derive the directory from the authoritative to_import_path() which already handles the hyphen case (api_deployment -> api-deployment). Co-Authored-By: Claude Opus 4.7 (1M context) --------- Co-authored-by: Claude Opus 4.7 (1M context) * [HOTFIX] Add IAM Role / Instance Profile auth mode to AWS Bedrock adapter (#1944) * [FEAT] Allow Bedrock to fall through to boto3's default credential chain Match the S3/MinIO connector pattern: when AWS access keys are left blank on the Bedrock LLM and embedding adapter forms, drop them from the kwargs dict so boto3's default credential chain handles authentication. This unlocks IAM role / instance profile / IRSA / AWS Profile scenarios on hosts that already have ambient AWS credentials (e.g. EKS workers with IRSA, EC2 with an instance profile). - llm1/static/bedrock.json: clarify access-key descriptions to mention IRSA and instance profile (already non-required at v0.163.2 base). - embedding1/static/bedrock.json: drop aws_access_key_id and aws_secret_access_key from top-level required; same description fix; expose aws_profile_name for parity with the LLM form. - base1.py: AWSBedrockLLMParameters and AWSBedrockEmbeddingParameters now strip empty access-key values from the validated kwargs before returning, so empty strings don't override boto3's default chain. AWSBedrockEmbeddingParameters fields gain explicit None defaults and an aws_profile_name field. Backward-compatible: existing adapters with access keys filled in continue to work unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) * [FEAT] Add Authentication Type selector to Bedrock adapter form Add an explicit `auth_type` selector with two options, making the auth choice clear to users: - "Access Keys" (default): existing flow, keys required - "IAM Role / Instance Profile (on-prem AWS only)": no fields; relies on boto3's default credential chain (IRSA on EKS, task role on ECS, instance profile on EC2). Description on the selector explicitly notes this option is only for AWS-hosted Unstract deployments. The form-only auth_type field is stripped before LiteLLM validation in both AWSBedrockLLMParameters.validate() and AWSBedrockEmbeddingParameters. validate(). Empty access keys continue to be stripped so boto3 falls through to the default chain even when the access_keys arm is selected without values (matches the S3/MinIO connector pattern). Backward-compatible: legacy adapters without auth_type behave as "Access Keys" mode (the default), and existing keys are forwarded unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) * [REVIEW] Address Bedrock auth_type review feedback Fixes the P0/P1 issues raised by greptile-apps and jaseemjaskp on PR #1944. Behaviour fixes: - Stale-key leak in IAM Role mode: switching an existing adapter from Access Keys to IAM Role would carry truthy stored access keys through the strip-empty-only loop, so boto3 silently authenticated with the old long-lived credentials instead of falling through to the host's IRSA / instance-profile identity. Both LLM and embedding paths were affected. - Silent acceptance of unknown auth_type: a typo (e.g. "access_key") or a malformed payload from a non-UI client passed through the dict comprehension untouched, with no enum guard. - Cross-field validation gap: explicit Access Keys mode with blank or whitespace-only values silently fell through to the default credential chain instead of surfacing the misconfiguration. Implementation: - Add a module-level _resolve_bedrock_aws_credentials helper used by both AWSBedrockLLMParameters.validate() and AWSBedrock EmbeddingParameters.validate(), so the auth-type contract is expressed once. - Validates auth_type against an allowlist (None | "access_keys" | "iam_role"); raises ValueError on anything else. - iam_role: unconditionally drops aws_access_key_id and aws_secret_access_key. - access_keys (explicit): requires non-blank values; raises ValueError if either is empty or whitespace-only. - Legacy (auth_type absent): retains the lenient strip behaviour so pre-PR adapter configurations continue to deserialise unchanged. - Restore aws_region_name as required (no `= None` default) on AWSBedrockEmbeddingParameters; only credentials may legitimately be absent. - Drop the orphan aws_profile_name field from embedding1/static/bedrock.json: it was added for parity with the LLM form but lives outside the auth_type oneOf and contradicts the selector's "no further input" semantics. The LLM form already had aws_profile_name pre-PR and is left alone for backwards compatibility. Tests: - New tests/test_bedrock_adapter.py covers 15 cases across LLM and embedding adapters: legacy-no-auth-type, explicit access_keys with valid/blank/whitespace keys, iam_role with stale/no keys, unknown auth_type rejection, cross-field validation, and preservation of unrelated params (model_id, aws_profile_name, region, thinking). Skipped (P2 nice-to-have): - Comment-scope clarification, MinIO reference rewording, validate-mutates-caller'\''s-dict, and the LLM form description nit about aws_profile_name visibility. These don'\''t change behaviour and can be addressed in a follow-up. Co-Authored-By: Claude Opus 4.7 (1M context) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: Claude Opus 4.7 (1M context) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --------- Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Athul <89829560+athul-rs@users.noreply.github.com> * batch notification --------- Co-authored-by: ali <117142933+muhammad-ali-e@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) Co-authored-by: Ritwik G <100672805+ritwik-g@users.noreply.github.com> Co-authored-by: Deepak K <89829542+Deepak-Kesavan@users.noreply.github.com> Co-authored-by: vishnuszipstack <117254672+vishnuszipstack@users.noreply.github.com> Co-authored-by: Praveen Kumar Co-authored-by: Chandrasekharan M <117059509+chandrasekharan-zipstack@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Athul <89829560+athul-rs@users.noreply.github.com> --- .github/workflows/uv-lock-automation.yaml | 2 - backend/api_v2/notification.py | 8 +- backend/backend/settings/base.py | 30 +- backend/configuration/enums.py | 6 + backend/notification_v2/clubbed_renderer.py | 138 +++++++++ backend/notification_v2/enums.py | 34 +++ backend/notification_v2/helper.py | 228 +++++++++++++-- backend/notification_v2/internal_api_views.py | 265 +++++++++++++++++- .../notification_v2/internal_serializers.py | 1 + backend/notification_v2/internal_urls.py | 11 + .../0003_add_notification_buffer.py | 148 ++++++++++ backend/notification_v2/models.py | 109 ++++++- backend/notification_v2/serializers.py | 15 +- backend/notification_v2/tasks.py | 51 ++++ backend/notification_v2/tests/__init__.py | 0 backend/notification_v2/urls.py | 9 +- backend/notification_v2/views.py | 60 +++- backend/pipeline_v2/notification.py | 9 +- backend/utils/cors_origin.py | 81 ++++++ backend/utils/log_events.py | 8 +- backend/utils/tests/test_cors_origin.py | 197 +++++++++++++ .../notification-modal/CreateNotification.jsx | 63 ++++- .../settings/platform/PlatformSettings.jsx | 95 ++++++- .../sdk1/src/unstract/sdk1/adapters/base1.py | 93 +++++- .../adapters/embedding1/static/bedrock.json | 68 ++++- .../sdk1/adapters/llm1/static/bedrock.json | 66 ++++- unstract/sdk1/tests/test_bedrock_adapter.py | 246 ++++++++++++++++ .../process_notification_buffer.py | 83 ++++++ workers/log_consumer/scheduler.sh | 48 ++-- .../shared/patterns/notification/helper.py | 129 ++++++--- 30 files changed, 2168 insertions(+), 133 deletions(-) create mode 100644 backend/notification_v2/clubbed_renderer.py create mode 100644 backend/notification_v2/migrations/0003_add_notification_buffer.py create mode 100644 backend/notification_v2/tasks.py delete mode 100644 backend/notification_v2/tests/__init__.py create mode 100644 backend/utils/cors_origin.py create mode 100644 backend/utils/tests/test_cors_origin.py create mode 100644 unstract/sdk1/tests/test_bedrock_adapter.py create mode 100755 workers/log_consumer/process_notification_buffer.py diff --git a/.github/workflows/uv-lock-automation.yaml b/.github/workflows/uv-lock-automation.yaml index 8cfc2f9ca2..52c3364386 100644 --- a/.github/workflows/uv-lock-automation.yaml +++ b/.github/workflows/uv-lock-automation.yaml @@ -36,8 +36,6 @@ jobs: version: "0.6.14" python-version: 3.12.9 - - run: uv pip install --python=3.12.9 pip - - name: Generate UV lockfiles env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/backend/api_v2/notification.py b/backend/api_v2/notification.py index 57e810c0e9..4cefe92d24 100644 --- a/backend/api_v2/notification.py +++ b/backend/api_v2/notification.py @@ -1,6 +1,6 @@ import logging -from notification_v2.helper import NotificationHelper +from notification_v2.helper import dispatch_with_delivery_mode from notification_v2.models import Notification from pipeline_v2.dto import PipelineStatusPayload from workflow_manager.workflow_v2.enums import ExecutionStatus @@ -60,6 +60,8 @@ def send(self) -> None: failed_files=failed_files, ) - NotificationHelper.send_notification( - notifications=self.notifications, payload=payload_dto.to_dict() + dispatch_with_delivery_mode( + list(self.notifications), + payload_dto.to_dict(), + error_context=f"api={self.api.id}", ) diff --git a/backend/backend/settings/base.py b/backend/backend/settings/base.py index 2d493a91c3..f20109203f 100644 --- a/backend/backend/settings/base.py +++ b/backend/backend/settings/base.py @@ -12,11 +12,12 @@ import logging import os from pathlib import Path -from urllib.parse import quote, urlparse +from urllib.parse import quote import httpx from dotenv import find_dotenv, load_dotenv from utils.common_utils import CommonUtils +from utils.cors_origin import normalize_web_app_origin missing_settings = [] @@ -68,10 +69,18 @@ def get_required_setting(setting_key: str, default: str | None = None) -> str | ) # Maximum number of files allowed per workflow page execution WORKFLOW_PAGE_MAX_FILES = int(os.environ.get("WORKFLOW_PAGE_MAX_FILES", 2)) -WEB_APP_ORIGIN_URL = os.environ.get("WEB_APP_ORIGIN_URL", "http://localhost:3000") -parsed_url = urlparse(WEB_APP_ORIGIN_URL) -WEB_APP_ORIGIN_URL_WITH_WILD_CARD = f"{parsed_url.scheme}://*.{parsed_url.netloc}" +( + WEB_APP_ORIGIN_URL, + WEB_APP_ORIGIN_URL_WITH_WILD_CARD, + _CORS_SUBDOMAIN_REGEX, +) = normalize_web_app_origin( + os.environ.get("WEB_APP_ORIGIN_URL", "http://localhost:3000") +) CORS_ALLOWED_ORIGINS = [WEB_APP_ORIGIN_URL] +# Wildcard subdomain regex consumed by django-cors-headers and (via the +# RegexOrigin wrapper in utils/log_events.py) by the SocketIO engine.io +# handshake — a single source of truth so HTTP and WS CORS cannot diverge. +CORS_ALLOWED_ORIGIN_REGEXES = [_CORS_SUBDOMAIN_REGEX] DJANGO_APP_BACKEND_URL = os.environ.get("DJANGO_APP_BACKEND_URL", "http://localhost:8000") INTERNAL_SERVICE_API_KEY = os.environ.get("INTERNAL_SERVICE_API_KEY") @@ -210,6 +219,15 @@ def get_required_setting(setting_key: str, default: str | None = None) -> str | INDEXING_FLAG_TTL = int(get_required_setting("INDEXING_FLAG_TTL")) NOTIFICATION_TIMEOUT = int(get_required_setting("NOTIFICATION_TIMEOUT", "5")) +# Window for clubbing BATCHED notifications — also the flush cadence (seconds). +# Default 1800 (30 min). Per-notification buffer rows precompute flush_after at +# enqueue time, so changing this only affects rows enqueued after the restart. +NOTIFICATION_CLUB_INTERVAL = int(os.environ.get("NOTIFICATION_CLUB_INTERVAL", "1800")) +# Retention for terminal NotificationBuffer rows (DISPATCHED / DEAD_LETTER). +# PENDING rows are never GC'd regardless of age. +NOTIFICATION_BUFFER_RETENTION_DAYS = int( + os.environ.get("NOTIFICATION_BUFFER_RETENTION_DAYS", "7") +) ATOMIC_REQUESTS = CommonUtils.str_to_bool( os.environ.get("DJANGO_ATOMIC_REQUESTS", "False") ) @@ -689,4 +707,6 @@ def filter(self, record): ) raise ValueError(ERROR_MESSAGE) -ENABLE_HIGHLIGHT_API_DEPLOYMENT = os.environ.get("ENABLE_HIGHLIGHT_API_DEPLOYMENT", False) +ENABLE_HIGHLIGHT_API_DEPLOYMENT = CommonUtils.str_to_bool( + os.environ.get("ENABLE_HIGHLIGHT_API_DEPLOYMENT", "False") +) diff --git a/backend/configuration/enums.py b/backend/configuration/enums.py index f35033a9f7..a11a0b58e7 100644 --- a/backend/configuration/enums.py +++ b/backend/configuration/enums.py @@ -61,6 +61,12 @@ class ConfigKey(Enum): max_value=settings.MAX_PARALLEL_FILE_BATCHES_MAX_VALUE, ) + NOTIFICATION_CLUB_INTERVAL = ConfigSpec( + default=settings.NOTIFICATION_CLUB_INTERVAL, + value_type=ConfigType.INT, + help_text="Window (seconds) for clubbing BATCHED notifications.", + ) + def cast_value(self, raw_value: Any): converters = { ConfigType.INT: int, diff --git a/backend/notification_v2/clubbed_renderer.py b/backend/notification_v2/clubbed_renderer.py new file mode 100644 index 0000000000..6fd0de0a84 --- /dev/null +++ b/backend/notification_v2/clubbed_renderer.py @@ -0,0 +1,138 @@ +"""Clubbed notification renderer. + +Builds one canonical JSON envelope from a group of buffered execution events +and emits the platform-appropriate dispatch payload. Stays separate from the +single-event SlackWebhook / APIWebhook providers so immediate-dispatch behavior +stays untouched. + +Envelope shape (always the same — single-event groups use this too so consumers +never need to branch on "is this batched?"): + + { + "kind": "batch", + "summary": { + "pipeline": "", + "interval_minutes": 30, + "total": N, "succeeded": S, "failed": F + }, + "events": [{"execution_id": ..., "status": ..., "error": ...?}, ...] + } +""" + +from __future__ import annotations + +import logging +from typing import Any + +from notification_v2.enums import PlatformType + +logger = logging.getLogger(__name__) + +# Hard cap on events per dispatch — extras roll over to the next flush tick. +# Bounds memory + payload size and prevents a runaway backlog from creating an +# unbounded HTTP body. +MAX_BATCH_SIZE = 500 +# How many events Slack renders inline before collapsing the rest under a +# "… and K more" footer. Slack tolerates much larger payloads, but readability +# tanks past ~25 lines. +SLACK_MAX_DISPLAY_EVENTS = 25 + +_SUCCESS_STATUSES = {"COMPLETED", "SUCCESS"} + + +def _is_success(status: str | None) -> bool: + if not status: + return False + return status.upper() in _SUCCESS_STATUSES + + +def _event_from_payload(payload: dict[str, Any]) -> dict[str, Any]: + event: dict[str, Any] = { + "execution_id": payload.get("execution_id"), + "status": payload.get("status"), + } + error_message = payload.get("error_message") + if error_message: + event["error"] = error_message + return event + + +def build_envelope( + payloads: list[dict[str, Any]], interval_seconds: int +) -> dict[str, Any]: + """Build the canonical batch envelope. + + Caps the events list at MAX_BATCH_SIZE; oldest-first ordering is the + caller's responsibility (the flush job sorts by created_at). + """ + capped = payloads[:MAX_BATCH_SIZE] + succeeded = sum(1 for p in capped if _is_success(p.get("status"))) + failed = len(capped) - succeeded + # Multiple pipelines can share an (org, url, auth_sig) group; we surface + # the first one's name as a representative. Mixed-pipeline batches are + # rare in practice and a v2 enhancement would aggregate distinct names. + pipeline_name = capped[0].get("pipeline_name") if capped else None + return { + "kind": "batch", + "summary": { + "pipeline": pipeline_name, + "interval_minutes": max(1, interval_seconds // 60), + "total": len(capped), + "succeeded": succeeded, + "failed": failed, + }, + "events": [_event_from_payload(p) for p in capped], + } + + +def _slack_event_line(event: dict[str, Any]) -> str: + parts = [f"— {event.get('execution_id') or 'unknown'}: {event.get('status')}"] + if event.get("error"): + parts.append(f"({event['error']})") + return " ".join(parts) + + +def render_for_slack(envelope: dict[str, Any]) -> dict[str, Any]: + """Format the envelope as a Slack-compatible payload dict. + + Returns the body shape Slack incoming webhooks expect (`text` field with + mrkdwn). Truncates inline events at SLACK_MAX_DISPLAY_EVENTS. + """ + summary = envelope["summary"] + events: list[dict[str, Any]] = envelope["events"] + pipeline = summary.get("pipeline") or "pipeline" + + header = f"*[Unstract] {summary['total']} executions for `{pipeline}`*" + counts = f"✅ {summary['succeeded']} succeeded ❌ {summary['failed']} failed" + + visible = events[:SLACK_MAX_DISPLAY_EVENTS] + lines = [_slack_event_line(e) for e in visible] + overflow = len(events) - len(visible) + if overflow > 0: + lines.append(f"… and {overflow} more executions") + + body = "\n".join([header, counts, *lines]) + return {"text": body} + + +def render_clubbed_message( + payloads: list[dict[str, Any]], platform: str, interval_seconds: int +) -> dict[str, Any]: + """Top-level entry point — returns the dispatch body for ``platform``. + + Slack receives the rendered text payload; raw API webhooks receive the + canonical envelope unchanged so downstream consumers can parse it + programmatically. + """ + envelope = build_envelope(payloads, interval_seconds) + if platform == PlatformType.SLACK.value: + return render_for_slack(envelope) + if platform == PlatformType.API.value: + return envelope + # Unknown platform — fall back to the raw envelope and warn so misrouted + # rows don't drop silently. + logger.warning( + "Unknown platform %s for clubbed dispatch; returning raw envelope", + platform, + ) + return envelope diff --git a/backend/notification_v2/enums.py b/backend/notification_v2/enums.py index 991b08cac9..d6fed8b485 100644 --- a/backend/notification_v2/enums.py +++ b/backend/notification_v2/enums.py @@ -36,3 +36,37 @@ class PlatformType(Enum): @classmethod def choices(cls): return [(e.value, e.name.replace("_", " ").capitalize()) for e in cls] + + +class DeliveryMode(Enum): + """Per-notification dispatch mode. + + IMMEDIATE fires on every workflow completion (pre-existing behavior). + BATCHED buffers events into NotificationBuffer and flushes them as one + clubbed message per (org, webhook_url, auth_sig) every + NOTIFICATION_CLUB_INTERVAL seconds. + """ + + IMMEDIATE = "IMMEDIATE" + BATCHED = "BATCHED" + + @classmethod + def choices(cls): + return [(e.value, e.name.replace("_", " ").capitalize()) for e in cls] + + +class BufferStatus(Enum): + """Lifecycle states for a NotificationBuffer row. + + PENDING — waiting for the next flush tick. + DISPATCHED — successfully sent as part of a clubbed message. + DEAD_LETTER — Celery exhausted retries; terminal, never re-picked. + """ + + PENDING = "PENDING" + DISPATCHED = "DISPATCHED" + DEAD_LETTER = "DEAD_LETTER" + + @classmethod + def choices(cls): + return [(e.value, e.name.replace("_", " ").capitalize()) for e in cls] diff --git a/backend/notification_v2/helper.py b/backend/notification_v2/helper.py index a454b9d82b..f217f2da18 100644 --- a/backend/notification_v2/helper.py +++ b/backend/notification_v2/helper.py @@ -1,35 +1,223 @@ +import hashlib import logging +from collections.abc import Iterable +from datetime import timedelta from typing import Any -from notification_v2.enums import NotificationType, PlatformType -from notification_v2.models import Notification +from account_v2.models import Organization +from django.utils import timezone + +from notification_v2.enums import ( + AuthorizationType, + BufferStatus, + DeliveryMode, + NotificationType, + PlatformType, +) +from notification_v2.models import Notification, NotificationBuffer from notification_v2.provider.notification_provider import NotificationProvider from notification_v2.provider.registry import get_notification_provider logger = logging.getLogger(__name__) +# Used as a stable salt-free input for SHA-256 grouping; collisions are +# vanishingly improbable and the digest is never used as a security primitive. +_AUTH_SIG_NONE = "" + + +def compute_auth_sig(notification: Notification) -> str: + """SHA-256 hex of (auth_type + auth_key + auth_header) — never raw creds. + + Identical auth configs produce the same sig (so grouping clubs them); + differing configs split into separate groups. + """ + raw = "|".join( + [ + notification.authorization_type or _AUTH_SIG_NONE, + notification.authorization_key or _AUTH_SIG_NONE, + notification.authorization_header or _AUTH_SIG_NONE, + ] + ) + return hashlib.sha256(raw.encode("utf-8")).hexdigest() + + +def webhook_url_hash(url: str | None) -> str: + """Short, log-safe fingerprint of a webhook URL (first 8 chars of SHA-256).""" + if not url: + return "none" + return hashlib.sha256(url.encode("utf-8")).hexdigest()[:8] + + +def get_org_club_interval_seconds(organization: Organization) -> int: + """Per-org override of NOTIFICATION_CLUB_INTERVAL, falling back to env default. + + Reads from the generic configuration KV table; returns the env-derived + default when the org has no override. The value is read at enqueue time + and baked into the row's flush_after — see mfbt §EC-2 / §EC-8: changing + the override only affects rows enqueued after the change. + """ + # Local import: configuration depends on Django settings at import time + # and notification_v2.helper is imported during app boot. + from configuration.enums import ConfigKey + from configuration.models import Configuration + + return int( + Configuration.get_value_by_organization( + ConfigKey.NOTIFICATION_CLUB_INTERVAL, organization + ) + ) + + +def build_webhook_headers(notification: Notification) -> dict[str, str]: + """Build HTTP headers for a webhook dispatch from the notification's auth. + + Mirrors the logic in ``provider/webhook/webhook.py`` and the worker-side + ``get_webhook_headers`` so the clubbed dispatcher and the immediate path + produce identical headers for the same auth config. + """ + headers = {"Content-Type": "application/json"} + auth_type_raw = (notification.authorization_type or "").upper() + auth_key = notification.authorization_key + auth_header = notification.authorization_header + if auth_type_raw == AuthorizationType.BEARER.value and auth_key: + headers["Authorization"] = f"Bearer {auth_key}" + elif auth_type_raw == AuthorizationType.API_KEY.value and auth_key: + headers["Authorization"] = auth_key + elif ( + auth_type_raw == AuthorizationType.CUSTOM_HEADER.value + and auth_header + and auth_key + ): + headers[auth_header] = auth_key + return headers + + +def _resolve_organization(notification: Notification) -> Organization | None: + """Walk pipeline/api FK to find the owning org. Notification has no direct FK.""" + pipeline = notification.pipeline + if pipeline and pipeline.organization_id: + return pipeline.organization + api = notification.api + if api and api.organization_id: + return api.organization + return None + + +def split_by_delivery_mode( + notifications: "Iterable[Notification]", +) -> tuple[list[Notification], list[Notification]]: + """Partition into (IMMEDIATE, BATCHED). Unknown modes default to IMMEDIATE.""" + immediate: list[Notification] = [] + batched: list[Notification] = [] + for n in notifications: + if n.delivery_mode == DeliveryMode.BATCHED.value: + batched.append(n) + else: + immediate.append(n) + return immediate, batched + + +def dispatch_with_delivery_mode( + notifications: "Iterable[Notification]", + payload: dict[str, Any], + *, + error_context: str = "", +) -> None: + """Single-call entry point that splits IMMEDIATE / BATCHED and dispatches. + + IMMEDIATE rows fire synchronously via NotificationHelper. BATCHED rows + enqueue into NotificationBuffer; an enqueue failure is logged but does + not abort the loop — other notifications still get their chance. + + ``error_context`` lets callers tag failures with their dispatch source + (pipeline id, api id) for easier triage. + """ + immediate, batched = split_by_delivery_mode(notifications) + if immediate: + NotificationHelper.send_notification(notifications=immediate, payload=payload) + for notification in batched: + try: + enqueue(notification, payload) + except Exception: + logger.exception( + "Failed to enqueue BATCHED notification %s%s", + notification.id, + f" ({error_context})" if error_context else "", + ) + + +def enqueue(notification: Notification, payload: dict[str, Any]) -> NotificationBuffer: + """Buffer a single execution event for a BATCHED notification. + + Computes auth_sig and flush_after at write time so existing PENDING rows + keep their original cadence even if NOTIFICATION_CLUB_INTERVAL or the + notification's auth changes mid-window. Returns the persisted row. + + Raises ValueError if the notification has no resolvable organization + (defensive — the FK chain via pipeline/api always provides one in practice). + """ + organization = _resolve_organization(notification) + if organization is None: + raise ValueError( + f"Notification {notification.id} has no resolvable organization " + "(neither pipeline nor api FK populated)" + ) + + interval_seconds = get_org_club_interval_seconds(organization) + flush_after = timezone.now() + timedelta(seconds=interval_seconds) + auth_sig = compute_auth_sig(notification) + platform = notification.platform or PlatformType.API.value + + buffer_row = NotificationBuffer.objects.create( + notification=notification, + organization=organization, + webhook_url=notification.url, + payload=payload, + platform=platform, + auth_sig=auth_sig, + flush_after=flush_after, + status=BufferStatus.PENDING.value, + ) + + # Structured log: org + URL fingerprint only — never the raw URL or any + # part of the auth tuple. Downstream metrics consumers grep on metric=. + logger.info( + "metric=notification_buffer_enqueued_total platform=%s org_id=%s " + "webhook_url_hash=%s notification_id=%s buffer_id=%s flush_after=%s", + platform, + organization.organization_id, + webhook_url_hash(notification.url), + notification.id, + buffer_row.id, + flush_after.isoformat(), + ) + return buffer_row + class NotificationHelper: @classmethod def send_notification(cls, notifications: list[Notification], payload: Any) -> None: - """Send notification Sends notifications using the appropriate provider - based on the notification type and platform. + """Dispatch IMMEDIATE notifications via the registered provider. - This method iterates through a list of `Notification` objects, determines the - appropriate notification provider based on the notification's type and - platform, and sends the notification with the provided payload. If an error - occurs due to an invalid notification type or platform, it logs the error. + Iterates over notifications, resolves the provider for each + (notification_type, platform) pair, and fires the webhook task. BATCHED + notifications must be routed to ``enqueue()`` instead — callers branch + on ``notification.delivery_mode`` before reaching this method. Args: - notifications (list[Notification]): A list of `Notification` instances to - be processed and sent. - payload (Any): The data to be sent with the notification. This can be any - format expected by the provider - - Returns: - None + notifications: Active Notification rows to dispatch synchronously. + payload: Provider-specific payload (typically a dict). """ for notification in notifications: + if notification.delivery_mode == DeliveryMode.BATCHED.value: + # Callers should not reach here for BATCHED — log loudly so + # routing regressions are visible without breaking dispatch. + logger.warning( + "BATCHED notification %s reached IMMEDIATE dispatch path; " + "skipping. Caller must branch on delivery_mode.", + notification.id, + ) + continue notification_type = NotificationType(notification.notification_type) platform_type = PlatformType(notification.platform) try: @@ -40,9 +228,13 @@ def send_notification(cls, notifications: list[Notification], payload: Any) -> N notification=notification, payload=payload ) notifier.send() - logger.info(f"Sending notification to {notification}") + logger.info("Sending notification to %s", notification) except ValueError as e: logger.error( - f"Error in notification type {notification_type} and platform " - f"{platform_type} for notification {notification}: {e}" + "Error in notification type %s and platform %s for " + "notification %s: %s", + notification_type, + platform_type, + notification, + e, ) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 3e3f386b17..396bfb2081 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -9,13 +9,18 @@ - These endpoints are not accessible from browsers and don't use session cookies """ +import json import logging +from datetime import timedelta from typing import Any, cast from api_v2.models import APIDeployment -from django.db.models import QuerySet +from django.conf import settings +from django.db import transaction +from django.db.models import Min, QuerySet from django.http import HttpRequest, JsonResponse from django.shortcuts import get_object_or_404 +from django.utils import timezone from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import require_http_methods from pipeline_v2.models import Pipeline @@ -23,7 +28,16 @@ from workflow_manager.workflow_v2.enums import ExecutionStatus from workflow_manager.workflow_v2.models.execution import WorkflowExecution -from notification_v2.models import Notification +from backend.celery_service import app as celery_app +from notification_v2.clubbed_renderer import render_clubbed_message +from notification_v2.enums import BufferStatus, DeliveryMode +from notification_v2.helper import ( + build_webhook_headers, + enqueue, + get_org_club_interval_seconds, + webhook_url_hash, +) +from notification_v2.models import Notification, NotificationBuffer logger = logging.getLogger(__name__) @@ -89,6 +103,9 @@ def _serialize_notification(n: Notification) -> dict[str, Any]: "max_retries": n.max_retries, "is_active": n.is_active, "notify_on_failures": n.notify_on_failures, + # Drives the worker-side IMMEDIATE-vs-BATCHED branch in + # workers/shared/patterns/notification/helper.py. + "delivery_mode": n.delivery_mode, } @@ -271,3 +288,247 @@ def get_api_data(request: HttpRequest, api_id: str) -> JsonResponse: return JsonResponse( {"status": "error", "message": INTERNAL_SERVER_ERROR_MSG}, status=500 ) + + +# Required fields on the enqueue endpoint body. Worker-side serialization +# guarantees these — keep this list in sync with +# workers/shared/patterns/notification/helper.py. +_ENQUEUE_REQUIRED_FIELDS = ( + "notification_id", + "execution_id", + "pipeline_id", + "pipeline_name", + "status", + "platform", +) + + +@csrf_exempt # Safe: Internal API with Bearer token auth, service-to-service only +@require_http_methods(["POST"]) +def enqueue_notification_buffer(request: HttpRequest) -> JsonResponse: + """Buffer one execution event from a callback worker. + + Worker code is model-free: it forwards a notification_id + structured + payload here and lets the backend write the NotificationBuffer row. + Rejects rows whose source notification is not BATCHED so a worker + routing bug cannot silently divert IMMEDIATE traffic into the buffer. + """ + try: + body = json.loads(request.body.decode("utf-8") or "{}") + except json.JSONDecodeError: + return JsonResponse( + {"status": "error", "message": "Invalid JSON body"}, status=400 + ) + + missing = [f for f in _ENQUEUE_REQUIRED_FIELDS if not body.get(f)] + if missing: + return JsonResponse( + { + "status": "error", + "message": f"Missing required fields: {', '.join(missing)}", + }, + status=400, + ) + + try: + notification = Notification.objects.get(id=body["notification_id"]) + except Notification.DoesNotExist: + return JsonResponse( + {"status": "error", "message": "Notification not found"}, status=404 + ) + + if notification.delivery_mode != DeliveryMode.BATCHED.value: + # Hard-fail rather than silently auto-correcting — surfaces worker + # routing regressions instead of letting them drain into the buffer. + return JsonResponse( + { + "status": "error", + "message": ( + "Notification delivery_mode is not BATCHED; refuse to enqueue" + ), + }, + status=409, + ) + + payload = { + "execution_id": body["execution_id"], + "pipeline_id": body["pipeline_id"], + "pipeline_name": body["pipeline_name"], + "status": body["status"], + "error_message": body.get("error_message"), + "platform": body["platform"], + } + try: + buffer_row = enqueue(notification, payload) + except ValueError as e: + return JsonResponse({"status": "error", "message": str(e)}, status=400) + + return JsonResponse( + {"status": "success", "buffer_row_id": str(buffer_row.id)}, status=201 + ) + + +def _gc_terminal_rows() -> int: + """Delete DISPATCHED / DEAD_LETTER rows older than the retention window. + + PENDING rows are intentionally untouched regardless of age — they + represent live work the flush job still owns. + """ + cutoff = timezone.now() - timedelta(days=settings.NOTIFICATION_BUFFER_RETENTION_DAYS) + deleted_count, _ = NotificationBuffer.objects.filter( + status__in=[BufferStatus.DISPATCHED.value, BufferStatus.DEAD_LETTER.value], + created_at__lt=cutoff, + ).delete() + return int(deleted_count) + + +def _dispatch_group( + org_id: Any, + webhook_url: str, + auth_sig: str, +) -> tuple[int, int]: + """Dispatch a single (org, url, auth_sig) group; returns (rows, succeeded). + + Caller already filtered groups to MIN(flush_after) <= now. Locks rows + with SKIP LOCKED so a sibling replica skips them rather than blocking. + Re-fetches the source Notification each time for live auth (record may + have been edited between enqueue and flush). + """ + with transaction.atomic(): + rows = list( + NotificationBuffer.objects.select_for_update(skip_locked=True) + .filter( + status=BufferStatus.PENDING.value, + organization_id=org_id, + webhook_url=webhook_url, + auth_sig=auth_sig, + ) + .order_by("created_at")[:_PROCESS_BUFFER_CAP] + ) + if not rows: + # Either another replica claimed the rows (SKIP LOCKED) or they + # transitioned out of PENDING between the GROUP BY scan and the + # row-level lock. Either way: nothing to do here. + return 0, 0 + + # Live auth — read from the FIRST row's notification. If multiple + # notifications collide on (url, auth_sig) we have, by definition, + # identical auth, so this is safe. + first_notification = rows[0].notification + platform = rows[0].platform + payloads = [r.payload for r in rows] + # Per-org interval read here is cosmetic — used only for the + # `interval_minutes` field in the rendered message body. The + # cadence-controlling read happened at enqueue time and is + # already baked into each row's flush_after (mfbt §EC-2). + interval_seconds = get_org_club_interval_seconds(rows[0].organization) + body = render_clubbed_message(payloads, platform, interval_seconds) + headers = build_webhook_headers(first_notification) + + buffer_ids = [str(r.id) for r in rows] + try: + celery_app.send_task( + "send_webhook_notification", + args=[ + first_notification.url, + body, + headers, + settings.NOTIFICATION_TIMEOUT, + ], + kwargs={ + "max_retries": first_notification.max_retries, + "retry_delay": 10, + "platform": platform, + }, + queue="notifications", + link_error=celery_app.signature( + "notification_v2.mark_buffer_dead_letter", + kwargs={"buffer_row_ids": buffer_ids}, + ), + ) + except Exception: + # Broker hiccup — leave rows PENDING for the next tick rather + # than mark them DEAD_LETTER. `exception` keeps stack context. + logger.exception( + "Broker dispatch failed for group org=%s url_hash=%s", + org_id, + webhook_url_hash(webhook_url), + ) + return 0, 0 + + now = timezone.now() + NotificationBuffer.objects.filter(id__in=buffer_ids).update( + status=BufferStatus.DISPATCHED.value, + dispatched_at=now, + ) + logger.info( + "metric=notification_batch_dispatched_total platform=%s result=success " + "org_id=%s webhook_url_hash=%s rows=%d", + platform, + org_id, + webhook_url_hash(webhook_url), + len(rows), + ) + return len(rows), len(rows) + + +# Per-group cap; matches the renderer's MAX_BATCH_SIZE so the rendered +# events list and the dispatched row set stay in lock-step. Anything beyond +# this rolls into the next flush tick. +_PROCESS_BUFFER_CAP = 500 + + +@csrf_exempt # Safe: Internal API with Bearer token auth, service-to-service only +@require_http_methods(["POST"]) +def process_notification_buffer(request: HttpRequest) -> JsonResponse: + """Flush PENDING groups that have hit their flush_after; then GC. + + Algorithm: + 1. GROUP BY (org, url, auth_sig), HAVING MIN(flush_after) <= NOW() + 2. For each group, in its own transaction: lock-skip-locked rows, + render, dispatch a single Celery task, mark rows DISPATCHED. + 3. Sweep terminal rows older than NOTIFICATION_BUFFER_RETENTION_DAYS. + + Concurrency: SELECT FOR UPDATE SKIP LOCKED makes parallel calls safe — + each replica skips groups another worker is already dispatching. + """ + now = timezone.now() + groups = list( + NotificationBuffer.objects.filter(status=BufferStatus.PENDING.value) + .values("organization_id", "webhook_url", "auth_sig") + .annotate(earliest_flush=Min("flush_after")) + .filter(earliest_flush__lte=now) + ) + + dispatched_groups = 0 + dispatched_rows = 0 + for group in groups: + try: + rows, _succeeded = _dispatch_group( + org_id=group["organization_id"], + webhook_url=group["webhook_url"], + auth_sig=group["auth_sig"], + ) + except Exception: + logger.exception( + "Failed dispatching group org=%s url_hash=%s", + group["organization_id"], + webhook_url_hash(group["webhook_url"]), + ) + continue + if rows > 0: + dispatched_groups += 1 + dispatched_rows += rows + + gc_deleted = _gc_terminal_rows() + return JsonResponse( + { + "status": "success", + "dispatched_groups": dispatched_groups, + "dispatched_rows": dispatched_rows, + # DEAD_LETTER transitions are async (Celery link_error) — this + # response only covers transitions visible to this request. + "dead_letter_rows": 0, + "gc_deleted_rows": gc_deleted, + } + ) diff --git a/backend/notification_v2/internal_serializers.py b/backend/notification_v2/internal_serializers.py index 94669d64a4..db7a35ab32 100644 --- a/backend/notification_v2/internal_serializers.py +++ b/backend/notification_v2/internal_serializers.py @@ -23,6 +23,7 @@ class Meta: "platform", "max_retries", "is_active", + "delivery_mode", "created_at", "modified_at", "pipeline", diff --git a/backend/notification_v2/internal_urls.py b/backend/notification_v2/internal_urls.py index 0414761089..a0f87f0250 100644 --- a/backend/notification_v2/internal_urls.py +++ b/backend/notification_v2/internal_urls.py @@ -21,6 +21,17 @@ router.register(r"", WebhookInternalViewSet, basename="webhook-internal") urlpatterns = [ + # Buffered (clubbed) notification dispatch endpoints + path( + "buffer/enqueue/", + internal_api_views.enqueue_notification_buffer, + name="enqueue_notification_buffer", + ), + path( + "buffer/process/", + internal_api_views.process_notification_buffer, + name="process_notification_buffer", + ), # Notification data endpoints for workers path( "pipeline//notifications/", diff --git a/backend/notification_v2/migrations/0003_add_notification_buffer.py b/backend/notification_v2/migrations/0003_add_notification_buffer.py new file mode 100644 index 0000000000..7f5224849f --- /dev/null +++ b/backend/notification_v2/migrations/0003_add_notification_buffer.py @@ -0,0 +1,148 @@ +import uuid + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("account_v2", "0001_initial"), + ("notification_v2", "0002_notification_notify_on_failures"), + ] + + operations = [ + migrations.AddField( + model_name="notification", + name="delivery_mode", + field=models.CharField( + choices=[("IMMEDIATE", "Immediate"), ("BATCHED", "Batched")], + default="IMMEDIATE", + max_length=16, + db_comment=( + "IMMEDIATE fires on every completion (default, unchanged " + "behavior). BATCHED buffers events and dispatches a single " + "clubbed message per (org, webhook_url, auth_sig) every " + "NOTIFICATION_CLUB_INTERVAL." + ), + ), + ), + migrations.CreateModel( + name="NotificationBuffer", + fields=[ + ("created_at", models.DateTimeField(auto_now_add=True)), + ("modified_at", models.DateTimeField(auto_now=True)), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ( + "webhook_url", + models.URLField( + db_comment="Denormalized destination URL; grouping key.", + ), + ), + ( + "payload", + models.JSONField( + db_comment=( + "Pre-structured execution data (execution_id, status, " + "error_message, pipeline_name, pipeline_type) — NOT a " + "final rendered message. The renderer formats this at " + "dispatch time." + ), + ), + ), + ( + "platform", + models.CharField( + choices=[("SLACK", "Slack"), ("API", "Api")], + max_length=50, + db_comment=( + "SLACK / API — drives renderer selection at flush time." + ), + ), + ), + ( + "auth_sig", + models.CharField( + max_length=64, + db_comment=( + "SHA-256 hex of (auth_type + auth_key + auth_header), " + "computed at enqueue time. Grouping key — never store " + "raw credentials here." + ), + ), + ), + ( + "flush_after", + models.DateTimeField( + db_comment=( + "created_at + NOTIFICATION_CLUB_INTERVAL, precomputed " + "at enqueue. Read-at-enqueue contract: changing the " + "env var only affects rows enqueued after the restart." + ), + ), + ), + ("dispatched_at", models.DateTimeField(blank=True, null=True)), + ( + "status", + models.CharField( + choices=[ + ("PENDING", "Pending"), + ("DISPATCHED", "Dispatched"), + ("DEAD_LETTER", "Dead letter"), + ], + default="PENDING", + max_length=16, + db_comment=( + "PENDING -> DISPATCHED on success, " + "PENDING -> DEAD_LETTER on retry exhaustion." + ), + ), + ), + ( + "notification", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="buffer_rows", + to="notification_v2.notification", + db_comment=( + "Source Notification. Cascade-delete is intentional: " + "removing a Notification expresses intent to stop all " + "future deliveries, including buffered ones." + ), + ), + ), + ( + "organization", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="notification_buffer_rows", + to="account_v2.organization", + db_comment=( + "Tenant scope. Mandatory grouping key — prevents " + "cross-tenant leakage at flush time." + ), + ), + ), + ], + options={ + "verbose_name": "Notification Buffer", + "verbose_name_plural": "Notification Buffers", + "db_table": "notification_buffer", + }, + ), + migrations.AddIndex( + model_name="notificationbuffer", + index=models.Index( + condition=models.Q(("status", "PENDING")), + fields=["organization", "webhook_url", "auth_sig", "flush_after"], + name="idx_notif_buffer_pending", + ), + ), + ] diff --git a/backend/notification_v2/models.py b/backend/notification_v2/models.py index e5238ec176..a8b077f339 100644 --- a/backend/notification_v2/models.py +++ b/backend/notification_v2/models.py @@ -1,13 +1,21 @@ import uuid +from account_v2.models import Organization from api_v2.models import APIDeployment from django.db import models from pipeline_v2.models import Pipeline from utils.models.base_model import BaseModel -from .enums import AuthorizationType, NotificationType, PlatformType +from .enums import ( + AuthorizationType, + BufferStatus, + DeliveryMode, + NotificationType, + PlatformType, +) NOTIFICATION_NAME_MAX_LENGTH = 255 +AUTH_SIG_LENGTH = 64 # SHA-256 hex digest class Notification(BaseModel): @@ -55,6 +63,16 @@ class Notification(BaseModel): "(default), fire on every terminal completion." ), ) + delivery_mode = models.CharField( + max_length=16, + choices=DeliveryMode.choices(), + default=DeliveryMode.IMMEDIATE.value, + db_comment=( + "IMMEDIATE fires on every completion (default, unchanged behavior). " + "BATCHED buffers events and dispatches a single clubbed message per " + "(org, webhook_url, auth_sig) every NOTIFICATION_CLUB_INTERVAL." + ), + ) # Foreign keys to specific models pipeline = models.ForeignKey( Pipeline, @@ -100,3 +118,92 @@ def __str__(self): f"Notification {self.id}: (Type: {self.notification_type}, " f"Platform: {self.platform}, Url: {self.url}))" ) + + +class NotificationBuffer(BaseModel): + """Per-execution event buffered for a BATCHED notification. + + One row is written per workflow completion when the source Notification + has delivery_mode=BATCHED. The flush job groups rows by + (organization, webhook_url, auth_sig), renders one clubbed message per + group, and dispatches via the existing send_webhook_notification Celery + task. Group key includes auth_sig because two notifications may share the + same URL but use different credentials — they must dispatch separately. + """ + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + notification = models.ForeignKey( + Notification, + on_delete=models.CASCADE, + related_name="buffer_rows", + db_comment=( + "Source Notification. Cascade-delete is intentional: removing a " + "Notification expresses intent to stop all future deliveries, " + "including buffered ones." + ), + ) + organization = models.ForeignKey( + Organization, + on_delete=models.CASCADE, + related_name="notification_buffer_rows", + db_comment=( + "Tenant scope. Mandatory grouping key — prevents cross-tenant " + "leakage at flush time." + ), + ) + webhook_url = models.URLField( + db_comment="Denormalized destination URL; grouping key.", + ) + payload = models.JSONField( + db_comment=( + "Pre-structured execution data (execution_id, status, error_message, " + "pipeline_name, pipeline_type) — NOT a final rendered message. The " + "renderer formats this at dispatch time." + ), + ) + platform = models.CharField( + max_length=50, + choices=PlatformType.choices(), + db_comment="SLACK / API — drives renderer selection at flush time.", + ) + auth_sig = models.CharField( + max_length=AUTH_SIG_LENGTH, + db_comment=( + "SHA-256 hex of (auth_type + auth_key + auth_header), computed at " + "enqueue time. Grouping key — never store raw credentials here." + ), + ) + flush_after = models.DateTimeField( + db_comment=( + "created_at + NOTIFICATION_CLUB_INTERVAL, precomputed at enqueue. " + "Read-at-enqueue contract: changing the env var only affects rows " + "enqueued after the restart." + ), + ) + dispatched_at = models.DateTimeField(null=True, blank=True) + status = models.CharField( + max_length=16, + choices=BufferStatus.choices(), + default=BufferStatus.PENDING.value, + db_comment="PENDING -> DISPATCHED on success, PENDING -> DEAD_LETTER on retry exhaustion.", + ) + + class Meta: + verbose_name = "Notification Buffer" + verbose_name_plural = "Notification Buffers" + db_table = "notification_buffer" + indexes = [ + # Partial covering index — supports Index Only Scans on the flush + # GROUP BY query and bounds index size to live PENDING backlog. + models.Index( + fields=["organization", "webhook_url", "auth_sig", "flush_after"], + name="idx_notif_buffer_pending", + condition=models.Q(status=BufferStatus.PENDING.value), + ), + ] + + def __str__(self) -> str: + return ( + f"NotificationBuffer {self.id}: status={self.status} " + f"flush_after={self.flush_after.isoformat() if self.flush_after else 'n/a'}" + ) diff --git a/backend/notification_v2/serializers.py b/backend/notification_v2/serializers.py index 4cb4f3c4cb..b2602929fe 100644 --- a/backend/notification_v2/serializers.py +++ b/backend/notification_v2/serializers.py @@ -1,10 +1,18 @@ from rest_framework import serializers from utils.input_sanitizer import validate_name_field -from .enums import AuthorizationType, NotificationType, PlatformType +from .enums import AuthorizationType, DeliveryMode, NotificationType, PlatformType from .models import Notification +class NotificationSettingsSerializer(serializers.Serializer): + """Org-scoped notification batching settings (UNS-611 v2).""" + + # No min/max here: mfbt is silent on bounds. Backend ConfigSpec accepts + # any int; constraining is a follow-up if/when product gives a number. + club_interval_seconds = serializers.IntegerField() + + class NotificationSerializer(serializers.ModelSerializer): notification_type = serializers.ChoiceField(choices=NotificationType.choices()) authorization_type = serializers.ChoiceField(choices=AuthorizationType.choices()) @@ -13,6 +21,11 @@ class NotificationSerializer(serializers.ModelSerializer): max_value=4, min_value=0, default=0, required=False ) notify_on_failures = serializers.BooleanField(default=False, required=False) + delivery_mode = serializers.ChoiceField( + choices=DeliveryMode.choices(), + default=DeliveryMode.IMMEDIATE.value, + required=False, + ) class Meta: model = Notification diff --git a/backend/notification_v2/tasks.py b/backend/notification_v2/tasks.py new file mode 100644 index 0000000000..a143f9295e --- /dev/null +++ b/backend/notification_v2/tasks.py @@ -0,0 +1,51 @@ +"""Celery tasks owned by notification_v2. + +Currently hosts ``mark_buffer_dead_letter`` — a thin task attached as a +Celery ``link_error`` to the clubbed dispatch chain. When the underlying +``send_webhook_notification`` task exhausts retries, this task converts +the buffered rows from PENDING/DISPATCHED to terminal DEAD_LETTER so the +flush job will not re-pick them. +""" + +from __future__ import annotations + +import logging +from collections.abc import Iterable +from typing import Any + +from backend.celery_service import app as celery_app +from notification_v2.enums import BufferStatus +from notification_v2.models import NotificationBuffer + +logger = logging.getLogger(__name__) + + +@celery_app.task(name="notification_v2.mark_buffer_dead_letter") +def mark_buffer_dead_letter( + request: Any, + exc: Any = None, + traceback: Any = None, + *, + buffer_row_ids: Iterable[str] | None = None, +) -> int: + """Mark a clubbed dispatch's rows as DEAD_LETTER on terminal failure. + + Celery's ``link_error`` signature passes ``(request, exc, traceback)`` to + the callback; the actual buffer ids are bound at dispatch time via task + kwargs. Returns the row count for visibility in flower. + """ + if not buffer_row_ids: + logger.warning( + "mark_buffer_dead_letter invoked without buffer_row_ids — nothing to do" + ) + return 0 + ids = list(buffer_row_ids) + updated: int = NotificationBuffer.objects.filter(id__in=ids).update( + status=BufferStatus.DEAD_LETTER.value + ) + logger.warning( + "metric=notification_batch_dispatched_total result=dead_letter rows=%d " "exc=%r", + updated, + exc, + ) + return updated diff --git a/backend/notification_v2/tests/__init__.py b/backend/notification_v2/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/backend/notification_v2/urls.py b/backend/notification_v2/urls.py index 2e356b4003..e41bb00f3a 100644 --- a/backend/notification_v2/urls.py +++ b/backend/notification_v2/urls.py @@ -1,7 +1,7 @@ from django.urls import path from rest_framework.urlpatterns import format_suffix_patterns -from .views import NotificationViewSet +from .views import NotificationSettingsView, NotificationViewSet notification_list = NotificationViewSet.as_view({"get": "list", "post": "create"}) notification_detail = NotificationViewSet.as_view( @@ -16,6 +16,13 @@ urlpatterns = format_suffix_patterns( [ path("", notification_list, name="notification-list"), + # Org-scoped notification batching settings (UNS-611 v2). Mounted + # before the route so "settings" is not interpreted as a UUID. + path( + "settings/", + NotificationSettingsView.as_view(), + name="notification-settings", + ), path("/", notification_detail, name="notification-detail"), path( "pipeline//", diff --git a/backend/notification_v2/views.py b/backend/notification_v2/views.py index 1410256ab6..207d4acc6b 100644 --- a/backend/notification_v2/views.py +++ b/backend/notification_v2/views.py @@ -1,14 +1,26 @@ +import logging + from api_v2.deployment_helper import DeploymentHelper from api_v2.exceptions import APINotFound +from configuration.enums import ConfigKey +from configuration.models import Configuration from pipeline_v2.exceptions import PipelineNotFound from pipeline_v2.models import Pipeline from pipeline_v2.pipeline_processor import PipelineProcessor -from rest_framework import viewsets +from platform_api.permissions import IsOrganizationAdmin +from rest_framework import status, viewsets +from rest_framework.permissions import IsAuthenticated +from rest_framework.request import Request +from rest_framework.response import Response +from rest_framework.views import APIView +from utils.user_context import UserContext from notification_v2.constants import NotificationUrlConstant from .models import Notification -from .serializers import NotificationSerializer +from .serializers import NotificationSerializer, NotificationSettingsSerializer + +logger = logging.getLogger(__name__) class NotificationViewSet(viewsets.ModelViewSet): @@ -39,3 +51,47 @@ def get_queryset(self): queryset = queryset.filter(api=api) return queryset + + +class NotificationSettingsView(APIView): + """Org-scoped notification batching settings — currently just the club interval. + + GET returns the org's effective interval (override or env-derived default). + PATCH writes/updates the override via the generic configuration KV table. + + Read-at-enqueue contract (mfbt §EC-2 / §EC-8): updates take effect for + notifications enqueued after the change. Existing PENDING buffer rows + keep their original flush_after. + """ + + permission_classes = [IsAuthenticated, IsOrganizationAdmin] + + def get(self, request: Request) -> Response: + organization = UserContext.get_organization() + value = Configuration.get_value_by_organization( + ConfigKey.NOTIFICATION_CLUB_INTERVAL, organization + ) + return Response({"club_interval_seconds": int(value)}) + + def patch(self, request: Request) -> Response: + serializer = NotificationSettingsSerializer(data=request.data, partial=True) + serializer.is_valid(raise_exception=True) + organization = UserContext.get_organization() + new_value = serializer.validated_data.get("club_interval_seconds") + if new_value is None: + return Response( + {"detail": "club_interval_seconds is required."}, + status=status.HTTP_400_BAD_REQUEST, + ) + # ConfigKey.cast_value enforces type + any future bounds; bubble its + # ValueError up as a 400 instead of letting it 500. + try: + ConfigKey.NOTIFICATION_CLUB_INTERVAL.cast_value(new_value) + except ValueError as exc: + return Response({"detail": str(exc)}, status=status.HTTP_400_BAD_REQUEST) + Configuration.objects.update_or_create( + organization=organization, + key=ConfigKey.NOTIFICATION_CLUB_INTERVAL.name, + defaults={"value": str(new_value), "enabled": True}, + ) + return Response({"club_interval_seconds": int(new_value)}) diff --git a/backend/pipeline_v2/notification.py b/backend/pipeline_v2/notification.py index ec82145054..5a40a37506 100644 --- a/backend/pipeline_v2/notification.py +++ b/backend/pipeline_v2/notification.py @@ -1,6 +1,6 @@ import logging -from notification_v2.helper import NotificationHelper +from notification_v2.helper import dispatch_with_delivery_mode from notification_v2.models import Notification from workflow_manager.workflow_v2.enums import ExecutionStatus from workflow_manager.workflow_v2.models.execution import WorkflowExecution @@ -90,7 +90,8 @@ def send(self) -> None: successful_files=successful_files, failed_files=failed_files, ) - - NotificationHelper.send_notification( - notifications=self.notifications, payload=payload_dto.to_dict() + dispatch_with_delivery_mode( + list(self.notifications), + payload_dto.to_dict(), + error_context=f"pipeline={self.pipeline.id}", ) diff --git a/backend/utils/cors_origin.py b/backend/utils/cors_origin.py new file mode 100644 index 0000000000..aa640c6bc6 --- /dev/null +++ b/backend/utils/cors_origin.py @@ -0,0 +1,81 @@ +"""CORS origin helpers used by Django settings and SocketIO log events. + +Kept free of Django imports so the matching/normalization logic can be unit +tested without bootstrapping the full project. +""" + +from __future__ import annotations + +import re +from urllib.parse import urlparse + + +class RegexOrigin: + """Origin pattern that compares to strings via regex match. + + python-socketio enforces CORS with ``origin in allowed_origins`` during the + engine.io handshake — overriding ``__eq__`` lets a single list entry cover + a wildcard subdomain so bad origins are rejected before ``connect`` runs. + + Instances are intentionally unhashable: a hashable object must satisfy + ``a == b ⇒ hash(a) == hash(b)``, but ``__eq__`` here is asymmetric across + types (matches many strings, hashes only one pattern). Any code that put + one in a ``set``/``frozenset`` would silently break the CORS gate, so + ``__hash__`` is disabled to fail loud at construction time instead. + + ``fullmatch`` (not ``match``) is used so ``$`` doesn't permit a trailing + newline — defense in depth even though WSGI strips them upstream. + """ + + def __init__(self, pattern: str) -> None: + self._regex = re.compile(pattern) + + def __eq__(self, other: object) -> bool: + if isinstance(other, str): + return self._regex.fullmatch(other) is not None + return NotImplemented + + __hash__ = None # see class docstring + + +def normalize_web_app_origin(env_value: str) -> tuple[str, str, str]: + """Parse and canonicalize ``WEB_APP_ORIGIN_URL`` for CORS/CSRF allow-lists. + + Returns ``(origin, wildcard_origin, subdomain_regex)``: + + - ``origin``: ``scheme://host[:port]`` form. Hostname is lowercased and + explicit default ports (:80 for http, :443 for https) are dropped, so + it matches what browsers serialize per RFC 6454. + - ``wildcard_origin``: same with a literal ``*.`` subdomain prefix, for + Django's ``CSRF_TRUSTED_ORIGINS`` (which fnmatches ``*``). + - ``subdomain_regex``: anchored pattern matching any subdomain of the + configured netloc, for ``CORS_ALLOWED_ORIGIN_REGEXES`` and SocketIO + via ``RegexOrigin``. + + Raises: + ValueError: if the env value is not an http(s) URL with a host. + """ + parsed = urlparse(env_value) + # `parsed.port` is a property that raises ValueError on malformed/out-of-range + # ports (e.g. `:abc`, `:99999`). Catch it here so misconfig surfaces with the + # same actionable message as every other validation failure. + try: + port = parsed.port + except ValueError as exc: + raise ValueError( + f"WEB_APP_ORIGIN_URL must be of the form http(s)://host[:port], " + f"got: {parsed.geturl()!r}" + ) from exc + if parsed.scheme not in {"http", "https"} or not parsed.hostname: + raise ValueError( + f"WEB_APP_ORIGIN_URL must be of the form http(s)://host[:port], " + f"got: {parsed.geturl()!r}" + ) + default_port = {"http": 80, "https": 443}[parsed.scheme] + netloc = parsed.hostname + if port and port != default_port: + netloc = f"{netloc}:{port}" + origin = f"{parsed.scheme}://{netloc}" + wildcard = f"{parsed.scheme}://*.{netloc}" + subdomain_regex = rf"^{re.escape(parsed.scheme)}://[^/]+\.{re.escape(netloc)}$" + return origin, wildcard, subdomain_regex diff --git a/backend/utils/log_events.py b/backend/utils/log_events.py index f2ff725820..dcb66ed81c 100644 --- a/backend/utils/log_events.py +++ b/backend/utils/log_events.py @@ -11,9 +11,15 @@ from unstract.core.data_models import LogDataDTO from unstract.core.log_utils import get_validated_log_data, store_execution_log from utils.constants import ExecutionLogConstants +from utils.cors_origin import RegexOrigin logger = logging.getLogger(__name__) + +_cors_allowed_origins: list[Any] = list(settings.CORS_ALLOWED_ORIGINS) +for _pattern in getattr(settings, "CORS_ALLOWED_ORIGIN_REGEXES", []): + _cors_allowed_origins.append(RegexOrigin(_pattern)) + _kombu_kwargs: dict[str, Any] = {"url": settings.SOCKET_IO_MANAGER_URL} if getattr(settings, "SOCKET_IO_TRANSPORT_OPTIONS", None): _kombu_kwargs["connection_options"] = { @@ -23,7 +29,7 @@ sio = socketio.Server( # Allowed values: {threading, eventlet, gevent, gevent_uwsgi} async_mode="threading", - cors_allowed_origins=settings.CORS_ALLOWED_ORIGINS, + cors_allowed_origins=_cors_allowed_origins, logger=False, engineio_logger=False, always_connect=True, diff --git a/backend/utils/tests/test_cors_origin.py b/backend/utils/tests/test_cors_origin.py new file mode 100644 index 0000000000..65628b4405 --- /dev/null +++ b/backend/utils/tests/test_cors_origin.py @@ -0,0 +1,197 @@ +"""Regression tests for utils.cors_origin — the CORS origin matcher and +URL normalizer that gate browser ``Origin`` headers on every Django request +and SocketIO handshake. + +UN-3439: production socket connections silently failed for wildcard subdomains +because python-socketio does exact-string comparison. These tests pin the +contract so the next refactor can't reopen the hole. +""" + +from __future__ import annotations + +import time + +import pytest + +from utils.cors_origin import RegexOrigin, normalize_web_app_origin + + +class TestRegexOrigin: + def test_subdomain_matches(self): + ro = RegexOrigin(r"^https://[^/]+\.example\.com$") + assert ("https://app.example.com" == ro) is True + assert ("https://api.example.com" == ro) is True + + def test_deep_subdomain_matches(self): + """Multi-level subdomains are accepted — DNS-owned by the same party.""" + ro = RegexOrigin(r"^https://[^/]+\.example\.com$") + assert ("https://dev.env.example.com" == ro) is True + + def test_apex_does_not_match(self): + """Apex is covered by the exact-match CORS_ALLOWED_ORIGINS entry, not + the wildcard regex.""" + ro = RegexOrigin(r"^https://[^/]+\.example\.com$") + assert ("https://example.com" == ro) is False + + def test_lookalike_rejected(self): + ro = RegexOrigin(r"^https://[^/]+\.example\.com$") + assert ("https://attacker-example.com" == ro) is False + assert ("https://example.com.attacker.com" == ro) is False + assert ("https://x.example.com.attacker.com" == ro) is False + + def test_wrong_scheme_rejected(self): + ro = RegexOrigin(r"^https://[^/]+\.example\.com$") + # NOSONAR — the `http://` URL is intentional test data: we are + # asserting it is *rejected* by an https-scoped pattern. + assert ("http://app.example.com" == ro) is False # NOSONAR + + def test_trailing_newline_rejected(self): + """``fullmatch`` (not ``match``) is required so ``$`` doesn't permit + a trailing ``\\n`` per Python regex semantics.""" + ro = RegexOrigin(r"^https://[^/]+\.example\.com$") + assert ("https://app.example.com\n" == ro) is False + + def test_in_operator_routes_to_eq(self): + """``origin in allowed_origins`` is exactly how python-socketio gates + the engine.io handshake — verify Python's reflected ``__eq__`` kicks in.""" + allowed = [RegexOrigin(r"^https://[^/]+\.example\.com$")] + assert "https://app.example.com" in allowed + assert "https://evil.com" not in allowed + + def test_non_string_returns_not_implemented(self): + """``__eq__`` must return the ``NotImplemented`` sentinel (not + ``False``) for non-strings so Python's reflected-equality protocol + can fall back to identity. Tested via direct dunder calls because + ``ro == None`` short-circuits before reaching ``__eq__``.""" + ro = RegexOrigin(r"^x$") + assert ro.__eq__(None) is NotImplemented + assert ro.__eq__(42) is NotImplemented + assert ro.__eq__([]) is NotImplemented + + def test_unhashable(self): + """``__hash__ = None`` prevents the equality/hash contract from being + violated if anyone wraps the allow-list in a ``set``/``frozenset``.""" + ro = RegexOrigin(r"^x$") + with pytest.raises(TypeError): + hash(ro) + with pytest.raises(TypeError): + # `len({ro})` builds the set (calling __hash__) and consumes it via + # an explicit function call — keeps ruff from collapsing the + # statement and Sonar from flagging it as side-effect-free. + len({ro}) + + def test_no_redos(self): + """Pattern must complete on hostile input — ``[^/]+`` has no nested + quantifiers so backtracking is bounded. Threshold is generous (500ms) + so noisy CI runners don't flake; ReDoS would blow up by orders of + magnitude past this.""" + ro = RegexOrigin(r"^https://[^/]+\.example\.com$") + hostile = "https://" + "a" * 10000 + ".evil.com" + start = time.perf_counter() + _ = hostile in [ro] + assert time.perf_counter() - start < 0.5 + + +class TestNormalizeWebAppOrigin: + def test_basic_https(self): + origin, wildcard, _ = normalize_web_app_origin("https://example.com") + assert origin == "https://example.com" + assert wildcard == "https://*.example.com" + + def test_strips_trailing_slash(self): + origin, _, _ = normalize_web_app_origin("https://example.com/") + assert origin == "https://example.com" + + def test_strips_path_and_query(self): + origin, _, _ = normalize_web_app_origin("https://example.com/path?q=1") + assert origin == "https://example.com" + + def test_lowercases_hostname(self): + """Browsers serialize ``Origin`` with a lowercase host (RFC 6454); + django-cors-headers does case-sensitive string compare, so the env + value must be lowercased to match.""" + origin, _, _ = normalize_web_app_origin("https://APP.EXAMPLE.COM") + assert origin == "https://app.example.com" + + def test_drops_default_https_port(self): + """Browsers omit the explicit default port from ``Origin`` per + RFC 6454 — keeping ``:443`` would silently break exact match.""" + origin, _, _ = normalize_web_app_origin("https://example.com:443") + assert origin == "https://example.com" + + def test_drops_default_http_port(self): + # NOSONAR — `http://` URLs are intentional test data for the port + # normalization logic, not a runtime use of the insecure protocol. + origin, _, _ = normalize_web_app_origin("http://example.com:80") # NOSONAR + assert origin == "http://example.com" # NOSONAR + + def test_keeps_non_default_port(self): + origin, wildcard, _ = normalize_web_app_origin("https://example.com:8443") + assert origin == "https://example.com:8443" + assert wildcard == "https://*.example.com:8443" + + def test_localhost_default(self): + origin, _, _ = normalize_web_app_origin("http://localhost:3000") + assert origin == "http://localhost:3000" + + @pytest.mark.parametrize( + "bad", + [ + "", + "not-a-url", + "example.com", # missing scheme + "//example.com", # protocol-relative + "https://", # missing host + "ftp://example.com", # non-browser scheme # NOSONAR — test input asserting ftp is rejected + "ws://example.com", # not a top-level browser scheme + "https://example.com:abc", # malformed port — urlparse raises on .port access + "https://example.com:99999", # out-of-range port + ], + ) + def test_rejects_misconfigured(self, bad): + """Fail fast at startup so misconfigured envs can't silently produce + CORS rules that match nothing real.""" + with pytest.raises(ValueError, match="WEB_APP_ORIGIN_URL"): + normalize_web_app_origin(bad) + + +class TestSubdomainRegexEndToEnd: + """End-to-end: the regex returned by ``normalize_web_app_origin`` is what + actually gates production. Verify via ``RegexOrigin`` (same path as + SocketIO uses).""" + + def test_apex_env_accepts_subdomains(self): + _, _, pattern = normalize_web_app_origin("https://us-central.unstract.com") + ro = RegexOrigin(pattern) + # The exact failing origins from UN-3439: + assert "https://dev.env.us-central.unstract.com" in [ro] + assert "https://test.env.us-central.unstract.com" in [ro] + + def test_apex_rejected_by_regex(self): + """Apex itself is *not* matched by the wildcard regex — that's the + exact-match CORS_ALLOWED_ORIGINS entry's job.""" + _, _, pattern = normalize_web_app_origin("https://example.com") + ro = RegexOrigin(pattern) + assert "https://example.com" not in [ro] + + @pytest.mark.parametrize( + "spoof", + [ + "https://attacker-example.com", + "https://x.example.com.attacker.com", + "https://example.com.attacker.com", + "http://app.example.com", # wrong scheme # NOSONAR — test input asserting http is rejected + "https://app.example.com\n", # trailing newline + ], + ) + def test_spoofed_origins_rejected(self, spoof): + _, _, pattern = normalize_web_app_origin("https://example.com") + ro = RegexOrigin(pattern) + assert spoof not in [ro], f"should reject {spoof!r}" + + def test_uppercase_env_still_accepts_lowercase_origin(self): + """Browser sends lowercase even if the env was set with uppercase; + normalization must canonicalize before regex compilation.""" + _, _, pattern = normalize_web_app_origin("https://APP.EXAMPLE.COM") + ro = RegexOrigin(pattern) + assert "https://sub.app.example.com" in [ro] diff --git a/frontend/src/components/pipelines-or-deployments/notification-modal/CreateNotification.jsx b/frontend/src/components/pipelines-or-deployments/notification-modal/CreateNotification.jsx index 65460376d4..17066ed06b 100644 --- a/frontend/src/components/pipelines-or-deployments/notification-modal/CreateNotification.jsx +++ b/frontend/src/components/pipelines-or-deployments/notification-modal/CreateNotification.jsx @@ -2,6 +2,12 @@ import { Button, Checkbox, Form, Input, Select, Space } from "antd"; import PropTypes from "prop-types"; import { useEffect, useState } from "react"; import { getBackendErrorDetail } from "../../../helpers/GetStaticData"; +import { useAxiosPrivate } from "../../../hooks/useAxiosPrivate"; +import { useSessionStore } from "../../../store/session-store"; + +// Used only when the org's batch interval can't be fetched (network or auth +// failure). Backend's env-derived default is also 30 min, so this matches. +const FALLBACK_BATCH_INTERVAL_MINUTES = 30; const DEFAULT_FORM_DETAILS = { name: "", @@ -13,6 +19,7 @@ const DEFAULT_FORM_DETAILS = { is_active: false, max_retries: 0, notify_on_failures: false, + delivery_mode: "IMMEDIATE", pipeline: "", api: "", url: "", @@ -68,6 +75,37 @@ function CreateNotification({ const [formDetails, setFormDetails] = useState(DEFAULT_FORM_DETAILS); const [backendErrors, setBackendErrors] = useState(null); const [resetForm, setResetForm] = useState(false); + const [batchIntervalMinutes, setBatchIntervalMinutes] = useState( + FALLBACK_BATCH_INTERVAL_MINUTES, + ); + const axiosPrivate = useAxiosPrivate(); + const { sessionDetails } = useSessionStore(); + + useEffect(() => { + // Read live org-scoped interval (UNS-611 v2). Fall back silently to the + // hardcoded 30-min default — the dropdown still labels something useful. + if (!sessionDetails?.orgId) { + return; + } + axiosPrivate({ + method: "GET", + url: `/api/v1/unstract/${sessionDetails.orgId}/notifications/settings/`, + }) + .then((res) => { + const seconds = res?.data?.club_interval_seconds; + if (typeof seconds === "number" && seconds > 0) { + setBatchIntervalMinutes(Math.max(1, Math.round(seconds / 60))); + } + }) + .catch(() => { + // Non-fatal — keep fallback. + }); + }, [sessionDetails?.orgId]); + + const deliveryModes = [ + { value: "IMMEDIATE", label: "Immediate" }, + { value: "BATCHED", label: "Batched" }, + ]; useEffect(() => { if (editDetails) { @@ -84,7 +122,18 @@ function CreateNotification({ }, [formDetails]); const handleInputChange = (changedValues, allValues) => { - setFormDetails({ ...formDetails, ...allValues }); + let nextValues = { ...formDetails, ...allValues }; + // Failure alerts must not be delayed by the batch window — auto-select + // IMMEDIATE the moment the box is checked. The user can still override + // to BATCHED afterward and that choice will stick. + if ( + Object.hasOwn(changedValues, "notify_on_failures") && + changedValues.notify_on_failures === true + ) { + nextValues = { ...nextValues, delivery_mode: "IMMEDIATE" }; + form.setFieldsValue({ delivery_mode: "IMMEDIATE" }); + } + setFormDetails(nextValues); const changedFieldName = Object.keys(changedValues)[0]; form.setFields([ { @@ -228,6 +277,18 @@ function CreateNotification({ > Notify on failures only + + + + + Notify on failures only + + diff --git a/frontend/src/components/settings/platform/PlatformSettings.jsx b/frontend/src/components/settings/platform/PlatformSettings.jsx index 0b39cebecf..0bbcb641b6 100644 --- a/frontend/src/components/settings/platform/PlatformSettings.jsx +++ b/frontend/src/components/settings/platform/PlatformSettings.jsx @@ -77,10 +77,14 @@ function PlatformSettings() { }, []); const handleSaveInterval = () => { - if (!batchIntervalMinutes || batchIntervalMinutes < 1) { + if ( + !batchIntervalMinutes || + batchIntervalMinutes < 1 || + batchIntervalMinutes > 120 + ) { setAlertDetails({ type: "error", - content: "Batch interval must be a positive number of minutes.", + content: "Notification interval must be between 1 and 120 minutes.", }); return; } @@ -318,6 +322,7 @@ function PlatformSettings() {
+ Internal API Keys {keys.map((keyDetails, keyIndex) => { return (
@@ -398,21 +403,17 @@ function PlatformSettings() {
- - Notification batching - - - Batched notifications enqueued after a change pick up the new - value; in-flight rows keep their original cadence. - + Notifications
- Batch interval (minutes) + + Notification interval (minutes, 1–120) + setBatchIntervalMinutes(v)} - placeholder="e.g. 30" />
From 50917e8289def43671447a3104e79d2ddf8270a9 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 14:44:20 +0530 Subject: [PATCH 13/27] PR reviews --- backend/api_v2/notification.py | 7 +- backend/notification_v2/clubbed_renderer.py | 2 +- backend/notification_v2/enums.py | 7 + backend/notification_v2/helper.py | 12 +- backend/notification_v2/internal_api_views.py | 132 +++++++++++------- .../0003_add_notification_buffer.py | 8 +- backend/notification_v2/models.py | 10 +- backend/notification_v2/tasks.py | 2 +- backend/pipeline_v2/notification.py | 7 +- .../workflow_manager/internal_serializers.py | 34 +++++ .../settings/platform/PlatformSettings.jsx | 7 +- .../shared/patterns/notification/helper.py | 60 ++++---- 12 files changed, 191 insertions(+), 97 deletions(-) diff --git a/backend/api_v2/notification.py b/backend/api_v2/notification.py index 4cefe92d24..d5d2dca9df 100644 --- a/backend/api_v2/notification.py +++ b/backend/api_v2/notification.py @@ -1,9 +1,9 @@ import logging +from notification_v2.enums import FAILURE_STATUSES from notification_v2.helper import dispatch_with_delivery_mode from notification_v2.models import Notification from pipeline_v2.dto import PipelineStatusPayload -from workflow_manager.workflow_v2.enums import ExecutionStatus from workflow_manager.workflow_v2.models.execution import WorkflowExecution from api_v2.models import APIDeployment @@ -11,9 +11,6 @@ logger = logging.getLogger(__name__) -_FAILURE_STATUSES = {ExecutionStatus.ERROR.value, ExecutionStatus.STOPPED.value} - - class APINotification: def __init__(self, api: APIDeployment, workflow_execution: WorkflowExecution) -> None: self.notifications = Notification.objects.filter(api=api, is_active=True) @@ -26,7 +23,7 @@ def send(self) -> None: # status check alone misses them — see callback aggregation rules. failed_files = self.workflow_execution.failed_files or 0 is_failure = ( - self.workflow_execution.status in _FAILURE_STATUSES or failed_files > 0 + self.workflow_execution.status in FAILURE_STATUSES or failed_files > 0 ) if not is_failure: # Success path: skip rows that opted into failure-only alerts. diff --git a/backend/notification_v2/clubbed_renderer.py b/backend/notification_v2/clubbed_renderer.py index fc85120f7a..1974a896d3 100644 --- a/backend/notification_v2/clubbed_renderer.py +++ b/backend/notification_v2/clubbed_renderer.py @@ -1,6 +1,6 @@ """Canonical envelope + renderer for every dispatch — IMMEDIATE and BATCHED. -The same envelope shape feeds every channel × mode cell so receivers never +The same envelope shape feeds every channel x mode cell so receivers never need to branch on "is this batched?": { diff --git a/backend/notification_v2/enums.py b/backend/notification_v2/enums.py index d6fed8b485..9e694e128f 100644 --- a/backend/notification_v2/enums.py +++ b/backend/notification_v2/enums.py @@ -1,5 +1,12 @@ from enum import Enum +from workflow_manager.workflow_v2.enums import ExecutionStatus + +# Single source of truth for "did this run fail for notification routing?". +# STOPPED is intentionally a failure here per migrations/0002_…notify_on_failures +# db_comment ("terminal status ERROR/STOPPED or any file in the run errored"). +FAILURE_STATUSES = frozenset({ExecutionStatus.ERROR.value, ExecutionStatus.STOPPED.value}) + class NotificationType(Enum): WEBHOOK = "WEBHOOK" diff --git a/backend/notification_v2/helper.py b/backend/notification_v2/helper.py index 908c0dd993..4a6c5cc907 100644 --- a/backend/notification_v2/helper.py +++ b/backend/notification_v2/helper.py @@ -1,4 +1,5 @@ import hashlib +import json import logging from collections.abc import Iterable from datetime import timedelta @@ -26,17 +27,20 @@ def compute_auth_sig(notification: Notification) -> str: - """SHA-256 hex of (auth_type + auth_key + auth_header) — never raw creds. + """SHA-256 hex of (auth_type, auth_key, auth_header) — never raw creds. Identical auth configs produce the same sig (so grouping clubs them); - differing configs split into separate groups. + differing configs split into separate groups. The tuple is JSON-encoded + before hashing so a literal delimiter byte inside auth_key/header cannot + cause two distinct tuples to collapse to the same digest. """ - raw = "|".join( + raw = json.dumps( [ notification.authorization_type or _AUTH_SIG_NONE, notification.authorization_key or _AUTH_SIG_NONE, notification.authorization_header or _AUTH_SIG_NONE, - ] + ], + separators=(",", ":"), ) return hashlib.sha256(raw.encode("utf-8")).hexdigest() diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index bf07e2c9c4..9a9c0e4a71 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -25,12 +25,11 @@ from django.views.decorators.http import require_http_methods from pipeline_v2.models import Pipeline from utils.organization_utils import filter_queryset_by_organization -from workflow_manager.workflow_v2.enums import ExecutionStatus from workflow_manager.workflow_v2.models.execution import WorkflowExecution from backend.celery_service import app as celery_app from notification_v2.clubbed_renderer import render_clubbed_message -from notification_v2.enums import BufferStatus, DeliveryMode +from notification_v2.enums import FAILURE_STATUSES, BufferStatus, DeliveryMode from notification_v2.helper import ( build_webhook_headers, enqueue, @@ -43,8 +42,6 @@ # Constants for error messages INTERNAL_SERVER_ERROR_MSG = "Internal server error" -_FAILURE_STATUSES = {ExecutionStatus.ERROR.value, ExecutionStatus.STOPPED.value} - def _load_execution(execution_id: str | None) -> WorkflowExecution | None: """Best-effort lookup; returns None on missing id or unknown row.""" @@ -73,7 +70,7 @@ def _apply_failure_filter( if execution is None: return notifications_qs failed_files = execution.failed_files or 0 - is_failure = execution.status in _FAILURE_STATUSES or failed_files > 0 + is_failure = execution.status in FAILURE_STATUSES or failed_files > 0 if not is_failure: notifications_qs = notifications_qs.filter(notify_on_failures=False) return notifications_qs @@ -387,12 +384,71 @@ def _gc_terminal_rows() -> int: return int(deleted_count) +def _send_clubbed( + *, + url: str, + body: Any, + headers: dict[str, str], + platform: str, + max_retries: int, + buffer_ids: list[str], + org_id: Any, +) -> None: + """Send the clubbed Celery task after the DB transition has committed. + + Runs as a ``transaction.on_commit`` callback so a rolled-back UPDATE can + never leave a broker-queued message orphaned (the prior order — send + then update — risked duplicate delivery if the UPDATE failed). On broker + failure we revert rows back to PENDING in a separate transaction so the + next flush tick retries cleanly. + """ + try: + celery_app.send_task( + "send_webhook_notification", + args=[url, body, headers, settings.NOTIFICATION_TIMEOUT], + kwargs={ + "max_retries": max_retries, + "retry_delay": 10, + "platform": platform, + }, + queue="notifications", + link_error=celery_app.signature( + "notification_v2.mark_buffer_dead_letter", + kwargs={"buffer_row_ids": buffer_ids}, + ), + ) + logger.info( + "metric=notification_batch_dispatched_total platform=%s result=success " + "org_id=%s webhook_url_hash=%s rows=%d", + platform, + org_id, + webhook_url_hash(url), + len(buffer_ids), + ) + except Exception: + logger.exception( + "metric=notification_batch_dispatched_total platform=%s " + "result=broker_failure org_id=%s webhook_url_hash=%s rows=%d", + platform, + org_id, + webhook_url_hash(url), + len(buffer_ids), + ) + # Revert outside the committed transaction so a transient broker + # outage degrades to "retried next tick" rather than "stuck DISPATCHED". + NotificationBuffer.objects.filter(id__in=buffer_ids).update( + status=BufferStatus.PENDING.value, + dispatched_at=None, + ) + + def _dispatch_group( org_id: Any, webhook_url: str, auth_sig: str, + platform: str, ) -> tuple[int, int]: - """Dispatch a single (org, url, auth_sig) group; returns (rows, succeeded). + """Dispatch a single (org, url, auth_sig, platform) group; returns (rows, succeeded). Caller already filtered groups to MIN(flush_after) <= now. Locks rows with SKIP LOCKED so a sibling replica skips them rather than blocking. @@ -407,6 +463,7 @@ def _dispatch_group( organization_id=org_id, webhook_url=webhook_url, auth_sig=auth_sig, + platform=platform, ) .order_by("created_at")[:_PROCESS_BUFFER_CAP] ) @@ -417,57 +474,33 @@ def _dispatch_group( return 0, 0 # Live auth — read from the FIRST row's notification. If multiple - # notifications collide on (url, auth_sig) we have, by definition, - # identical auth, so this is safe. + # notifications collide on (url, auth_sig, platform) we have, by + # definition, identical auth + format, so this is safe. first_notification = rows[0].notification - platform = rows[0].platform payloads = [r.payload for r in rows] body = render_clubbed_message(payloads, platform) headers = build_webhook_headers(first_notification) - buffer_ids = [str(r.id) for r in rows] - try: - celery_app.send_task( - "send_webhook_notification", - args=[ - first_notification.url, - body, - headers, - settings.NOTIFICATION_TIMEOUT, - ], - kwargs={ - "max_retries": first_notification.max_retries, - "retry_delay": 10, - "platform": platform, - }, - queue="notifications", - link_error=celery_app.signature( - "notification_v2.mark_buffer_dead_letter", - kwargs={"buffer_row_ids": buffer_ids}, - ), - ) - except Exception: - # Broker hiccup — leave rows PENDING for the next tick rather - # than mark them DEAD_LETTER. `exception` keeps stack context. - logger.exception( - "Broker dispatch failed for group org=%s url_hash=%s", - org_id, - webhook_url_hash(webhook_url), - ) - return 0, 0 + # Mark DISPATCHED first; if commit succeeds the on_commit hook + # publishes the broker task. If commit fails, rows stay PENDING and + # no task is published — eliminates the broker-vs-DB duplicate-send + # race that bit us when the order was reversed. now = timezone.now() NotificationBuffer.objects.filter(id__in=buffer_ids).update( status=BufferStatus.DISPATCHED.value, dispatched_at=now, ) - logger.info( - "metric=notification_batch_dispatched_total platform=%s result=success " - "org_id=%s webhook_url_hash=%s rows=%d", - platform, - org_id, - webhook_url_hash(webhook_url), - len(rows), + transaction.on_commit( + lambda: _send_clubbed( + url=first_notification.url, + body=body, + headers=headers, + platform=platform, + max_retries=first_notification.max_retries, + buffer_ids=buffer_ids, + org_id=org_id, + ) ) return len(rows), len(rows) @@ -484,9 +517,9 @@ def process_notification_buffer(request: HttpRequest) -> JsonResponse: """Flush PENDING groups that have hit their flush_after; then GC. Algorithm: - 1. GROUP BY (org, url, auth_sig), HAVING MIN(flush_after) <= NOW() + 1. GROUP BY (org, url, auth_sig, platform), HAVING MIN(flush_after) <= NOW() 2. For each group, in its own transaction: lock-skip-locked rows, - render, dispatch a single Celery task, mark rows DISPATCHED. + render, mark rows DISPATCHED, on_commit-dispatch a single Celery task. 3. Sweep terminal rows older than NOTIFICATION_BUFFER_RETENTION_DAYS. Concurrency: SELECT FOR UPDATE SKIP LOCKED makes parallel calls safe — @@ -495,7 +528,7 @@ def process_notification_buffer(request: HttpRequest) -> JsonResponse: now = timezone.now() groups = list( NotificationBuffer.objects.filter(status=BufferStatus.PENDING.value) - .values("organization_id", "webhook_url", "auth_sig") + .values("organization_id", "webhook_url", "auth_sig", "platform") .annotate(earliest_flush=Min("flush_after")) .filter(earliest_flush__lte=now) ) @@ -508,6 +541,7 @@ def process_notification_buffer(request: HttpRequest) -> JsonResponse: org_id=group["organization_id"], webhook_url=group["webhook_url"], auth_sig=group["auth_sig"], + platform=group["platform"], ) except Exception: logger.exception( diff --git a/backend/notification_v2/migrations/0003_add_notification_buffer.py b/backend/notification_v2/migrations/0003_add_notification_buffer.py index fd66f014ba..5090eff18a 100644 --- a/backend/notification_v2/migrations/0003_add_notification_buffer.py +++ b/backend/notification_v2/migrations/0003_add_notification_buffer.py @@ -141,7 +141,13 @@ class Migration(migrations.Migration): model_name="notificationbuffer", index=models.Index( condition=models.Q(("status", "PENDING")), - fields=["organization", "webhook_url", "auth_sig", "flush_after"], + fields=[ + "organization", + "webhook_url", + "auth_sig", + "platform", + "flush_after", + ], name="idx_notif_buffer_pending", ), ), diff --git a/backend/notification_v2/models.py b/backend/notification_v2/models.py index 9c7d136534..ea8b68f4ad 100644 --- a/backend/notification_v2/models.py +++ b/backend/notification_v2/models.py @@ -195,8 +195,16 @@ class Meta: indexes = [ # Partial covering index — supports Index Only Scans on the flush # GROUP BY query and bounds index size to live PENDING backlog. + # `platform` is part of the grouping key so SLACK and API rows on + # the same (org, url, auth) split into separate dispatches. models.Index( - fields=["organization", "webhook_url", "auth_sig", "flush_after"], + fields=[ + "organization", + "webhook_url", + "auth_sig", + "platform", + "flush_after", + ], name="idx_notif_buffer_pending", condition=models.Q(status=BufferStatus.PENDING.value), ), diff --git a/backend/notification_v2/tasks.py b/backend/notification_v2/tasks.py index a143f9295e..f14a07ec55 100644 --- a/backend/notification_v2/tasks.py +++ b/backend/notification_v2/tasks.py @@ -44,7 +44,7 @@ def mark_buffer_dead_letter( status=BufferStatus.DEAD_LETTER.value ) logger.warning( - "metric=notification_batch_dispatched_total result=dead_letter rows=%d " "exc=%r", + "metric=notification_batch_dispatched_total result=dead_letter rows=%d exc=%r", updated, exc, ) diff --git a/backend/pipeline_v2/notification.py b/backend/pipeline_v2/notification.py index 5a40a37506..9537cad47b 100644 --- a/backend/pipeline_v2/notification.py +++ b/backend/pipeline_v2/notification.py @@ -1,8 +1,8 @@ import logging +from notification_v2.enums import FAILURE_STATUSES from notification_v2.helper import dispatch_with_delivery_mode from notification_v2.models import Notification -from workflow_manager.workflow_v2.enums import ExecutionStatus from workflow_manager.workflow_v2.models.execution import WorkflowExecution from pipeline_v2.dto import PipelineStatusPayload @@ -11,9 +11,6 @@ logger = logging.getLogger(__name__) -_FAILURE_STATUSES = {ExecutionStatus.ERROR.value, ExecutionStatus.STOPPED.value} - - class PipelineNotification: def __init__( self, @@ -54,7 +51,7 @@ def send(self) -> None: failed_files = (execution.failed_files or 0) if execution else 0 execution_status = execution.status if execution else None is_failure = ( - execution_status in _FAILURE_STATUSES + execution_status in FAILURE_STATUSES or failed_files > 0 or self.pipeline.last_run_status == Pipeline.PipelineStatus.FAILURE ) diff --git a/backend/workflow_manager/internal_serializers.py b/backend/workflow_manager/internal_serializers.py index cd221470cb..951bf1e54d 100644 --- a/backend/workflow_manager/internal_serializers.py +++ b/backend/workflow_manager/internal_serializers.py @@ -183,6 +183,40 @@ class WorkflowExecutionStatusUpdateSerializer(serializers.Serializer): attempts = serializers.IntegerField(required=False, min_value=0) execution_time = serializers.FloatField(required=False, min_value=0) + def validate(self, attrs): + """Reject impossible file-count aggregates. + + Per-field min_value=0 catches negatives, but successful + failed > + total or either component > total slips through and skews the + outcome-based notification filter downstream. + """ + total = attrs.get("total_files") + successful = attrs.get("successful_files") + failed = attrs.get("failed_files") + + if total is None: + if successful is not None or failed is not None: + raise serializers.ValidationError( + { + "total_files": "total_files is required when file aggregates are provided." + } + ) + return attrs + + if successful is not None and successful > total: + raise serializers.ValidationError( + {"successful_files": "successful_files cannot exceed total_files."} + ) + if failed is not None and failed > total: + raise serializers.ValidationError( + {"failed_files": "failed_files cannot exceed total_files."} + ) + if successful is not None and failed is not None and successful + failed > total: + raise serializers.ValidationError( + "successful_files + failed_files cannot exceed total_files." + ) + return attrs + class OrganizationContextSerializer(serializers.Serializer): """Serializer for organization context information.""" diff --git a/frontend/src/components/settings/platform/PlatformSettings.jsx b/frontend/src/components/settings/platform/PlatformSettings.jsx index 0bbcb641b6..2b08c8e0ba 100644 --- a/frontend/src/components/settings/platform/PlatformSettings.jsx +++ b/frontend/src/components/settings/platform/PlatformSettings.jsx @@ -59,6 +59,11 @@ function PlatformSettings() { const { setPostHogCustomEvent } = usePostHogEvents(); useEffect(() => { + // Wait for session hydration — without this guard the first render + // fires GET against /api/v1/unstract/undefined/... and silently 404s. + if (!sessionDetails?.orgId) { + return; + } // Load org-scoped batch interval (UNS-611 v2). Falls back silently to // null on failure so the rest of the page still renders. axiosPrivate({ @@ -74,7 +79,7 @@ function PlatformSettings() { .catch(() => { // Non-fatal — admin just won't see a pre-filled value. }); - }, []); + }, [sessionDetails?.orgId]); const handleSaveInterval = () => { if ( diff --git a/workers/shared/patterns/notification/helper.py b/workers/shared/patterns/notification/helper.py index 0148bc2f5f..460c308146 100644 --- a/workers/shared/patterns/notification/helper.py +++ b/workers/shared/patterns/notification/helper.py @@ -29,27 +29,23 @@ def _enqueue_to_buffer( api_client: Any, notification: dict[str, Any], payload: NotificationPayload, -) -> bool: +) -> None: """POST a single execution event to the backend's buffer endpoint. Worker writes nothing to the DB itself — the backend owns NotificationBuffer - rows. Returns True on success so callers can fall back to immediate dispatch - if the buffer endpoint is unavailable. The fallback decision is opinionated: - we keep behavior conservative and DON'T fall back so a misconfigured or - outage-mode backend can't silently turn BATCHED into IMMEDIATE. + rows. Raises on any failure so the outer trigger_* caller's except block + logs the drop instead of silently treating BATCHED delivery as successful. """ + # Forward the full per-event shape so the backend renderer can match + # IMMEDIATE's KV layout per event (Type / Pipeline Id / Pipeline Name / + # Status / Execution Id / Timestamp / Additional Data). Older backend + # builds that ignore the extra fields stay unaffected. + payload_type = payload.type.value if hasattr(payload.type, "value") else payload.type + payload_status = ( + payload.status.value if hasattr(payload.status, "value") else payload.status + ) + payload_timestamp = payload.timestamp.isoformat() if payload.timestamp else None try: - # Forward the full per-event shape so the backend renderer can match - # IMMEDIATE's KV layout per event (Type / Pipeline Id / Pipeline Name - # / Status / Execution Id / Timestamp / Additional Data). Older - # backend builds that ignore the extra fields stay unaffected. - payload_type = ( - payload.type.value if hasattr(payload.type, "value") else payload.type - ) - payload_status = ( - payload.status.value if hasattr(payload.status, "value") else payload.status - ) - payload_timestamp = payload.timestamp.isoformat() if payload.timestamp else None api_client._make_request( method="POST", endpoint=ENQUEUE_BUFFER_ENDPOINT, @@ -67,21 +63,19 @@ def _enqueue_to_buffer( }, timeout=10, ) - logger.info( - "Enqueued BATCHED notification %s for pipeline %s execution %s", - notification["id"], - payload.pipeline_id, - payload.execution_id, - ) - return True - except Exception as e: - logger.error( - "Failed to enqueue BATCHED notification %s for pipeline %s: %s", + except Exception: # noqa: BLE001 — propagate any failure, don't classify + logger.exception( + "Failed to enqueue BATCHED notification %s for pipeline %s", notification["id"], payload.pipeline_id, - e, ) - return False + raise + logger.info( + "Enqueued BATCHED notification %s for pipeline %s execution %s", + notification["id"], + payload.pipeline_id, + payload.execution_id, + ) def _route_notification( @@ -102,7 +96,15 @@ def _route_notification( return if notification.get("delivery_mode") == DELIVERY_MODE_BATCHED: - _enqueue_to_buffer(api_client, notification, payload) + try: + _enqueue_to_buffer(api_client, notification, payload) + except Exception: # noqa: BLE001 — already logged with stack inside + # Surface but don't abort the outer trigger_* loop — sibling + # BATCHED notifications still deserve their enqueue attempt. + logger.warning( + "BATCHED enqueue failed for notification %s; continuing with others", + notification.get("id"), + ) return send_notification_to_worker( From 92ad063e4991ba8d3bfeca95d185402148e118cf Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 15:59:27 +0530 Subject: [PATCH 14/27] sonar issues --- backend/notification_v2/clubbed_renderer.py | 185 ++---------------- .../workflow_manager/internal_serializers.py | 4 +- .../settings/platform/PlatformSettings.jsx | 2 + .../core/notification_clubbed_renderer.py | 85 ++++++-- workers/notification/providers/api_webhook.py | 3 +- .../notification/providers/slack_webhook.py | 9 +- 6 files changed, 94 insertions(+), 194 deletions(-) rename workers/notification/providers/_clubbed_format.py => unstract/core/src/unstract/core/notification_clubbed_renderer.py (53%) diff --git a/backend/notification_v2/clubbed_renderer.py b/backend/notification_v2/clubbed_renderer.py index 1974a896d3..a0096197bd 100644 --- a/backend/notification_v2/clubbed_renderer.py +++ b/backend/notification_v2/clubbed_renderer.py @@ -1,195 +1,46 @@ -"""Canonical envelope + renderer for every dispatch — IMMEDIATE and BATCHED. - -The same envelope shape feeds every channel x mode cell so receivers never -need to branch on "is this batched?": - - { - "summary": {"total": N, "succeeded": S, "failed": F}, - "events": [ - { - "type": "ETL" | "TASK" | "API", - "pipeline_name": "...", - "status": "ERROR" | "SUCCESS" | ..., - "execution_id": "...", - "timestamp": "2026 May 5 5:03:34 PM", - "additional_data": { - "total_files": int, - "successful_files": int, - "failed_files": int, - }, - "error_message": "...", # only on failure - }, - ... - ] - } - -Slack receives `{"text": ""}` pre-rendered from this envelope; API -receivers see the envelope unchanged so programmatic consumers always parse -the same shape. `pipeline_id` is intentionally absent from every event dict. +"""Backend dispatch entry for clubbed-notification rendering. + +Delegates the canonical envelope + Slack body to +``unstract.core.notification_clubbed_renderer`` so backend dispatches and +worker IMMEDIATE callbacks emit byte-identical receiver-visible payloads. +This thin shim keeps the ``render_clubbed_message`` platform dispatcher +(uses ``PlatformType`` enum) backend-side; everything else lives in the +shared module. """ from __future__ import annotations -import datetime import logging from typing import Any from notification_v2.enums import PlatformType +from unstract.core.notification_clubbed_renderer import ( + build_envelope, + render_slack_text, +) logger = logging.getLogger(__name__) -# Hard cap on events per dispatch; the rest roll into the next flush tick. -MAX_BATCH_SIZE = 500 -# Slack inlines this many events before collapsing the rest under an -# "_… and K more_" footer. Slack tolerates much larger payloads, but -# readability tanks past ~25 lines. -SLACK_MAX_DISPLAY_EVENTS = 25 - -_SUCCESS_STATUSES = {"COMPLETED", "SUCCESS"} - -# Middle dot (U+00B7) padded by single spaces — the per-event field separator. -_SEPARATOR = " · " -_MISSING = "—" # em-dash placeholder for missing fields -_DIVIDER = "———" # triple em-dash divider between header and events - -# Slack emoji shortcodes — render the same as the literal unicode glyphs and -# stay readable in source. -_EMOJI_SUCCESS = ":white_check_mark:" -_EMOJI_FAILURE = ":x:" - - -def _is_success(status: str | None) -> bool: - if not status: - return False - return status.upper() in _SUCCESS_STATUSES - - -def _humanize_timestamp(iso: str | None) -> str: - """Render an ISO timestamp as `2026 May 11 11:38:31 AM` (POSIX `%-d`). - - Falls back to the missing placeholder on falsy / unparseable input so a - partial row still renders without raising. - """ - if not iso: - return _MISSING - try: - dt = datetime.datetime.fromisoformat(iso) - except (TypeError, ValueError): - return _MISSING - return dt.strftime("%Y %b %-d %I:%M:%S %p") - - -def _format_file_count(event: dict[str, Any]) -> str: - """Render the file-count summary; empty string when no totals available.""" - counts = event.get("additional_data") or {} - total = counts.get("total_files") - if total is None: - return "" - if _is_success(event.get("status")): - successful = counts.get("successful_files", 0) - return f"{_EMOJI_SUCCESS} {successful}/{total} files" - failed = counts.get("failed_files", 0) - return f"{_EMOJI_FAILURE} {failed}/{total} files" - +__all__ = ["build_envelope", "render_clubbed_message"] -def _format_event_line(event: dict[str, Any]) -> str: - """Format one event as a single Slack mrkdwn line. - Fields are middle-dot separated; the file-count column is omitted when - `additional_data` is empty so the line collapses to 5 fields, not 6. - """ - parts = [ - event.get("timestamp") or _MISSING, - f"*{event.get('execution_id') or _MISSING}*", - event.get("type") or _MISSING, - event.get("pipeline_name") or _MISSING, - event.get("status") or _MISSING, - ] - file_count = _format_file_count(event) - if file_count: - parts.append(file_count) - return _SEPARATOR.join(parts) - - -def _event_from_payload(payload: dict[str, Any]) -> dict[str, Any]: - """Project a buffered payload into the canonical per-event dict. - - Unified shape across Slack/API and IMMEDIATE/BATCHED. `pipeline_id` is - intentionally dropped here — neither channel surfaces it. Timestamps are - humanized once at projection so Slack and API consumers see the same - string (implicit UTC, no timezone suffix). - """ - event: dict[str, Any] = { - "type": payload.get("type") or "", - "pipeline_name": payload.get("pipeline_name") or "", - "status": payload.get("status") or "", - "execution_id": payload.get("execution_id") or "", - "timestamp": _humanize_timestamp(payload.get("timestamp")), - "additional_data": payload.get("additional_data") or {}, - } - error_message = payload.get("error_message") - if error_message: - event["error_message"] = error_message - return event - - -def build_envelope(payloads: list[dict[str, Any]]) -> dict[str, Any]: - """Build the canonical envelope used by every dispatch path. - - Summary carries only `{total, succeeded, failed}` — same shape for - IMMEDIATE and BATCHED so receivers parse one envelope, not two. - """ - capped = payloads[:MAX_BATCH_SIZE] - succeeded = sum(1 for p in capped if _is_success(p.get("status"))) - failed = len(capped) - succeeded - return { - "summary": { - "total": len(capped), - "succeeded": succeeded, - "failed": failed, - }, - "events": [_event_from_payload(p) for p in capped], - } - - -def render_for_slack(envelope: dict[str, Any]) -> dict[str, Any]: - """Render the envelope as `{"text": ""}` for Slack. - - Header + divider are emitted for every dispatch — IMMEDIATE, BATCHED N=1, - and BATCHED N>1 all share the same shape. Visible events are capped at - SLACK_MAX_DISPLAY_EVENTS with an `_… and K more_` overflow footer. - """ - summary = envelope["summary"] - events: list[dict[str, Any]] = envelope["events"] - total = summary["total"] - noun = "execution" if total == 1 else "executions" - header = ( - f"*{total} {noun}* " - f"({_EMOJI_SUCCESS} {summary['succeeded']} succeeded " - f"{_EMOJI_FAILURE} {summary['failed']} failed)" - ) - visible = events[:SLACK_MAX_DISPLAY_EVENTS] - sections: list[str] = [header, _DIVIDER] - sections.extend(_format_event_line(e) for e in visible) - overflow = len(events) - len(visible) - if overflow > 0: - sections.append(_DIVIDER) - sections.append(f"_… and {overflow} more executions_") - return {"text": "\n".join(sections)} +def _render_for_slack(envelope: dict[str, Any]) -> dict[str, Any]: + """Wrap the rendered Slack mrkdwn body in the dict shape Slack expects.""" + return {"text": render_slack_text(envelope)} def render_clubbed_message( payloads: list[dict[str, Any]], platform: str, ) -> dict[str, Any]: - """Top-level entry — returns the dispatch body for `platform`. + """Top-level entry — returns the dispatch body for ``platform``. Used by every dispatch site (BATCHED flush, IMMEDIATE backend providers) so the receiver-visible payload is identical regardless of mode. """ envelope = build_envelope(payloads) if platform == PlatformType.SLACK.value: - return render_for_slack(envelope) + return _render_for_slack(envelope) if platform == PlatformType.API.value: return envelope # Unknown platform — fall back to the raw envelope and warn so misrouted diff --git a/backend/workflow_manager/internal_serializers.py b/backend/workflow_manager/internal_serializers.py index 951bf1e54d..2e43f66630 100644 --- a/backend/workflow_manager/internal_serializers.py +++ b/backend/workflow_manager/internal_serializers.py @@ -213,7 +213,9 @@ def validate(self, attrs): ) if successful is not None and failed is not None and successful + failed > total: raise serializers.ValidationError( - "successful_files + failed_files cannot exceed total_files." + { + "non_field_errors": "successful_files + failed_files cannot exceed total_files." + } ) return attrs diff --git a/frontend/src/components/settings/platform/PlatformSettings.jsx b/frontend/src/components/settings/platform/PlatformSettings.jsx index 2b08c8e0ba..f2eee087b0 100644 --- a/frontend/src/components/settings/platform/PlatformSettings.jsx +++ b/frontend/src/components/settings/platform/PlatformSettings.jsx @@ -417,6 +417,8 @@ function PlatformSettings() { setBatchIntervalMinutes(v)} /> diff --git a/workers/notification/providers/_clubbed_format.py b/unstract/core/src/unstract/core/notification_clubbed_renderer.py similarity index 53% rename from workers/notification/providers/_clubbed_format.py rename to unstract/core/src/unstract/core/notification_clubbed_renderer.py index d489d76ec7..9571bdfeb7 100644 --- a/workers/notification/providers/_clubbed_format.py +++ b/unstract/core/src/unstract/core/notification_clubbed_renderer.py @@ -1,12 +1,33 @@ -"""Worker-side mirror of backend/notification_v2/clubbed_renderer. - -Producing the same envelope shape and Slack mrkdwn body the backend renders -so worker-callback IMMEDIATE payloads (flat per-event dicts) match the -canonical wire format used by backend BATCHED dispatches. Backend pre-renders -for its own dispatches — this module covers only the worker-callback IMMEDIATE -path. Keep the constants and string output byte-identical to -`backend/notification_v2/clubbed_renderer.py`; promote to `unstract/core/` if -a third site ever needs the same logic. +"""Shared clubbed-notification envelope + Slack renderer. + +Imported by both `backend/notification_v2/clubbed_renderer.py` and the +worker `notification/providers/*_webhook.py` so the receiver-visible +payload (envelope JSON for API, mrkdwn string for Slack) is byte-identical +regardless of which side rendered it. + +Envelope shape: + + { + "summary": {"total": N, "succeeded": S, "failed": F}, + "events": [ + { + "type": "ETL" | "TASK" | "API", + "pipeline_name": "...", + "status": "ERROR" | "SUCCESS" | ..., + "execution_id": "...", + "timestamp": "2026 May 5 5:03:34 PM", + "additional_data": { + "total_files": int, + "successful_files": int, + "failed_files": int, + }, + "error_message": "...", # only on failure + }, + ... + ] + } + +`pipeline_id` is intentionally absent — neither channel surfaces it. """ from __future__ import annotations @@ -14,13 +35,22 @@ import datetime from typing import Any +# Hard cap on events per dispatch; the rest roll into the next flush tick. MAX_BATCH_SIZE = 500 +# Slack inlines this many events before collapsing the rest under an +# "_… and K more_" footer. Slack tolerates much larger payloads, but +# readability tanks past ~25 lines. SLACK_MAX_DISPLAY_EVENTS = 25 -_SUCCESS_STATUSES = {"COMPLETED", "SUCCESS"} +_SUCCESS_STATUSES = frozenset({"COMPLETED", "SUCCESS"}) + +# Middle dot (U+00B7) padded by single spaces — the per-event field separator. _SEPARATOR = " · " -_MISSING = "—" -_DIVIDER = "———" +_MISSING = "—" # em-dash placeholder for missing fields +_DIVIDER = "———" # triple em-dash divider between header and events + +# Slack emoji shortcodes — render the same as the literal unicode glyphs and +# stay readable in source. _EMOJI_SUCCESS = ":white_check_mark:" _EMOJI_FAILURE = ":x:" @@ -32,6 +62,11 @@ def _is_success(status: str | None) -> bool: def _humanize_timestamp(iso: str | None) -> str: + """Render an ISO timestamp as `2026 May 11 11:38:31 AM` (POSIX `%-d`). + + Falls back to the missing placeholder on falsy / unparseable input so a + partial row still renders without raising. + """ if not iso: return _MISSING try: @@ -42,6 +77,7 @@ def _humanize_timestamp(iso: str | None) -> str: def _format_file_count(event: dict[str, Any]) -> str: + """Render the file-count summary; empty string when no totals available.""" counts = event.get("additional_data") or {} total = counts.get("total_files") if total is None: @@ -54,6 +90,11 @@ def _format_file_count(event: dict[str, Any]) -> str: def _format_event_line(event: dict[str, Any]) -> str: + """Format one event as a single Slack mrkdwn line. + + Fields are middle-dot separated; the file-count column is omitted when + `additional_data` is empty so the line collapses to 5 fields, not 6. + """ parts = [ event.get("timestamp") or _MISSING, f"*{event.get('execution_id') or _MISSING}*", @@ -68,11 +109,12 @@ def _format_event_line(event: dict[str, Any]) -> str: def _event_from_payload(payload: dict[str, Any]) -> dict[str, Any]: - """Project a flat per-event payload into the canonical shape. + """Project a buffered payload into the canonical per-event dict. - Drops `pipeline_id` and `_source` — neither appears in receiver-visible - output. Mirrors the backend projection (including the humanized timestamp) - so renderer input is identical. + Unified shape across Slack/API and IMMEDIATE/BATCHED. `pipeline_id` is + intentionally dropped — neither channel surfaces it. Timestamps are + humanized once at projection so Slack and API consumers see the same + string (implicit UTC, no timezone suffix). """ event: dict[str, Any] = { "type": payload.get("type") or "", @@ -89,10 +131,10 @@ def _event_from_payload(payload: dict[str, Any]) -> dict[str, Any]: def build_envelope(payloads: list[dict[str, Any]]) -> dict[str, Any]: - """Build the canonical `{summary, events}` envelope. + """Build the canonical envelope used by every dispatch path. - Summary carries only `{total, succeeded, failed}` — identical shape for - IMMEDIATE and BATCHED. + Summary carries only `{total, succeeded, failed}` — same shape for + IMMEDIATE and BATCHED so receivers parse one envelope, not two. """ capped = payloads[:MAX_BATCH_SIZE] succeeded = sum(1 for p in capped if _is_success(p.get("status"))) @@ -110,8 +152,9 @@ def build_envelope(payloads: list[dict[str, Any]]) -> dict[str, Any]: def render_slack_text(envelope: dict[str, Any]) -> str: """Render the envelope as Slack mrkdwn body text. - Always emits header + divider regardless of event count so IMMEDIATE, - BATCHED N=1, and BATCHED N>1 all share the same shape. + Header + divider are emitted for every dispatch — IMMEDIATE, BATCHED N=1, + and BATCHED N>1 all share the same shape. Visible events are capped at + SLACK_MAX_DISPLAY_EVENTS with an `_… and K more_` overflow footer. """ summary = envelope["summary"] events: list[dict[str, Any]] = envelope["events"] diff --git a/workers/notification/providers/api_webhook.py b/workers/notification/providers/api_webhook.py index 1e652e9464..cbffc0a1d4 100644 --- a/workers/notification/providers/api_webhook.py +++ b/workers/notification/providers/api_webhook.py @@ -8,10 +8,11 @@ from typing import Any -from notification.providers._clubbed_format import build_envelope from notification.providers.webhook_provider import WebhookProvider from shared.infrastructure.logging import WorkerLogger +from unstract.core.notification_clubbed_renderer import build_envelope + logger = WorkerLogger.get_logger(__name__) diff --git a/workers/notification/providers/slack_webhook.py b/workers/notification/providers/slack_webhook.py index 01f272b024..2bb757662c 100644 --- a/workers/notification/providers/slack_webhook.py +++ b/workers/notification/providers/slack_webhook.py @@ -8,12 +8,13 @@ from typing import Any -from notification.providers._clubbed_format import ( +from notification.providers.webhook_provider import WebhookProvider +from shared.infrastructure.logging import WorkerLogger + +from unstract.core.notification_clubbed_renderer import ( build_envelope, render_slack_text, ) -from notification.providers.webhook_provider import WebhookProvider -from shared.infrastructure.logging import WorkerLogger logger = WorkerLogger.get_logger(__name__) @@ -56,7 +57,7 @@ def format_payload(self, payload: dict[str, Any]) -> dict[str, Any]: wrapped in a single-event envelope and rendered to the canonical single-line mrkdwn body. """ - if "text" in payload and len(payload) == 1: + if "text" in payload and "events" not in payload: return {"text": payload["text"]} envelope = build_envelope(payloads=[payload]) From e6a87e48881adaebf49852560aede610aa97f43e Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 16:00:06 +0530 Subject: [PATCH 15/27] sonar issues --- backend/workflow_manager/internal_serializers.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/backend/workflow_manager/internal_serializers.py b/backend/workflow_manager/internal_serializers.py index 2e43f66630..506d389ef8 100644 --- a/backend/workflow_manager/internal_serializers.py +++ b/backend/workflow_manager/internal_serializers.py @@ -212,11 +212,8 @@ def validate(self, attrs): {"failed_files": "failed_files cannot exceed total_files."} ) if successful is not None and failed is not None and successful + failed > total: - raise serializers.ValidationError( - { - "non_field_errors": "successful_files + failed_files cannot exceed total_files." - } - ) + msg = "successful_files + failed_files cannot exceed total_files." + raise serializers.ValidationError({"non_field_errors": msg}) return attrs From a736bf88e8e07abd9e37723b622c748da316b451 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 16:17:38 +0530 Subject: [PATCH 16/27] code rabbit refactor --- .../core/notification_clubbed_renderer.py | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/unstract/core/src/unstract/core/notification_clubbed_renderer.py b/unstract/core/src/unstract/core/notification_clubbed_renderer.py index 9571bdfeb7..5a21bc65cb 100644 --- a/unstract/core/src/unstract/core/notification_clubbed_renderer.py +++ b/unstract/core/src/unstract/core/notification_clubbed_renderer.py @@ -61,6 +61,22 @@ def _is_success(status: str | None) -> bool: return status.upper() in _SUCCESS_STATUSES +def _has_failed_files(counts: dict[str, Any]) -> bool: + """True when file-level aggregates show at least one failure.""" + failed = counts.get("failed_files") + return isinstance(failed, int) and failed > 0 + + +def _is_effective_success(status: str | None, counts: dict[str, Any]) -> bool: + """Treat a COMPLETED run with any file failures as a partial failure. + + Mirrors the failure-filter contract at dispatch time so renderer summary + counts and the per-event file-count emoji match the reason the alert + fired. + """ + return _is_success(status) and not _has_failed_files(counts) + + def _humanize_timestamp(iso: str | None) -> str: """Render an ISO timestamp as `2026 May 11 11:38:31 AM` (POSIX `%-d`). @@ -77,11 +93,18 @@ def _humanize_timestamp(iso: str | None) -> str: def _format_file_count(event: dict[str, Any]) -> str: - """Render the file-count summary; empty string when no totals available.""" + """Render the file-count summary; empty string when no totals available. + + A COMPLETED run with file failures short-circuits to the failure shape so + the rendered line matches why a failures-only notification fired. + """ counts = event.get("additional_data") or {} total = counts.get("total_files") if total is None: return "" + if _has_failed_files(counts): + failed = counts.get("failed_files", 0) + return f"{_EMOJI_FAILURE} {failed}/{total} files" if _is_success(event.get("status")): successful = counts.get("successful_files", 0) return f"{_EMOJI_SUCCESS} {successful}/{total} files" @@ -137,7 +160,11 @@ def build_envelope(payloads: list[dict[str, Any]]) -> dict[str, Any]: IMMEDIATE and BATCHED so receivers parse one envelope, not two. """ capped = payloads[:MAX_BATCH_SIZE] - succeeded = sum(1 for p in capped if _is_success(p.get("status"))) + succeeded = sum( + 1 + for p in capped + if _is_effective_success(p.get("status"), p.get("additional_data") or {}) + ) failed = len(capped) - succeeded return { "summary": { From e6534949d63d1475f6a4581c16884bb87369027d Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 17:02:24 +0530 Subject: [PATCH 17/27] greptile comments resolve --- backend/notification_v2/internal_api_views.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 9a9c0e4a71..9eacba6c76 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -286,12 +286,11 @@ def get_api_data(request: HttpRequest, api_id: str) -> JsonResponse: ) -# Required fields on the enqueue endpoint body. Worker-side serialization -# guarantees these — keep this list in sync with -# workers/shared/patterns/notification/helper.py. +# `execution_id` is intentionally optional: the scheduler INPROGRESS path +# (workers/scheduler/tasks.py, UN-2850 / #1562) fires before WorkflowExecution +# is created. Renderer falls back to `—` for missing values. _ENQUEUE_REQUIRED_FIELDS = ( "notification_id", - "execution_id", "pipeline_id", "pipeline_name", "status", @@ -351,7 +350,7 @@ def enqueue_notification_buffer(request: HttpRequest) -> JsonResponse: # (renderer falls back to "Type: —" / no Additional Data line). payload = { "type": body.get("type", ""), - "execution_id": body["execution_id"], + "execution_id": body.get("execution_id"), "pipeline_id": body["pipeline_id"], "pipeline_name": body["pipeline_name"], "status": body["status"], From 724d280c6651a97d69ef14760b94eeeafbf4fc8a Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 17:55:05 +0530 Subject: [PATCH 18/27] UN-3056 Scope enqueue execution_id exemption to INPROGRESS Keep execution_id in _ENQUEUE_REQUIRED_FIELDS as the canonical required set; carve out the INPROGRESS exemption at the validator instead of dropping it broadly. Non-INPROGRESS callers (COMPLETED / ERROR / STOPPED / PARTIAL_SUCCESS) once again get a loud 400 if they omit execution_id, addressing Greptile's silent-failure concern on e6534949d. Extends the comment above the tuple to also flag the consumer-side gap: INPROGRESS buffer rows ship with execution_id=null, so API receivers cannot correlate them with execution logs until the producer-reorder follow-up (UN-3056) lands. Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/notification_v2/internal_api_views.py | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 9eacba6c76..79f65193d8 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -286,15 +286,22 @@ def get_api_data(request: HttpRequest, api_id: str) -> JsonResponse: ) -# `execution_id` is intentionally optional: the scheduler INPROGRESS path +# All fields are required at enqueue time. `execution_id` carries one +# exemption (handled in the validator below): the scheduler INPROGRESS path # (workers/scheduler/tasks.py, UN-2850 / #1562) fires before WorkflowExecution -# is created. Renderer falls back to `—` for missing values. +# is created, so it has no execution_id to forward. Renderer falls back to +# `—` for missing values. Long-term fix belongs at the producer. +# +# Consumer-side gap: INPROGRESS buffer rows ship with execution_id=null, so +# API webhook receivers cannot correlate the event with execution logs until +# the producer-reorder follow-up (UN-3056) lands. _ENQUEUE_REQUIRED_FIELDS = ( "notification_id", "pipeline_id", "pipeline_name", "status", "platform", + "execution_id", ) @@ -315,12 +322,19 @@ def enqueue_notification_buffer(request: HttpRequest) -> JsonResponse: {"status": "error", "message": "Invalid JSON body"}, status=400 ) - missing = [f for f in _ENQUEUE_REQUIRED_FIELDS if not body.get(f)] - if missing: + missing_fields = [f for f in _ENQUEUE_REQUIRED_FIELDS if not body.get(f)] + # INPROGRESS is the one status legitimately allowed to omit execution_id + # (see comment on _ENQUEUE_REQUIRED_FIELDS). + if ( + body.get("status") == Pipeline.PipelineStatus.INPROGRESS + and "execution_id" in missing_fields + ): + missing_fields.remove("execution_id") + if missing_fields: return JsonResponse( { "status": "error", - "message": f"Missing required fields: {', '.join(missing)}", + "message": f"Missing required fields: {', '.join(missing_fields)}", }, status=400, ) From 4104fafb25eccc5bcbb654cf1bf75976569114fe Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 17:57:18 +0530 Subject: [PATCH 19/27] greptile comments resolve --- backend/notification_v2/internal_api_views.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 79f65193d8..693e2a0400 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -286,15 +286,11 @@ def get_api_data(request: HttpRequest, api_id: str) -> JsonResponse: ) -# All fields are required at enqueue time. `execution_id` carries one -# exemption (handled in the validator below): the scheduler INPROGRESS path -# (workers/scheduler/tasks.py, UN-2850 / #1562) fires before WorkflowExecution -# is created, so it has no execution_id to forward. Renderer falls back to -# `—` for missing values. Long-term fix belongs at the producer. -# -# Consumer-side gap: INPROGRESS buffer rows ship with execution_id=null, so -# API webhook receivers cannot correlate the event with execution logs until -# the producer-reorder follow-up (UN-3056) lands. +# `execution_id` is required except for INPROGRESS, which fires from the +# scheduler (workers/scheduler/tasks.py, UN-2850) before WorkflowExecution +# exists. INPROGRESS rows therefore store execution_id=null — receivers +# cannot correlate with execution logs until the producer-reorder lands +# (UN-3056). _ENQUEUE_REQUIRED_FIELDS = ( "notification_id", "pipeline_id", From 167d60f679602e575dceefa9507c81b18ffa8f22 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 18:15:39 +0530 Subject: [PATCH 20/27] UN-3056 Skip deactivated notifications in BATCHED flush _dispatch_group's lock query did not check notification.is_active, so PENDING NotificationBuffer rows tied to a deactivated source notification still dispatched on the next flush tick (up to one NOTIFICATION_CLUB_INTERVAL of stale traffic). IMMEDIATE deactivation is instant because the GET notifications endpoint filters by is_active=True; this restores the same expectation for BATCHED. Also adds select_related("notification") so the later rows[0].notification read is part of the same query rather than a per-group round-trip. Co-Authored-By: Claude Opus 4.7 (1M context) --- backend/notification_v2/internal_api_views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 693e2a0400..31d6dffe11 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -467,12 +467,14 @@ def _dispatch_group( with transaction.atomic(): rows = list( NotificationBuffer.objects.select_for_update(skip_locked=True) + .select_related("notification") .filter( status=BufferStatus.PENDING.value, organization_id=org_id, webhook_url=webhook_url, auth_sig=auth_sig, platform=platform, + notification__is_active=True, ) .order_by("created_at")[:_PROCESS_BUFFER_CAP] ) From 77b7956c2c0b1bf89ef63c8732432871013c2e6f Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 18:22:47 +0530 Subject: [PATCH 21/27] greptile comments resolve --- backend/notification_v2/internal_api_views.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 31d6dffe11..492ce42cbf 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -486,12 +486,16 @@ def _dispatch_group( # Live auth — read from the FIRST row's notification. If multiple # notifications collide on (url, auth_sig, platform) we have, by - # definition, identical auth + format, so this is safe. + # definition, identical auth + format, so this is safe. Retry budget + # is the MAX across rows: there's a single HTTP call per batch, so + # the most retry-tolerant subscriber's intent wins; using the first + # row's value would silently truncate everyone else's retry budget. first_notification = rows[0].notification payloads = [r.payload for r in rows] body = render_clubbed_message(payloads, platform) headers = build_webhook_headers(first_notification) buffer_ids = [str(r.id) for r in rows] + max_retries = max(r.notification.max_retries for r in rows) # Mark DISPATCHED first; if commit succeeds the on_commit hook # publishes the broker task. If commit fails, rows stay PENDING and @@ -508,7 +512,7 @@ def _dispatch_group( body=body, headers=headers, platform=platform, - max_retries=first_notification.max_retries, + max_retries=max_retries, buffer_ids=buffer_ids, org_id=org_id, ) From 780d51cf30e33a8700b093b79b6b86905a8e2b7a Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 19:26:52 +0530 Subject: [PATCH 22/27] remove immediate mode --- backend/notification_v2/clubbed_renderer.py | 14 +- backend/notification_v2/enums.py | 12 +- backend/notification_v2/helper.py | 93 ++------------ backend/notification_v2/internal_api_views.py | 20 +-- backend/notification_v2/provider/__init__.py | 0 .../provider/notification_provider.py | 30 ----- backend/notification_v2/provider/registry.py | 54 -------- .../provider/webhook/__init__.py | 0 .../provider/webhook/api_webhook.py | 30 ----- .../provider/webhook/slack_webhook.py | 33 ----- .../provider/webhook/webhook.py | 103 --------------- .../core/notification_clubbed_renderer.py | 14 +- workers/notification/providers/api_webhook.py | 8 +- .../notification/providers/slack_webhook.py | 22 ++-- .../shared/patterns/notification/helper.py | 120 ++---------------- 15 files changed, 64 insertions(+), 489 deletions(-) delete mode 100644 backend/notification_v2/provider/__init__.py delete mode 100644 backend/notification_v2/provider/notification_provider.py delete mode 100644 backend/notification_v2/provider/registry.py delete mode 100644 backend/notification_v2/provider/webhook/__init__.py delete mode 100644 backend/notification_v2/provider/webhook/api_webhook.py delete mode 100644 backend/notification_v2/provider/webhook/slack_webhook.py delete mode 100644 backend/notification_v2/provider/webhook/webhook.py diff --git a/backend/notification_v2/clubbed_renderer.py b/backend/notification_v2/clubbed_renderer.py index a0096197bd..af20007f76 100644 --- a/backend/notification_v2/clubbed_renderer.py +++ b/backend/notification_v2/clubbed_renderer.py @@ -1,11 +1,11 @@ """Backend dispatch entry for clubbed-notification rendering. Delegates the canonical envelope + Slack body to -``unstract.core.notification_clubbed_renderer`` so backend dispatches and -worker IMMEDIATE callbacks emit byte-identical receiver-visible payloads. -This thin shim keeps the ``render_clubbed_message`` platform dispatcher -(uses ``PlatformType`` enum) backend-side; everything else lives in the -shared module. +``unstract.core.notification_clubbed_renderer`` so backend and worker +callbacks emit byte-identical receiver-visible payloads. This thin shim +keeps the ``render_clubbed_message`` platform dispatcher (uses +``PlatformType`` enum) backend-side; everything else lives in the shared +module. """ from __future__ import annotations @@ -35,8 +35,8 @@ def render_clubbed_message( ) -> dict[str, Any]: """Top-level entry — returns the dispatch body for ``platform``. - Used by every dispatch site (BATCHED flush, IMMEDIATE backend providers) - so the receiver-visible payload is identical regardless of mode. + Used by every dispatch site so the receiver-visible payload is + identical regardless of caller. """ envelope = build_envelope(payloads) if platform == PlatformType.SLACK.value: diff --git a/backend/notification_v2/enums.py b/backend/notification_v2/enums.py index 9e694e128f..8ebced2a2e 100644 --- a/backend/notification_v2/enums.py +++ b/backend/notification_v2/enums.py @@ -48,10 +48,14 @@ def choices(cls): class DeliveryMode(Enum): """Per-notification dispatch mode. - IMMEDIATE fires on every workflow completion (pre-existing behavior). - BATCHED buffers events into NotificationBuffer and flushes them as one - clubbed message per (org, webhook_url, auth_sig) every - NOTIFICATION_CLUB_INTERVAL seconds. + Product ships every notification as BATCHED — events buffer into + ``NotificationBuffer`` and flush as one clubbed message per + (org, webhook_url, auth_sig) every ``NOTIFICATION_CLUB_INTERVAL`` seconds. + + The ``IMMEDIATE`` value is purely a historical DB value — no code reads + it anymore (the legacy synchronous-dispatch path was removed). The + column and enum value remain so existing rows don't break; both will be + dropped in a follow-up schema migration. """ IMMEDIATE = "IMMEDIATE" diff --git a/backend/notification_v2/helper.py b/backend/notification_v2/helper.py index 4a6c5cc907..9e99335ce8 100644 --- a/backend/notification_v2/helper.py +++ b/backend/notification_v2/helper.py @@ -11,13 +11,9 @@ from notification_v2.enums import ( AuthorizationType, BufferStatus, - DeliveryMode, - NotificationType, PlatformType, ) from notification_v2.models import Notification, NotificationBuffer -from notification_v2.provider.notification_provider import NotificationProvider -from notification_v2.provider.registry import get_notification_provider logger = logging.getLogger(__name__) @@ -75,9 +71,8 @@ def get_org_club_interval_seconds(organization: Organization) -> int: def build_webhook_headers(notification: Notification) -> dict[str, str]: """Build HTTP headers for a webhook dispatch from the notification's auth. - Mirrors the logic in ``provider/webhook/webhook.py`` and the worker-side - ``get_webhook_headers`` so the clubbed dispatcher and the immediate path - produce identical headers for the same auth config. + Used by the buffer flush in ``internal_api_views._dispatch_group`` to + pass live auth headers through to the Celery task. """ headers = {"Content-Type": "application/json"} auth_type_raw = (notification.authorization_type or "").upper() @@ -107,51 +102,35 @@ def _resolve_organization(notification: Notification) -> Organization | None: return None -def split_by_delivery_mode( - notifications: "Iterable[Notification]", -) -> tuple[list[Notification], list[Notification]]: - """Partition into (IMMEDIATE, BATCHED). Unknown modes default to IMMEDIATE.""" - immediate: list[Notification] = [] - batched: list[Notification] = [] - for n in notifications: - if n.delivery_mode == DeliveryMode.BATCHED.value: - batched.append(n) - else: - immediate.append(n) - return immediate, batched - - def dispatch_with_delivery_mode( notifications: "Iterable[Notification]", payload: dict[str, Any], *, error_context: str = "", ) -> None: - """Single-call entry point that splits IMMEDIATE / BATCHED and dispatches. + """Enqueue every active notification into ``NotificationBuffer``. - IMMEDIATE rows fire synchronously via NotificationHelper. BATCHED rows - enqueue into NotificationBuffer; an enqueue failure is logged but does - not abort the loop — other notifications still get their chance. + Single dispatch path: each notification produces a buffer row that the + periodic flush ships as part of a clubbed message. An enqueue failure + on one row is logged but does not abort the loop — sibling notifications + still get their chance. ``error_context`` lets callers tag failures with their dispatch source (pipeline id, api id) for easier triage. """ - immediate, batched = split_by_delivery_mode(notifications) - if immediate: - NotificationHelper.send_notification(notifications=immediate, payload=payload) - for notification in batched: + for notification in notifications: try: enqueue(notification, payload) except Exception: logger.exception( - "Failed to enqueue BATCHED notification %s%s", + "Failed to enqueue notification %s%s", notification.id, f" ({error_context})" if error_context else "", ) def enqueue(notification: Notification, payload: dict[str, Any]) -> NotificationBuffer: - """Buffer a single execution event for a BATCHED notification. + """Buffer a single execution event for a notification. Computes auth_sig and flush_after at write time so existing PENDING rows keep their original cadence even if NOTIFICATION_CLUB_INTERVAL or the @@ -172,9 +151,9 @@ def enqueue(notification: Notification, payload: dict[str, Any]) -> Notification auth_sig = compute_auth_sig(notification) platform = notification.platform or PlatformType.API.value - # Stamp a buffered-at timestamp so renderers can surface it consistently - # alongside IMMEDIATE. Worker callers already supply one; backend - # dispatchers (PipelineStatusPayload.to_dict) don't, so default here. + # Stamp a buffered-at timestamp so renderers always have one to humanize. + # Worker callers already supply one; backend dispatchers + # (PipelineStatusPayload.to_dict) don't, so default here. payload = { **payload, "timestamp": payload.get("timestamp") or timezone.now().isoformat(), @@ -204,49 +183,3 @@ def enqueue(notification: Notification, payload: dict[str, Any]) -> Notification flush_after.isoformat(), ) return buffer_row - - -class NotificationHelper: - @classmethod - def send_notification(cls, notifications: list[Notification], payload: Any) -> None: - """Dispatch IMMEDIATE notifications via the registered provider. - - Iterates over notifications, resolves the provider for each - (notification_type, platform) pair, and fires the webhook task. BATCHED - notifications must be routed to ``enqueue()`` instead — callers branch - on ``notification.delivery_mode`` before reaching this method. - - Args: - notifications: Active Notification rows to dispatch synchronously. - payload: Provider-specific payload (typically a dict). - """ - for notification in notifications: - if notification.delivery_mode == DeliveryMode.BATCHED.value: - # Callers should not reach here for BATCHED — log loudly so - # routing regressions are visible without breaking dispatch. - logger.warning( - "BATCHED notification %s reached IMMEDIATE dispatch path; " - "skipping. Caller must branch on delivery_mode.", - notification.id, - ) - continue - notification_type = NotificationType(notification.notification_type) - platform_type = PlatformType(notification.platform) - try: - notification_provider = get_notification_provider( - notification_type, platform_type - ) - notifier: NotificationProvider = notification_provider( - notification=notification, payload=payload - ) - notifier.send() - logger.info("Sending notification to %s", notification) - except ValueError as e: - logger.error( - "Error in notification type %s and platform %s for " - "notification %s: %s", - notification_type, - platform_type, - notification, - e, - ) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 492ce42cbf..6409f6bdf6 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -29,7 +29,7 @@ from backend.celery_service import app as celery_app from notification_v2.clubbed_renderer import render_clubbed_message -from notification_v2.enums import FAILURE_STATUSES, BufferStatus, DeliveryMode +from notification_v2.enums import FAILURE_STATUSES, BufferStatus from notification_v2.helper import ( build_webhook_headers, enqueue, @@ -99,9 +99,6 @@ def _serialize_notification(n: Notification) -> dict[str, Any]: "max_retries": n.max_retries, "is_active": n.is_active, "notify_on_failures": n.notify_on_failures, - # Drives the worker-side IMMEDIATE-vs-BATCHED branch in - # workers/shared/patterns/notification/helper.py. - "delivery_mode": n.delivery_mode, } @@ -309,7 +306,7 @@ def enqueue_notification_buffer(request: HttpRequest) -> JsonResponse: Worker code is model-free: it forwards a notification_id + structured payload here and lets the backend write the NotificationBuffer row. Rejects rows whose source notification is not BATCHED so a worker - routing bug cannot silently divert IMMEDIATE traffic into the buffer. + routing bug cannot silently divert non-BATCHED traffic into the buffer. """ try: body = json.loads(request.body.decode("utf-8") or "{}") @@ -342,19 +339,6 @@ def enqueue_notification_buffer(request: HttpRequest) -> JsonResponse: {"status": "error", "message": "Notification not found"}, status=404 ) - if notification.delivery_mode != DeliveryMode.BATCHED.value: - # Hard-fail rather than silently auto-correcting — surfaces worker - # routing regressions instead of letting them drain into the buffer. - return JsonResponse( - { - "status": "error", - "message": ( - "Notification delivery_mode is not BATCHED; refuse to enqueue" - ), - }, - status=409, - ) - # type / timestamp / additional_data stay optional during rollout — older # worker builds that don't forward them still produce a usable row # (renderer falls back to "Type: —" / no Additional Data line). diff --git a/backend/notification_v2/provider/__init__.py b/backend/notification_v2/provider/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/backend/notification_v2/provider/notification_provider.py b/backend/notification_v2/provider/notification_provider.py deleted file mode 100644 index 10492aaf3c..0000000000 --- a/backend/notification_v2/provider/notification_provider.py +++ /dev/null @@ -1,30 +0,0 @@ -from abc import ABC, abstractmethod - -from django.conf import settings - -from notification_v2.models import Notification - - -class NotificationProvider(ABC): - NOTIFICATION_TIMEOUT = settings.NOTIFICATION_TIMEOUT - RETRY_DELAY = 10 # Seconds - - def __init__(self, notification: Notification, payload): - self.payload = payload - self.notification = notification - - @abstractmethod - def send(self): - """Method to be overridden in child classes for sending the - notification. - """ - raise NotImplementedError("Subclasses should implement this method.") - - def validate(self): - """Method to validate the notification data.""" - pass - - @abstractmethod - def get_headers(self): - """Method to get the headers for the notification.""" - raise NotImplementedError("Subclasses should implement this method.") diff --git a/backend/notification_v2/provider/registry.py b/backend/notification_v2/provider/registry.py deleted file mode 100644 index 909ad58b4c..0000000000 --- a/backend/notification_v2/provider/registry.py +++ /dev/null @@ -1,54 +0,0 @@ -from notification_v2.enums import NotificationType, PlatformType -from notification_v2.provider.notification_provider import NotificationProvider -from notification_v2.provider.webhook.api_webhook import APIWebhook -from notification_v2.provider.webhook.slack_webhook import SlackWebhook - -REGISTRY = { - NotificationType.WEBHOOK: { - PlatformType.SLACK: SlackWebhook, - PlatformType.API: APIWebhook, - # Add other platform-specific classes here - }, - # Add other notification types and classes here -} - - -def get_notification_provider( - notification_type: NotificationType, platform_type: PlatformType -) -> NotificationProvider: - """Get Notification provider based on notification type and platform type - It uses the REGISTRY to map the combination of notification type and - platform type to the corresponding NotificationProvider class. - - If the provided combination is not found in the REGISTRY, a ValueError is raised. - - Note: - This function assumes that the REGISTRY dictionary is correctly populated - with the appropriate NotificationProvider classes for each combination of - notification type and platform type. - - See Also: - - NotificationType - - PlatformType - - NotificationProvider - - REGISTRY - - Parameters: - notification_type (NotificationType): The type of notification. - platform_type (PlatformType): The platform/provider type for the notification. - - Returns: - NotificationProvider: The appropriate NotificationProvider class for - the given combination. - - Raises: - ValueError: If the provided combination is not found in the REGISTRY. - """ - if notification_type not in REGISTRY: - raise ValueError(f"Unsupported notification type: {notification_type}") - - platform_registry = REGISTRY[notification_type] - if platform_type not in platform_registry: - raise ValueError(f"Unsupported platform type: {platform_type}") - - return platform_registry[platform_type] diff --git a/backend/notification_v2/provider/webhook/__init__.py b/backend/notification_v2/provider/webhook/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/backend/notification_v2/provider/webhook/api_webhook.py b/backend/notification_v2/provider/webhook/api_webhook.py deleted file mode 100644 index 9864d84a4a..0000000000 --- a/backend/notification_v2/provider/webhook/api_webhook.py +++ /dev/null @@ -1,30 +0,0 @@ -from typing import Any - -from notification_v2.clubbed_renderer import build_envelope -from notification_v2.provider.webhook.webhook import Webhook - - -class APIWebhook(Webhook): - def send(self) -> None: - """Send the API webhook notification. - - Wraps the IMMEDIATE event in the canonical envelope before queueing - so the receiver-visible JSON shape matches BATCHED dispatches — - `{"summary": {...}, "events": [{...}]}`. - """ - self.payload = self.format_payload() - super().send() - - def get_headers(self) -> dict[str, str]: - """API-specific headers.""" - headers = super().get_headers() - headers["Content-Type"] = "application/json" - return headers - - def format_payload(self) -> dict[str, Any]: - """Wrap a single IMMEDIATE event in the canonical envelope. - - Receivers parse the same `{summary, events}` shape regardless of - whether the dispatch was IMMEDIATE or BATCHED. - """ - return build_envelope(payloads=[self.payload]) diff --git a/backend/notification_v2/provider/webhook/slack_webhook.py b/backend/notification_v2/provider/webhook/slack_webhook.py deleted file mode 100644 index 0fda635d74..0000000000 --- a/backend/notification_v2/provider/webhook/slack_webhook.py +++ /dev/null @@ -1,33 +0,0 @@ -import logging -from typing import Any - -from notification_v2.clubbed_renderer import render_clubbed_message -from notification_v2.enums import PlatformType -from notification_v2.provider.webhook.webhook import Webhook - -logger = logging.getLogger(__name__) - - -class SlackWebhook(Webhook): - def send(self) -> None: - """Send the Slack webhook notification.""" - formatted_payload = self.format_payload() - self.payload = formatted_payload - super().send() - - def get_headers(self) -> dict[str, str]: - """Slack-specific headers.""" - headers = super().get_headers() - headers["Content-Type"] = "application/json" - return headers - - def format_payload(self) -> dict[str, Any]: - """Render the IMMEDIATE event through the canonical envelope. - - Single shared renderer for IMMEDIATE and BATCHED so receivers see the - same Slack body shape regardless of delivery mode. - """ - return render_clubbed_message( - payloads=[self.payload], - platform=PlatformType.SLACK.value, - ) diff --git a/backend/notification_v2/provider/webhook/webhook.py b/backend/notification_v2/provider/webhook/webhook.py deleted file mode 100644 index 40ddc53e61..0000000000 --- a/backend/notification_v2/provider/webhook/webhook.py +++ /dev/null @@ -1,103 +0,0 @@ -import logging - -from backend.celery_service import app as celery_app -from notification_v2.enums import AuthorizationType -from notification_v2.provider.notification_provider import NotificationProvider - -logger = logging.getLogger(__name__) - - -class WebhookNotificationArg: - MAX_RETRIES = "max_retries" - RETRY_DELAY = "retry_delay" - - -class HeaderConstants: - APPLICATION_JSON = "application/json" - - -class Webhook(NotificationProvider): - def send(self) -> None: - """Send the webhook notification.""" - try: - headers = self.get_headers() - self.validate() - except ValueError as e: - logger.error(f"Error validating notification {self.notification} :: {e}") - return - celery_app.send_task( - "send_webhook_notification", - args=[ - self.notification.url, - self.payload, - headers, - self.NOTIFICATION_TIMEOUT, - ], - kwargs={ - WebhookNotificationArg.MAX_RETRIES: self.notification.max_retries, - WebhookNotificationArg.RETRY_DELAY: self.RETRY_DELAY, - }, - ) - - def validate(self): - """Validate notification. - - Returns: - _type_: None - """ - if not self.notification.url: - raise ValueError("Webhook URL is required.") - if not self.payload: - raise ValueError("Payload is required.") - return super().validate() - - def get_headers(self) -> dict[str, str]: - """Get the headers for the notification based on the authorization type and key. - - Raises: - ValueError: _description_ - - Returns: - dict[str, str]: A dictionary containing the headers. - """ - headers: dict[str, str] = {} - try: - authorization_type = AuthorizationType( - self.notification.authorization_type.upper() - ) - except ValueError: - raise ValueError( - "Unsupported authorization type: " - f"{self.notification.authorization_type}" - ) - authorization_key = self.notification.authorization_key - authorization_header = self.notification.authorization_header - - header_formats = { - AuthorizationType.BEARER: lambda key: { - "Authorization": f"Bearer {key}", - "Content-Type": HeaderConstants.APPLICATION_JSON, - }, - AuthorizationType.API_KEY: lambda key: { - "Authorization": key, - "Content-Type": HeaderConstants.APPLICATION_JSON, - }, - AuthorizationType.CUSTOM_HEADER: lambda key: { - authorization_header: key, - "Content-Type": HeaderConstants.APPLICATION_JSON, - }, - AuthorizationType.NONE: lambda _: { - "Content-Type": HeaderConstants.APPLICATION_JSON, - }, - } - - if authorization_type not in header_formats: - raise ValueError(f"Unsupported authorization type: {authorization_type}") - - headers = header_formats[authorization_type](authorization_key) - - # Check if custom header type has required details - if authorization_type == AuthorizationType.CUSTOM_HEADER: - if not authorization_header or not authorization_key: - raise ValueError("Custom header or key missing for custom authorization.") - return headers diff --git a/unstract/core/src/unstract/core/notification_clubbed_renderer.py b/unstract/core/src/unstract/core/notification_clubbed_renderer.py index 5a21bc65cb..9d8ce709a9 100644 --- a/unstract/core/src/unstract/core/notification_clubbed_renderer.py +++ b/unstract/core/src/unstract/core/notification_clubbed_renderer.py @@ -134,8 +134,8 @@ def _format_event_line(event: dict[str, Any]) -> str: def _event_from_payload(payload: dict[str, Any]) -> dict[str, Any]: """Project a buffered payload into the canonical per-event dict. - Unified shape across Slack/API and IMMEDIATE/BATCHED. `pipeline_id` is - intentionally dropped — neither channel surfaces it. Timestamps are + Unified shape across Slack/API and every dispatch path. `pipeline_id` + is intentionally dropped — neither channel surfaces it. Timestamps are humanized once at projection so Slack and API consumers see the same string (implicit UTC, no timezone suffix). """ @@ -156,8 +156,8 @@ def _event_from_payload(payload: dict[str, Any]) -> dict[str, Any]: def build_envelope(payloads: list[dict[str, Any]]) -> dict[str, Any]: """Build the canonical envelope used by every dispatch path. - Summary carries only `{total, succeeded, failed}` — same shape for - IMMEDIATE and BATCHED so receivers parse one envelope, not two. + Summary carries only `{total, succeeded, failed}` — one envelope shape + so receivers parse a single schema, not two. """ capped = payloads[:MAX_BATCH_SIZE] succeeded = sum( @@ -179,9 +179,9 @@ def build_envelope(payloads: list[dict[str, Any]]) -> dict[str, Any]: def render_slack_text(envelope: dict[str, Any]) -> str: """Render the envelope as Slack mrkdwn body text. - Header + divider are emitted for every dispatch — IMMEDIATE, BATCHED N=1, - and BATCHED N>1 all share the same shape. Visible events are capped at - SLACK_MAX_DISPLAY_EVENTS with an `_… and K more_` overflow footer. + Header + divider are emitted for every dispatch — single-event and + multi-event batches share the same shape. Visible events are capped at + ``SLACK_MAX_DISPLAY_EVENTS`` with an `_… and K more_` overflow footer. """ summary = envelope["summary"] events: list[dict[str, Any]] = envelope["events"] diff --git a/workers/notification/providers/api_webhook.py b/workers/notification/providers/api_webhook.py index cbffc0a1d4..c6961e7227 100644 --- a/workers/notification/providers/api_webhook.py +++ b/workers/notification/providers/api_webhook.py @@ -1,9 +1,9 @@ """API Webhook Notification Provider -Wraps worker-callback IMMEDIATE payloads (flat per-event dict) in the -canonical envelope so API webhook receivers always see the same -``{"summary": {...}, "events": [...]}`` shape — IMMEDIATE or BATCHED. -Backend dispatches already arrive in envelope form and pass through. +Wraps worker-callback payloads (flat per-event dict) in the canonical +envelope so API webhook receivers always see the same +``{"summary": {...}, "events": [...]}`` shape. Backend dispatches already +arrive in envelope form and pass through. """ from typing import Any diff --git a/workers/notification/providers/slack_webhook.py b/workers/notification/providers/slack_webhook.py index 2bb757662c..1028aef289 100644 --- a/workers/notification/providers/slack_webhook.py +++ b/workers/notification/providers/slack_webhook.py @@ -1,9 +1,8 @@ """Slack Webhook Notification Provider -Renders worker-callback IMMEDIATE payloads (flat per-event dict) into the -same single-line Slack body the backend produces for IMMEDIATE/BATCHED via -clubbed_renderer. Backend-rendered payloads (`{"text": ""}`) pass -through unchanged. +Renders worker-callback payloads (flat per-event dict) into the same +single-line Slack body the backend produces via ``clubbed_renderer``. +Backend-rendered payloads (`{"text": ""}`) pass through unchanged. """ from typing import Any @@ -22,8 +21,9 @@ class SlackWebhook(WebhookProvider): """Slack-specific webhook provider. - Renders flat IMMEDIATE payloads via the worker-side mirror of the backend - clubbed renderer, then sends them as Slack-native ``text`` mrkdwn. + Renders flat per-event payloads via the worker-side mirror of the + backend clubbed renderer, then sends them as Slack-native ``text`` + mrkdwn. """ def __init__(self) -> None: @@ -51,11 +51,11 @@ def format_payload(self, payload: dict[str, Any]) -> dict[str, Any]: """Format the payload to match Slack's expected structure. Two input shapes are accepted: - - Backend-rendered ``{"text": ""}`` (BATCHED dispatch and - backend IMMEDIATE through ``clubbed_renderer``) — passed through. - - Flat per-event dict from the worker-callback IMMEDIATE path — - wrapped in a single-event envelope and rendered to the canonical - single-line mrkdwn body. + - Backend-rendered ``{"text": ""}`` (any backend dispatch + through ``clubbed_renderer``) — passed through. + - Flat per-event dict from a worker callback — wrapped in a + single-event envelope and rendered to the canonical single-line + mrkdwn body. """ if "text" in payload and "events" not in payload: return {"text": payload["text"]} diff --git a/workers/shared/patterns/notification/helper.py b/workers/shared/patterns/notification/helper.py index 460c308146..79009a71a3 100644 --- a/workers/shared/patterns/notification/helper.py +++ b/workers/shared/patterns/notification/helper.py @@ -7,8 +7,6 @@ import logging from typing import Any -from celery import current_app - # Import shared data models from @unstract/core from unstract.core.data_models import ( ExecutionStatus, @@ -19,9 +17,6 @@ logger = logging.getLogger(__name__) -# Mirrors notification_v2.enums.DeliveryMode.BATCHED. Worker stays string-only -# so it does not import Django enums. -DELIVERY_MODE_BATCHED = "BATCHED" ENQUEUE_BUFFER_ENDPOINT = "v1/webhook/buffer/enqueue/" @@ -37,7 +32,7 @@ def _enqueue_to_buffer( logs the drop instead of silently treating BATCHED delivery as successful. """ # Forward the full per-event shape so the backend renderer can match - # IMMEDIATE's KV layout per event (Type / Pipeline Id / Pipeline Name / + # the canonical KV layout per event (Type / Pipeline Id / Pipeline Name / # Status / Execution Id / Timestamp / Additional Data). Older backend # builds that ignore the extra fields stay unaffected. payload_type = payload.type.value if hasattr(payload.type, "value") else payload.type @@ -83,10 +78,13 @@ def _route_notification( notification: dict[str, Any], payload: NotificationPayload, ) -> None: - """IMMEDIATE -> existing worker queue; BATCHED -> backend enqueue endpoint. + """Forward webhook notifications to the backend buffer-enqueue endpoint. - Defaults to IMMEDIATE when delivery_mode is missing so older backend - builds (pre-UNS-611) keep working unchanged. + Single dispatch path: the backend owns the buffer and the periodic + flush ships clubbed messages. Non-webhook notification types are + skipped at this layer. An enqueue failure is logged but doesn't abort + the outer trigger_* loop so sibling notifications still get their + chance. """ if notification.get("notification_type") != "WEBHOOK": logger.debug( @@ -95,107 +93,13 @@ def _route_notification( ) return - if notification.get("delivery_mode") == DELIVERY_MODE_BATCHED: - try: - _enqueue_to_buffer(api_client, notification, payload) - except Exception: # noqa: BLE001 — already logged with stack inside - # Surface but don't abort the outer trigger_* loop — sibling - # BATCHED notifications still deserve their enqueue attempt. - logger.warning( - "BATCHED enqueue failed for notification %s; continuing with others", - notification.get("id"), - ) - return - - send_notification_to_worker( - url=notification["url"], - payload=payload, - auth_type=notification.get("authorization_type", "NONE"), - auth_key=notification.get("authorization_key"), - auth_header=notification.get("authorization_header"), - max_retries=notification.get("max_retries", 0), - platform=notification.get("platform"), - ) - - -def get_webhook_headers( - auth_type: str, auth_key: str | None, auth_header: str | None -) -> dict[str, str]: - """Generate webhook headers based on authorization configuration.""" - headers = {"Content-Type": "application/json"} - try: - if auth_type and auth_key: - auth_type_upper = auth_type.upper() - - if auth_type_upper == "BEARER": - headers["Authorization"] = f"Bearer {auth_key}" - elif auth_type_upper == "API_KEY": - headers["Authorization"] = auth_key - elif auth_type_upper == "CUSTOM_HEADER" and auth_header: - headers[auth_header] = auth_key - # NONE type just uses Content-Type header - except Exception as e: - logger.warning(f"Error generating webhook headers: {e}") - # Use default headers if auth config is invalid - - return headers - - -def send_notification_to_worker( - url: str, - payload: NotificationPayload, - auth_type: str, - auth_key: str | None, - auth_header: str | None, - max_retries: int = 0, - platform: str | None = None, -) -> bool: - """Send a single notification to the notification worker queue. - - Args: - url: Webhook URL to send notification to - payload: Structured notification payload - auth_type: Authorization type (NONE, BEARER, API_KEY, CUSTOM_HEADER) - auth_key: Authorization key/token - auth_header: Custom header name for CUSTOM_HEADER auth type - max_retries: Maximum number of retry attempts - platform: Platform type from notification config (SLACK, API, etc.) - - Returns: - True if task was successfully queued, False otherwise - """ - try: - headers = get_webhook_headers(auth_type, auth_key, auth_header) - - # Convert payload to webhook format (excludes internal fields) - payload_dict = payload.to_webhook_payload() - - # Send task to notification worker - current_app.send_task( - "send_webhook_notification", - args=[ - url, - payload_dict, - headers, - 10, # timeout - ], - kwargs={ - "max_retries": max_retries, - "retry_delay": 10, - "platform": platform, - }, - queue="notifications", - ) - - logger.info( - f"Sent webhook notification to worker queue for {url} (pipeline: {payload.pipeline_id})" + _enqueue_to_buffer(api_client, notification, payload) + except Exception: # noqa: BLE001 — already logged with stack inside + logger.warning( + "Buffer enqueue failed for notification %s; continuing with others", + notification.get("id"), ) - return True - - except Exception as e: - logger.error(f"Failed to send notification to {url}: {e}") - return False def trigger_notification( From 743e9a99829cc514a6c95be6c4cbf9306ad9ccc7 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 19:40:53 +0530 Subject: [PATCH 23/27] add legacy code --- backend/notification_v2/provider/__init__.py | 0 .../provider/notification_provider.py | 30 +++++ backend/notification_v2/provider/registry.py | 54 +++++++++ .../provider/webhook/__init__.py | 0 .../provider/webhook/api_webhook.py | 30 +++++ .../provider/webhook/slack_webhook.py | 33 ++++++ .../provider/webhook/webhook.py | 103 ++++++++++++++++++ 7 files changed, 250 insertions(+) create mode 100644 backend/notification_v2/provider/__init__.py create mode 100644 backend/notification_v2/provider/notification_provider.py create mode 100644 backend/notification_v2/provider/registry.py create mode 100644 backend/notification_v2/provider/webhook/__init__.py create mode 100644 backend/notification_v2/provider/webhook/api_webhook.py create mode 100644 backend/notification_v2/provider/webhook/slack_webhook.py create mode 100644 backend/notification_v2/provider/webhook/webhook.py diff --git a/backend/notification_v2/provider/__init__.py b/backend/notification_v2/provider/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/notification_v2/provider/notification_provider.py b/backend/notification_v2/provider/notification_provider.py new file mode 100644 index 0000000000..10492aaf3c --- /dev/null +++ b/backend/notification_v2/provider/notification_provider.py @@ -0,0 +1,30 @@ +from abc import ABC, abstractmethod + +from django.conf import settings + +from notification_v2.models import Notification + + +class NotificationProvider(ABC): + NOTIFICATION_TIMEOUT = settings.NOTIFICATION_TIMEOUT + RETRY_DELAY = 10 # Seconds + + def __init__(self, notification: Notification, payload): + self.payload = payload + self.notification = notification + + @abstractmethod + def send(self): + """Method to be overridden in child classes for sending the + notification. + """ + raise NotImplementedError("Subclasses should implement this method.") + + def validate(self): + """Method to validate the notification data.""" + pass + + @abstractmethod + def get_headers(self): + """Method to get the headers for the notification.""" + raise NotImplementedError("Subclasses should implement this method.") diff --git a/backend/notification_v2/provider/registry.py b/backend/notification_v2/provider/registry.py new file mode 100644 index 0000000000..909ad58b4c --- /dev/null +++ b/backend/notification_v2/provider/registry.py @@ -0,0 +1,54 @@ +from notification_v2.enums import NotificationType, PlatformType +from notification_v2.provider.notification_provider import NotificationProvider +from notification_v2.provider.webhook.api_webhook import APIWebhook +from notification_v2.provider.webhook.slack_webhook import SlackWebhook + +REGISTRY = { + NotificationType.WEBHOOK: { + PlatformType.SLACK: SlackWebhook, + PlatformType.API: APIWebhook, + # Add other platform-specific classes here + }, + # Add other notification types and classes here +} + + +def get_notification_provider( + notification_type: NotificationType, platform_type: PlatformType +) -> NotificationProvider: + """Get Notification provider based on notification type and platform type + It uses the REGISTRY to map the combination of notification type and + platform type to the corresponding NotificationProvider class. + + If the provided combination is not found in the REGISTRY, a ValueError is raised. + + Note: + This function assumes that the REGISTRY dictionary is correctly populated + with the appropriate NotificationProvider classes for each combination of + notification type and platform type. + + See Also: + - NotificationType + - PlatformType + - NotificationProvider + - REGISTRY + + Parameters: + notification_type (NotificationType): The type of notification. + platform_type (PlatformType): The platform/provider type for the notification. + + Returns: + NotificationProvider: The appropriate NotificationProvider class for + the given combination. + + Raises: + ValueError: If the provided combination is not found in the REGISTRY. + """ + if notification_type not in REGISTRY: + raise ValueError(f"Unsupported notification type: {notification_type}") + + platform_registry = REGISTRY[notification_type] + if platform_type not in platform_registry: + raise ValueError(f"Unsupported platform type: {platform_type}") + + return platform_registry[platform_type] diff --git a/backend/notification_v2/provider/webhook/__init__.py b/backend/notification_v2/provider/webhook/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/notification_v2/provider/webhook/api_webhook.py b/backend/notification_v2/provider/webhook/api_webhook.py new file mode 100644 index 0000000000..9864d84a4a --- /dev/null +++ b/backend/notification_v2/provider/webhook/api_webhook.py @@ -0,0 +1,30 @@ +from typing import Any + +from notification_v2.clubbed_renderer import build_envelope +from notification_v2.provider.webhook.webhook import Webhook + + +class APIWebhook(Webhook): + def send(self) -> None: + """Send the API webhook notification. + + Wraps the IMMEDIATE event in the canonical envelope before queueing + so the receiver-visible JSON shape matches BATCHED dispatches — + `{"summary": {...}, "events": [{...}]}`. + """ + self.payload = self.format_payload() + super().send() + + def get_headers(self) -> dict[str, str]: + """API-specific headers.""" + headers = super().get_headers() + headers["Content-Type"] = "application/json" + return headers + + def format_payload(self) -> dict[str, Any]: + """Wrap a single IMMEDIATE event in the canonical envelope. + + Receivers parse the same `{summary, events}` shape regardless of + whether the dispatch was IMMEDIATE or BATCHED. + """ + return build_envelope(payloads=[self.payload]) diff --git a/backend/notification_v2/provider/webhook/slack_webhook.py b/backend/notification_v2/provider/webhook/slack_webhook.py new file mode 100644 index 0000000000..0fda635d74 --- /dev/null +++ b/backend/notification_v2/provider/webhook/slack_webhook.py @@ -0,0 +1,33 @@ +import logging +from typing import Any + +from notification_v2.clubbed_renderer import render_clubbed_message +from notification_v2.enums import PlatformType +from notification_v2.provider.webhook.webhook import Webhook + +logger = logging.getLogger(__name__) + + +class SlackWebhook(Webhook): + def send(self) -> None: + """Send the Slack webhook notification.""" + formatted_payload = self.format_payload() + self.payload = formatted_payload + super().send() + + def get_headers(self) -> dict[str, str]: + """Slack-specific headers.""" + headers = super().get_headers() + headers["Content-Type"] = "application/json" + return headers + + def format_payload(self) -> dict[str, Any]: + """Render the IMMEDIATE event through the canonical envelope. + + Single shared renderer for IMMEDIATE and BATCHED so receivers see the + same Slack body shape regardless of delivery mode. + """ + return render_clubbed_message( + payloads=[self.payload], + platform=PlatformType.SLACK.value, + ) diff --git a/backend/notification_v2/provider/webhook/webhook.py b/backend/notification_v2/provider/webhook/webhook.py new file mode 100644 index 0000000000..40ddc53e61 --- /dev/null +++ b/backend/notification_v2/provider/webhook/webhook.py @@ -0,0 +1,103 @@ +import logging + +from backend.celery_service import app as celery_app +from notification_v2.enums import AuthorizationType +from notification_v2.provider.notification_provider import NotificationProvider + +logger = logging.getLogger(__name__) + + +class WebhookNotificationArg: + MAX_RETRIES = "max_retries" + RETRY_DELAY = "retry_delay" + + +class HeaderConstants: + APPLICATION_JSON = "application/json" + + +class Webhook(NotificationProvider): + def send(self) -> None: + """Send the webhook notification.""" + try: + headers = self.get_headers() + self.validate() + except ValueError as e: + logger.error(f"Error validating notification {self.notification} :: {e}") + return + celery_app.send_task( + "send_webhook_notification", + args=[ + self.notification.url, + self.payload, + headers, + self.NOTIFICATION_TIMEOUT, + ], + kwargs={ + WebhookNotificationArg.MAX_RETRIES: self.notification.max_retries, + WebhookNotificationArg.RETRY_DELAY: self.RETRY_DELAY, + }, + ) + + def validate(self): + """Validate notification. + + Returns: + _type_: None + """ + if not self.notification.url: + raise ValueError("Webhook URL is required.") + if not self.payload: + raise ValueError("Payload is required.") + return super().validate() + + def get_headers(self) -> dict[str, str]: + """Get the headers for the notification based on the authorization type and key. + + Raises: + ValueError: _description_ + + Returns: + dict[str, str]: A dictionary containing the headers. + """ + headers: dict[str, str] = {} + try: + authorization_type = AuthorizationType( + self.notification.authorization_type.upper() + ) + except ValueError: + raise ValueError( + "Unsupported authorization type: " + f"{self.notification.authorization_type}" + ) + authorization_key = self.notification.authorization_key + authorization_header = self.notification.authorization_header + + header_formats = { + AuthorizationType.BEARER: lambda key: { + "Authorization": f"Bearer {key}", + "Content-Type": HeaderConstants.APPLICATION_JSON, + }, + AuthorizationType.API_KEY: lambda key: { + "Authorization": key, + "Content-Type": HeaderConstants.APPLICATION_JSON, + }, + AuthorizationType.CUSTOM_HEADER: lambda key: { + authorization_header: key, + "Content-Type": HeaderConstants.APPLICATION_JSON, + }, + AuthorizationType.NONE: lambda _: { + "Content-Type": HeaderConstants.APPLICATION_JSON, + }, + } + + if authorization_type not in header_formats: + raise ValueError(f"Unsupported authorization type: {authorization_type}") + + headers = header_formats[authorization_type](authorization_key) + + # Check if custom header type has required details + if authorization_type == AuthorizationType.CUSTOM_HEADER: + if not authorization_header or not authorization_key: + raise ValueError("Custom header or key missing for custom authorization.") + return headers From ba04d587fb380e5dc36bd2a7689dad82d3539772 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 19:45:52 +0530 Subject: [PATCH 24/27] add legacy code --- backend/notification_v2/internal_serializers.py | 1 - backend/notification_v2/serializers.py | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/backend/notification_v2/internal_serializers.py b/backend/notification_v2/internal_serializers.py index db7a35ab32..94669d64a4 100644 --- a/backend/notification_v2/internal_serializers.py +++ b/backend/notification_v2/internal_serializers.py @@ -23,7 +23,6 @@ class Meta: "platform", "max_retries", "is_active", - "delivery_mode", "created_at", "modified_at", "pipeline", diff --git a/backend/notification_v2/serializers.py b/backend/notification_v2/serializers.py index 956a25b86e..79802be90b 100644 --- a/backend/notification_v2/serializers.py +++ b/backend/notification_v2/serializers.py @@ -1,7 +1,7 @@ from rest_framework import serializers from utils.input_sanitizer import validate_name_field -from .enums import AuthorizationType, DeliveryMode, NotificationType, PlatformType +from .enums import AuthorizationType, NotificationType, PlatformType from .models import Notification @@ -21,11 +21,6 @@ class NotificationSerializer(serializers.ModelSerializer): max_value=4, min_value=0, default=0, required=False ) notify_on_failures = serializers.BooleanField(default=False, required=False) - delivery_mode = serializers.ChoiceField( - choices=DeliveryMode.choices(), - default=DeliveryMode.BATCHED.value, - required=False, - ) class Meta: model = Notification From b1fd243c347cfb5c3696e6fec7a1cc79d2a3685f Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Wed, 13 May 2026 19:57:08 +0530 Subject: [PATCH 25/27] greptile review --- backend/notification_v2/internal_api_views.py | 10 ++++++++++ workers/shared/patterns/notification/helper.py | 7 +++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 6409f6bdf6..27d701bc0f 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -339,6 +339,16 @@ def enqueue_notification_buffer(request: HttpRequest) -> JsonResponse: {"status": "error", "message": "Notification not found"}, status=404 ) + # INPROGRESS fires from the scheduler before a WorkflowExecution exists, + # so the GET-side `_apply_failure_filter` cannot run (no execution → no + # filter applied) and returns notify_on_failures=True rows too. Drop the + # event here so failure-only subscribers never receive a run-start. + if ( + notification.notify_on_failures + and body.get("status") == Pipeline.PipelineStatus.INPROGRESS + ): + return JsonResponse({"status": "ok", "buffer_row_id": None}) + # type / timestamp / additional_data stay optional during rollout — older # worker builds that don't forward them still produce a usable row # (renderer falls back to "Type: —" / no Additional Data line). diff --git a/workers/shared/patterns/notification/helper.py b/workers/shared/patterns/notification/helper.py index 79009a71a3..ed00475ea0 100644 --- a/workers/shared/patterns/notification/helper.py +++ b/workers/shared/patterns/notification/helper.py @@ -58,7 +58,8 @@ def _enqueue_to_buffer( }, timeout=10, ) - except Exception: # noqa: BLE001 — propagate any failure, don't classify + # Propagate any failure; caller decides whether to continue iteration. + except Exception: # noqa: BLE001 logger.exception( "Failed to enqueue BATCHED notification %s for pipeline %s", notification["id"], @@ -95,7 +96,9 @@ def _route_notification( try: _enqueue_to_buffer(api_client, notification, payload) - except Exception: # noqa: BLE001 — already logged with stack inside + # Already logged with stack inside _enqueue_to_buffer; broad catch keeps + # sibling notifications going. + except Exception: # noqa: BLE001 logger.warning( "Buffer enqueue failed for notification %s; continuing with others", notification.get("id"), From fb47c4fe22d2e2a1a5c7dd66904925258f937f2c Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Thu, 14 May 2026 17:22:57 +0530 Subject: [PATCH 26/27] greptile review --- backend/notification_v2/internal_api_views.py | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/backend/notification_v2/internal_api_views.py b/backend/notification_v2/internal_api_views.py index 27d701bc0f..d823da302f 100644 --- a/backend/notification_v2/internal_api_views.py +++ b/backend/notification_v2/internal_api_views.py @@ -374,17 +374,32 @@ def enqueue_notification_buffer(request: HttpRequest) -> JsonResponse: def _gc_terminal_rows() -> int: - """Delete DISPATCHED / DEAD_LETTER rows older than the retention window. - - PENDING rows are intentionally untouched regardless of age — they - represent live work the flush job still owns. + """Delete buffer rows past the retention window. + + Two sweeps: + - Terminal rows (DISPATCHED / DEAD_LETTER) older than the retention + window: hygiene for completed work. + - PENDING rows whose source notification has been deactivated and + whose ``flush_after`` has aged past the same window: ``_dispatch_group`` + filters ``notification__is_active=True``, so without this sweep + these rows are unreachable from both dispatch and GC and would + accumulate forever in the partial PENDING index. + + PENDING rows attached to active notifications are intentionally + untouched regardless of age — they represent live work the flush + job still owns. """ cutoff = timezone.now() - timedelta(days=settings.NOTIFICATION_BUFFER_RETENTION_DAYS) - deleted_count, _ = NotificationBuffer.objects.filter( + terminal_deleted, _ = NotificationBuffer.objects.filter( status__in=[BufferStatus.DISPATCHED.value, BufferStatus.DEAD_LETTER.value], created_at__lt=cutoff, ).delete() - return int(deleted_count) + inactive_deleted, _ = NotificationBuffer.objects.filter( + status=BufferStatus.PENDING.value, + notification__is_active=False, + flush_after__lt=cutoff, + ).delete() + return int(terminal_deleted) + int(inactive_deleted) def _send_clubbed( From b8699f4eefe026cfccb94f5c41a3c7840c3ef016 Mon Sep 17 00:00:00 2001 From: kirtimanmishrazipstack Date: Fri, 22 May 2026 17:11:39 +0530 Subject: [PATCH 27/27] UI as per new designs --- .../DisplayNotifications.jsx | 2 +- .../settings/platform/PlatformSettings.css | 51 ++++- .../settings/platform/PlatformSettings.jsx | 188 ++++++++++-------- 3 files changed, 150 insertions(+), 91 deletions(-) diff --git a/frontend/src/components/pipelines-or-deployments/notification-modal/DisplayNotifications.jsx b/frontend/src/components/pipelines-or-deployments/notification-modal/DisplayNotifications.jsx index 9754dd20ab..6860bf0258 100644 --- a/frontend/src/components/pipelines-or-deployments/notification-modal/DisplayNotifications.jsx +++ b/frontend/src/components/pipelines-or-deployments/notification-modal/DisplayNotifications.jsx @@ -91,7 +91,7 @@ function DisplayNotifications({ indicator: , spinning: isLoading, }} - pagination={{ pageSize: 5 }} + pagination={false} /> ); diff --git a/frontend/src/components/settings/platform/PlatformSettings.css b/frontend/src/components/settings/platform/PlatformSettings.css index 4ad6f4bf8e..da0da79318 100644 --- a/frontend/src/components/settings/platform/PlatformSettings.css +++ b/frontend/src/components/settings/platform/PlatformSettings.css @@ -2,7 +2,7 @@ .plt-set-layout { height: calc(100vh - 60px); - background-color: #ffffff; + background-color: var(--page-bg-3); } .plt-set-layout-2 { @@ -10,9 +10,10 @@ } .plt-set-head { - background-color: #f5f7f9; + background-color: var(--white); padding: 14px; height: 60px; + border-bottom: 1px solid var(--border-color-1); } .plt-set-head-typo { @@ -20,14 +21,56 @@ font-weight: 600; } +.plt-set-section { + margin-bottom: 16px; +} + +.plt-set-section > .ant-typography { + margin-top: 0; +} + +.plt-set-section-subtitle { + display: block; + margin-bottom: 8px; + font-size: 12px; +} + +.plt-set-inner-card { + background-color: #f5f7f9; + border: 1px solid #e5e7eb; + border-radius: 8px; + padding: 16px; +} + .plt-set-key-head { + display: flex; + align-items: center; + gap: 8px; margin-bottom: 8px; } -.plt-set-key-head-col-1 { - padding-right: 16px; +.plt-set-key-pill-clickable { + cursor: pointer; } .plt-set-key-display { width: 300px; } + +.plt-set-notif-field-label { + display: block; + margin-bottom: 8px; + font-size: 13px; +} + +.plt-set-notif-field-row { + display: flex; + align-items: center; + gap: 8px; +} + +.plt-set-notif-helper { + display: block; + margin-top: 6px; + font-size: 12px; +} diff --git a/frontend/src/components/settings/platform/PlatformSettings.jsx b/frontend/src/components/settings/platform/PlatformSettings.jsx index f2eee087b0..88a4613e78 100644 --- a/frontend/src/components/settings/platform/PlatformSettings.jsx +++ b/frontend/src/components/settings/platform/PlatformSettings.jsx @@ -9,9 +9,8 @@ import { Divider, Input, InputNumber, - Radio, Row, - Space, + Tag, Typography, } from "antd"; import { useEffect, useState } from "react"; @@ -326,94 +325,110 @@ function PlatformSettings() {
-
+
Internal API Keys - {keys.map((keyDetails, keyIndex) => { - return ( -
-
-
- - -
- - {keyDetails?.keyName} - -
- - -
- handleToggle(keyIndex)} - > - Active Key - -
- -
-
+ + Authenticate platform-to-platform requests. Keep these values + secret. + +
+ {keys.map((keyDetails, keyIndex) => { + const isActive = + Boolean(keyDetails?.id) && activeKey === keyIndex; + const canActivate = keyDetails?.id !== null; + return ( +
- - -
- - copyText(keys[keyIndex].key) - } - /> - } - /> -
- - - - - - handleDelete(keyIndex)} - content="Want to delete this platform key? This action cannot be undone." - okText="Delete" +
+ + {keyDetails?.keyName} + + {isActive ? ( + Active + ) : ( + handleToggle(keyIndex) + : undefined + } > + Inactive + + )} +
+
+ + +
+ + copyText(keys[keyIndex].key) + } + /> + } + /> +
+ + + + + handleDelete(keyIndex)} + content="Want to delete this platform key? This action cannot be undone." + okText="Delete" + > +
+ {keyIndex < keys?.length - 1 && }
- {keyIndex < keys?.length - 1 && } -
- ); - })} + ); + })} +
- -
+
Notifications -
- - - Notification interval (minutes, 1–120) - + + Control how often the platform notifies you about activity. + +
+ + Notification interval + +
Save - -
- - Allowed: 1–120 minutes. Default: 5 minutes. -
+ + Allowed: 1 to 120 minutes. Default: 5 minutes. +