From 6400e6f2eafc43370cd8fad2e32ad198349dc771 Mon Sep 17 00:00:00 2001
From: Prajwal D C
Date: Fri, 27 Mar 2026 21:53:29 +0530
Subject: [PATCH] fix: Serialization issue

---
 .../gap_analysis/executor/gap_executor.py | 22 ++++++++++-
 .../tests/unit/steps/test_gap_executor.py | 39 ++++++++++++++++++-
 2 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/src/ContentProcessorWorkflow/src/steps/gap_analysis/executor/gap_executor.py b/src/ContentProcessorWorkflow/src/steps/gap_analysis/executor/gap_executor.py
index 3f14eaa7..00634402 100644
--- a/src/ContentProcessorWorkflow/src/steps/gap_analysis/executor/gap_executor.py
+++ b/src/ContentProcessorWorkflow/src/steps/gap_analysis/executor/gap_executor.py
@@ -11,6 +11,7 @@
 """
 
 import json
+from datetime import date, datetime, time
 from pathlib import Path
 from typing import Never, cast
 
@@ -93,6 +94,25 @@ def _load_prompt_and_rules(self) -> str:
 
         return prompt_template.replace("{{RULES_DSL}}", rules_text)
 
+    def _json_default(self, value: object) -> str:
+        """Convert non-JSON-native values from processed output into strings."""
+        if isinstance(value, (datetime, date, time)):
+            return value.isoformat()
+        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
+
+    def _serialize_processed_output(self, processed_output: dict) -> str:
+        """Serialize processed output for prompt injection.
+
+        Content-processing results can contain Python datetime objects when they
+        are materialized from storage, so serialize those explicitly instead of
+        letting ``json.dumps`` fail mid-workflow.
+        """
+        return json.dumps(
+            processed_output,
+            ensure_ascii=False,
+            default=self._json_default,
+        )
+
     @handler
     async def handle_execute(
         self,
@@ -157,7 +177,7 @@ async def handle_execute(
             extracted_file = ExtractedFile(
                 file_name=document["file_name"],
                 mime_type=document["mime_type"],
-                extracted_content=json.dumps(processed_output),
+                extracted_content=self._serialize_processed_output(processed_output),
             )
             processed_files.append(extracted_file)
 
diff --git a/src/ContentProcessorWorkflow/tests/unit/steps/test_gap_executor.py b/src/ContentProcessorWorkflow/tests/unit/steps/test_gap_executor.py
index 3958c173..a5c98856 100644
--- a/src/ContentProcessorWorkflow/tests/unit/steps/test_gap_executor.py
+++ b/src/ContentProcessorWorkflow/tests/unit/steps/test_gap_executor.py
@@ -5,11 +5,22 @@
 
 from __future__ import annotations
 
-from unittest.mock import patch
+import json
+import sys
+from datetime import datetime
+from unittest.mock import MagicMock, patch
 
 import pytest
 
-from steps.gap_analysis.executor.gap_executor import GapExecutor
+with patch.dict(
+    sys.modules,
+    {
+        "repositories.claim_processes": MagicMock(Claim_Processes=object),
+        "services.content_process_service": MagicMock(ContentProcessService=object),
+    },
+):
+    with patch("agent_framework.handler", lambda fn: fn):
+        from steps.gap_analysis.executor.gap_executor import GapExecutor
 
 
 class TestReadTextFile:
@@ -69,3 +80,27 @@ def fake_read(path):
 
         with pytest.raises(RuntimeError, match="Invalid YAML"):
             exe._load_prompt_and_rules()
+
+
+class TestSerializeProcessedOutput:
+    def _make_executor(self):
+        with patch.object(GapExecutor, "__init__", lambda self, *a, **kw: None):
+            exe = GapExecutor.__new__(GapExecutor)
+        exe._PROMPT_FILE_NAME = "gap_executor_prompt.txt"
+        exe._RULES_FILE_NAME = "fnol_gap_rules.dsl.yaml"
+        return exe
+
+    def test_serializes_datetime_values(self):
+        exe = self._make_executor()
+
+        serialized = exe._serialize_processed_output(
+            {
+                "created_at": datetime(2026, 3, 27, 12, 56, 20),
+                "nested": {"updated_at": datetime(2026, 3, 27, 13, 1, 2)},
+            }
+        )
+
+        assert json.loads(serialized) == {
+            "created_at": "2026-03-27T12:56:20",
+            "nested": {"updated_at": "2026-03-27T13:01:02"},
+        }