From 6400e6f2eafc43370cd8fad2e32ad198349dc771 Mon Sep 17 00:00:00 2001
From: Prajwal D C
Date: Fri, 27 Mar 2026 21:53:29 +0530
Subject: [PATCH] fix: Serialization issue

---
 .../gap_analysis/executor/gap_executor.py | 22 ++++++++++-
 .../tests/unit/steps/test_gap_executor.py | 39 ++++++++++++++++++-
 2 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/src/ContentProcessorWorkflow/src/steps/gap_analysis/executor/gap_executor.py b/src/ContentProcessorWorkflow/src/steps/gap_analysis/executor/gap_executor.py
index 3f14eaa7..00634402 100644
--- a/src/ContentProcessorWorkflow/src/steps/gap_analysis/executor/gap_executor.py
+++ b/src/ContentProcessorWorkflow/src/steps/gap_analysis/executor/gap_executor.py
@@ -11,6 +11,7 @@
 """
 
 import json
+from datetime import date, datetime, time
 from pathlib import Path
 from typing import Never, cast
 
@@ -93,6 +94,25 @@ def _load_prompt_and_rules(self) -> str:
 
         return prompt_template.replace("{{RULES_DSL}}", rules_text)
 
+    def _json_default(self, value: object) -> str:
+        """Convert non-JSON-native values from processed output into strings."""
+        if isinstance(value, (datetime, date, time)):
+            return value.isoformat()
+        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
+
+    def _serialize_processed_output(self, processed_output: dict) -> str:
+        """Serialize processed output for prompt injection.
+
+        Content-processing results can contain Python datetime objects when they
+        are materialized from storage, so serialize those explicitly instead of
+        letting ``json.dumps`` fail mid-workflow.
+        """
+        return json.dumps(
+            processed_output,
+            ensure_ascii=False,
+            default=self._json_default,
+        )
+
     @handler
     async def handle_execute(
         self,
@@ -157,7 +177,7 @@ async def handle_execute(
             extracted_file = ExtractedFile(
                 file_name=document["file_name"],
                 mime_type=document["mime_type"],
-                extracted_content=json.dumps(processed_output),
+                extracted_content=self._serialize_processed_output(processed_output),
             )
             processed_files.append(extracted_file)
 
diff --git a/src/ContentProcessorWorkflow/tests/unit/steps/test_gap_executor.py b/src/ContentProcessorWorkflow/tests/unit/steps/test_gap_executor.py
index 3958c173..a5c98856 100644
--- a/src/ContentProcessorWorkflow/tests/unit/steps/test_gap_executor.py
+++ b/src/ContentProcessorWorkflow/tests/unit/steps/test_gap_executor.py
@@ -5,11 +5,22 @@
 
 from __future__ import annotations
 
-from unittest.mock import patch
+import json
+import sys
+from datetime import datetime
+from unittest.mock import MagicMock, patch
 
 import pytest
 
-from steps.gap_analysis.executor.gap_executor import GapExecutor
+with patch.dict(
+    sys.modules,
+    {
+        "repositories.claim_processes": MagicMock(Claim_Processes=object),
+        "services.content_process_service": MagicMock(ContentProcessService=object),
+    },
+):
+    with patch("agent_framework.handler", lambda fn: fn):
+        from steps.gap_analysis.executor.gap_executor import GapExecutor
 
 
 class TestReadTextFile:
@@ -69,3 +80,27 @@ def fake_read(path):
 
         with pytest.raises(RuntimeError, match="Invalid YAML"):
             exe._load_prompt_and_rules()
+
+
+class TestSerializeProcessedOutput:
+    def _make_executor(self):
+        with patch.object(GapExecutor, "__init__", lambda self, *a, **kw: None):
+            exe = GapExecutor.__new__(GapExecutor)
+        exe._PROMPT_FILE_NAME = "gap_executor_prompt.txt"
+        exe._RULES_FILE_NAME = "fnol_gap_rules.dsl.yaml"
+        return exe
+
+    def test_serializes_datetime_values(self):
+        exe = self._make_executor()
+
+        serialized = exe._serialize_processed_output(
+            {
+                "created_at": datetime(2026, 3, 27, 12, 56, 20),
+                "nested": {"updated_at": datetime(2026, 3, 27, 13, 1, 2)},
+            }
+        )
+
+        assert json.loads(serialized) == {
+            "created_at": "2026-03-27T12:56:20",
+            "nested": {"updated_at": "2026-03-27T13:01:02"},
+        }