Zipstack · pk-zipstack · Apr 7, 2026 · Apr 9, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/backend/prompt_studio/prompt_studio_core_v2/constants.py b/backend/prompt_studio/prompt_studio_core_v2/constants.py
@@ -105,6 +105,8 @@ class ToolStudioPromptKeys:
     EXECUTION_SOURCE = "execution_source"
     LINE_ITEM = "line-item"
     CUSTOM_DATA = "custom_data"
+    SIGNATURE_METADATA = "signature_metadata"
+    SIGNATURE_PAGE_REFERENCES = "signature_page_references"
     # Webhook postprocessing settings
     ENABLE_POSTPROCESSING_WEBHOOK = "enable_postprocessing_webhook"
     POSTPROCESSING_WEBHOOK_URL = "postprocessing_webhook_url"

diff --git a/backend/prompt_studio/prompt_studio_core_v2/internal_views.py b/backend/prompt_studio/prompt_studio_core_v2/internal_views.py
@@ -244,11 +244,13 @@ def indexing_status(request):
     user_id = data.get("user_id", "")
     doc_id_key = data.get("doc_id_key", "")
 
-    if not action or not org_id or not user_id or not doc_id_key:
+    # user_id may be empty (e.g. mock auth users) - it's only used as a
+    # Redis cache key fragment, so empty is acceptable.
+    if not action or not org_id or not doc_id_key:
         return JsonResponse(
             {
                 "success": False,
-                "error": "action, org_id, user_id, doc_id_key are required",
+                "error": "action, org_id, doc_id_key are required",
             },
             status=status.HTTP_400_BAD_REQUEST,
         )

diff --git a/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
@@ -4,7 +4,7 @@
 import time
 import uuid
 from pathlib import Path
-from typing import Any
+from typing import Any, NamedTuple
 
 from account_v2.constants import Common
 from account_v2.models import User
@@ -85,7 +85,20 @@
 CHOICES_JSON = "/static/select_choices.json"
 ERROR_MSG = "User %s doesn't have access to adapter %s"
 
-logger = logging.getLogger(__name__)
+
+class ExtractResult(NamedTuple):
+    """Return value of ``PromptStudioHelper.dynamic_extractor``.
+
+    ``signature_metadata`` and ``signature_page_references`` are populated
+    only when the x2text adapter is LLMWhisperer V2 in ``document_insights``
+    mode and the document contains signatures. They are read either from
+    the live extract dispatch result (cache miss) or from the on-disk
+    ``.doc_insights.json`` sidecar (cache hit).
+    """
+
+    text: str  # extracted document text
+    signature_metadata: dict[str, Any] | None = None  # None when not captured
+    signature_page_references: dict[str, Any] | None = None  # None when not captured
 
 
 class PromptStudioHelper:
@@ -742,14 +755,15 @@ def build_fetch_response_payload(
         )
 
         # Extract (blocking, usually cached)
-        extracted_text = PromptStudioHelper.dynamic_extractor(
+        extract_result = PromptStudioHelper.dynamic_extractor(
             profile_manager=profile_manager,
             file_path=file_path,
             org_id=org_id,
             document_id=document_id,
             run_id=run_id,
             enable_highlight=tool.enable_highlight,
         )
+        extracted_text = extract_result.text
 
         is_summary = tool.summarize_as_source
         if is_summary:
@@ -847,6 +861,9 @@ def build_fetch_response_payload(
         tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
             settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
         )
+        PromptStudioHelper._inject_signature_data_into_tool_settings(
+            tool_settings, extract_result
+        )
 
         file_hash = fs_instance.get_hash_from_file(path=extract_path)
 
@@ -962,14 +979,15 @@ def build_bulk_fetch_response_payload(
         )
 
         # Extract ONCE (blocking, usually cached)
-        extracted_text = PromptStudioHelper.dynamic_extractor(
+        extract_result = PromptStudioHelper.dynamic_extractor(
             profile_manager=profile_manager,
             file_path=file_path,
             org_id=org_id,
             document_id=document_id,
             run_id=run_id,
             enable_highlight=tool.enable_highlight,
         )
+        extracted_text = extract_result.text
 
         is_summary = tool.summarize_as_source
         if is_summary:
@@ -1037,6 +1055,9 @@ def build_bulk_fetch_response_payload(
         tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
             settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
         )
+        PromptStudioHelper._inject_signature_data_into_tool_settings(
+            tool_settings, extract_result
+        )
 
         file_hash = fs_instance.get_hash_from_file(path=extract_path)
 
@@ -1137,7 +1158,7 @@ def build_single_pass_payload(
         )
 
         # Extract (blocking, usually cached)
-        PromptStudioHelper.dynamic_extractor(
+        extract_result = PromptStudioHelper.dynamic_extractor(
             profile_manager=default_profile,
             file_path=doc_path,
             org_id=org_id,
@@ -1176,6 +1197,9 @@ def build_single_pass_payload(
             or TSPKeys.SIMPLE,
             TSPKeys.SIMILARITY_TOP_K: default_profile.similarity_top_k,
         }
+        PromptStudioHelper._inject_signature_data_into_tool_settings(
+            tool_settings, extract_result
+        )
 
         lookup_configs = get_lookup_configs_for_tool(tool, prompts=prompts)
         if lookup_configs:
@@ -1381,14 +1405,15 @@ def index_document(
             tool=util,
         )
 
-        extracted_text = PromptStudioHelper.dynamic_extractor(
+        extract_result = PromptStudioHelper.dynamic_extractor(
             profile_manager=default_profile,
             file_path=file_path,
             org_id=org_id,
             document_id=document_id,
             run_id=run_id,
             enable_highlight=tool.enable_highlight,
         )
+        extracted_text = extract_result.text
         if tool.summarize_context:
             summarize_file_path = PromptStudioHelper.summarize(
                 file_name, org_id, run_id, tool
@@ -1626,7 +1651,7 @@ def _execute_single_prompt(
             # Validation responses are user-facing; DRF renders them as-is.
             raise
         except Exception as e:
-            logger.error(
+            logger.exception(
                 f"[{tool.tool_id}] Error while fetching response for "
                 f"prompt {id} and doc {document_id}: {e}"
             )
@@ -1694,7 +1719,7 @@ def _execute_prompts_in_single_pass(
             # Validation responses are user-facing; DRF renders them as-is.
             raise
         except Exception as e:
-            logger.error(
+            logger.exception(
                 f"[{tool.tool_id}] Error while fetching single pass response: {e}"
             )
             PromptStudioHelper._publish_log(
@@ -1856,14 +1881,15 @@ def _fetch_response(
             tool=util,
         )
         logger.info(f"Extracting text from {file_path} for {doc_id}")
-        extracted_text = PromptStudioHelper.dynamic_extractor(
+        extract_result = PromptStudioHelper.dynamic_extractor(
             profile_manager=profile_manager,
             file_path=file_path,
             org_id=org_id,
             document_id=document_id,
             run_id=run_id,
             enable_highlight=tool.enable_highlight,
         )
+        extracted_text = extract_result.text
         logger.info(f"Extracted text from {file_path} for {doc_id}")
         if is_summary:
             profile_manager.chunk_size = 0
@@ -1974,6 +2000,9 @@ def _fetch_response(
         tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
             settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
         )
+        PromptStudioHelper._inject_signature_data_into_tool_settings(
+            tool_settings, extract_result
+        )
         file_hash = fs_instance.get_hash_from_file(path=doc_path)
 
         payload = {
@@ -2184,7 +2213,7 @@ def dynamic_indexer(
                 msg = e.actual_err.response.json().get("error", str(e))
 
             msg = f"Error while indexing '{filename}'. {msg}"
-            logger.error(msg, stack_info=True, exc_info=True)
+            logger.exception(msg, stack_info=True)
             PromptStudioHelper._publish_log(
                 {"tool_id": tool_id, "run_id": run_id, "doc_name": filename},
                 LogLevels.ERROR,
@@ -2235,7 +2264,7 @@ def _fetch_single_pass_response(
         file_path = os.path.join(
             directory, "extract", os.path.splitext(filename)[0] + ".txt"
         )
-        PromptStudioHelper.dynamic_extractor(
+        extract_result = PromptStudioHelper.dynamic_extractor(
             profile_manager=default_profile,
             file_path=input_file_path,
             org_id=org_id,
@@ -2273,6 +2302,9 @@ def _fetch_single_pass_response(
             default_profile.retrieval_strategy or TSPKeys.SIMPLE
         )
         tool_settings[TSPKeys.SIMILARITY_TOP_K] = default_profile.similarity_top_k
+        PromptStudioHelper._inject_signature_data_into_tool_settings(
+            tool_settings, extract_result
+        )
         for prompt in prompts:
             if not prompt.prompt:
                 raise EmptyPromptError()
@@ -2332,6 +2364,83 @@ def get_tool_from_tool_id(tool_id: str) -> CustomTool | None:
         except CustomTool.DoesNotExist:
             return None
 
+    @staticmethod
+    def _log_signature_capture(
+        signature_metadata: dict[str, Any] | None,
+        signature_page_references: dict[str, Any] | None,
+        document_id: str,
+    ) -> None:
+        """Log (INFO) the keys of signature data returned by an extract run."""
+        if not signature_metadata and not signature_page_references:
+            return  # nothing was captured, so there is nothing to report
+        logger.info(
+            "DOC_INSIGHTS dynamic_extractor: captured signature data "
+            "(pages=%s, refs=%s) for document %s",
+            list((signature_metadata or {}).keys()),
+            list((signature_page_references or {}).keys()),
+            document_id,
+        )
+
+    @staticmethod
+    def _inject_signature_data_into_tool_settings(
+        tool_settings: dict[str, Any],
+        extract_result: "ExtractResult",
+    ) -> None:
+        """Copy the non-empty signature fields of ``extract_result`` into
+        ``tool_settings``, which is mutated in place.
+
+        A falsy field (``None`` or empty) leaves its key untouched.
+        """
+        sig_meta = extract_result.signature_metadata
+        sig_refs = extract_result.signature_page_references
+        if sig_meta:
+            tool_settings[TSPKeys.SIGNATURE_METADATA] = sig_meta
+        if sig_refs:
+            tool_settings[TSPKeys.SIGNATURE_PAGE_REFERENCES] = sig_refs
+
+    @staticmethod
+    def _signature_sidecar_path(extract_file_path: str) -> str:
+        """Replace the path's final suffix (if any) with ``.doc_insights.json``."""
+        return f"{Path(extract_file_path).with_suffix('')}.doc_insights.json"
+
+    @staticmethod
+    def _load_signature_sidecar(
+        extract_file_path: str,
+        fs_instance: Any,
+    ) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
+        """Return ``(signature_metadata, signature_page_references)`` from the
+        sidecar, or ``(None, None)`` when it is missing, unreadable, or does
+        not hold a JSON object (the latter two are logged as warnings).
+
+        Signature data is only written by the executor when a document
+        contains signatures in document_insights mode; cache-hit calls
+        for documents extracted in other modes legitimately have no
+        sidecar, so absence is not an error.
+        """
+        sidecar_path = PromptStudioHelper._signature_sidecar_path(extract_file_path)
+        try:
+            raw = fs_instance.read(path=sidecar_path, mode="r")
+        except FileNotFoundError:
+            return None, None
+        except Exception as e:
+            logger.warning(
+                "DOC_INSIGHTS sidecar: failed to read %s: %s", sidecar_path, e
+            )
+            return None, None
+        try:
+            data = json.loads(raw)
+            # Valid JSON that is not an object (list, null, ...) has no .get().
+            if not isinstance(data, dict):
+                raise ValueError(f"expected a JSON object, got {type(data).__name__}")
+        except (TypeError, ValueError) as e:
+            logger.warning(
+                "DOC_INSIGHTS sidecar: failed to parse %s: %s", sidecar_path, e
+            )
+            return None, None
+        sig_meta = data.get("signature_metadata") or None
+        sig_refs = data.get("signature_page_references") or None
+        return sig_meta, sig_refs
+
     @staticmethod
     def dynamic_extractor(
         file_path: str,
@@ -2340,7 +2449,7 @@ def dynamic_extractor(
         org_id: str,
         profile_manager: ProfileManager,
         document_id: str,
-    ) -> str:
+    ) -> ExtractResult:
         # Guard against None metadata (when adapter_metadata_b is None)
         metadata = profile_manager.x2text.metadata or {}
         x2text_config_hash = ToolUtils.hash_str(json.dumps(metadata, sort_keys=True))
@@ -2370,7 +2479,15 @@ def dynamic_extractor(
             try:
                 extracted_text = fs_instance.read(path=extract_file_path, mode="r")
                 logger.info("Extracted text found. Reading from file..")
-                return extracted_text
+                sig_meta, sig_refs = PromptStudioHelper._load_signature_sidecar(
+                    extract_file_path=extract_file_path,
+                    fs_instance=fs_instance,
+                )
+                return ExtractResult(
+                    text=extracted_text,
+                    signature_metadata=sig_meta,
+                    signature_page_references=sig_refs,
+                )
             except FileNotFoundError as e:
                 logger.warning(
                     f"File not found for extraction. {extract_file_path}. {e}"
@@ -2424,6 +2541,11 @@ def dynamic_extractor(
             )
 
         extracted_text = result.data.get("extracted_text", "")
+        signature_metadata = result.data.get("signature_metadata")
+        signature_page_references = result.data.get("signature_page_references")
+        PromptStudioHelper._log_signature_capture(
+            signature_metadata, signature_page_references, document_id
+        )
         success = PromptStudioIndexHelper.mark_extraction_status(
             document_id=document_id,
             profile_manager=profile_manager,
@@ -2436,7 +2558,11 @@ def dynamic_extractor(
                 f"Extraction completed but status not saved."
             )
 
-        return extracted_text
+        return ExtractResult(
+            text=extracted_text,
+            signature_metadata=signature_metadata,
+            signature_page_references=signature_page_references,
+        )
 
     @staticmethod
     def export_project_settings(tool: CustomTool) -> dict:

diff --git a/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx b/frontend/src/components/custom-tools/prompt-card/DisplayPromptResult.jsx
@@ -415,14 +415,22 @@ const TextResult = ({
 
   const confidence = getConfidenceForText();
 
-  return enableHighlight ? (
+  // Make the answer clickable when the tool has highlighting enabled OR
+  // when the backend produced highlight_data (e.g. signature page refs
+  // from LLMWhisperer's document_insights mode), so signature highlights
+  // still work without requiring the separate enable_highlight toggle.
+  const hasHighlightData =
+    Array.isArray(highlightData) && highlightData.length > 0;
+  const isClickable = enableHighlight || hasHighlightData;
+
+  return isClickable ? (
     <Typography.Text
       wrap
       onClick={() =>
         onSelectHighlight(highlightData, promptId, profileId, confidence)
       }
       className={`prompt-output-result json-value ${
-        highlightData ? "clickable" : ""
+        hasHighlightData ? "clickable" : ""
       } ${selectedHighlight?.highlightedPrompt === promptId ? "selected" : ""}`}
     >
       {parsedOutput}