Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/prompt_studio/prompt_studio_core_v2/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ class ToolStudioPromptKeys:
EXECUTION_SOURCE = "execution_source"
LINE_ITEM = "line-item"
CUSTOM_DATA = "custom_data"
SIGNATURE_METADATA = "signature_metadata"
SIGNATURE_PAGE_REFERENCES = "signature_page_references"
# Webhook postprocessing settings
ENABLE_POSTPROCESSING_WEBHOOK = "enable_postprocessing_webhook"
POSTPROCESSING_WEBHOOK_URL = "postprocessing_webhook_url"
Expand Down
6 changes: 4 additions & 2 deletions backend/prompt_studio/prompt_studio_core_v2/internal_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,13 @@ def indexing_status(request):
user_id = data.get("user_id", "")
doc_id_key = data.get("doc_id_key", "")

if not action or not org_id or not user_id or not doc_id_key:
# user_id may be empty (e.g. mock auth users) - it's only used as a
# Redis cache key fragment, so empty is acceptable.
if not action or not org_id or not doc_id_key:
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return JsonResponse(
{
"success": False,
"error": "action, org_id, user_id, doc_id_key are required",
"error": "action, org_id, doc_id_key are required",
},
status=status.HTTP_400_BAD_REQUEST,
)
Expand Down
154 changes: 140 additions & 14 deletions backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import time
import uuid
from pathlib import Path
from typing import Any
from typing import Any, NamedTuple

from account_v2.constants import Common
from account_v2.models import User
Expand Down Expand Up @@ -85,7 +85,20 @@
CHOICES_JSON = "/static/select_choices.json"
ERROR_MSG = "User %s doesn't have access to adapter %s"

logger = logging.getLogger(__name__)

class ExtractResult(NamedTuple):
    """Return value of ``PromptStudioHelper.dynamic_extractor``.

    Bundles the extracted text with optional signature data.

    ``signature_metadata`` and ``signature_page_references`` are populated
    only when the x2text adapter is LLMWhisperer V2 in ``document_insights``
    mode and the document contains signatures; both default to ``None``.
    They are read either from the live extract dispatch result (cache miss)
    or from the on-disk ``.doc_insights.json`` sidecar (cache hit).
    """

    # Extracted document text.
    text: str
    # Signature metadata from the extraction, if any was produced.
    signature_metadata: dict[str, Any] | None = None
    # Signature page references from the extraction, if any were produced.
    signature_page_references: dict[str, Any] | None = None


class PromptStudioHelper:
Expand Down Expand Up @@ -742,14 +755,15 @@ def build_fetch_response_payload(
)

# Extract (blocking, usually cached)
extracted_text = PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=profile_manager,
file_path=file_path,
org_id=org_id,
document_id=document_id,
run_id=run_id,
enable_highlight=tool.enable_highlight,
)
extracted_text = extract_result.text

is_summary = tool.summarize_as_source
if is_summary:
Expand Down Expand Up @@ -847,6 +861,9 @@ def build_fetch_response_payload(
tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
)
PromptStudioHelper._inject_signature_data_into_tool_settings(
tool_settings, extract_result
)

file_hash = fs_instance.get_hash_from_file(path=extract_path)

Expand Down Expand Up @@ -962,14 +979,15 @@ def build_bulk_fetch_response_payload(
)

# Extract ONCE (blocking, usually cached)
extracted_text = PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=profile_manager,
file_path=file_path,
org_id=org_id,
document_id=document_id,
run_id=run_id,
enable_highlight=tool.enable_highlight,
)
extracted_text = extract_result.text

is_summary = tool.summarize_as_source
if is_summary:
Expand Down Expand Up @@ -1037,6 +1055,9 @@ def build_bulk_fetch_response_payload(
tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
)
PromptStudioHelper._inject_signature_data_into_tool_settings(
tool_settings, extract_result
)

file_hash = fs_instance.get_hash_from_file(path=extract_path)

Expand Down Expand Up @@ -1137,7 +1158,7 @@ def build_single_pass_payload(
)

# Extract (blocking, usually cached)
PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=default_profile,
file_path=doc_path,
org_id=org_id,
Expand Down Expand Up @@ -1176,6 +1197,9 @@ def build_single_pass_payload(
or TSPKeys.SIMPLE,
TSPKeys.SIMILARITY_TOP_K: default_profile.similarity_top_k,
}
PromptStudioHelper._inject_signature_data_into_tool_settings(
tool_settings, extract_result
)

lookup_configs = get_lookup_configs_for_tool(tool, prompts=prompts)
if lookup_configs:
Expand Down Expand Up @@ -1381,14 +1405,15 @@ def index_document(
tool=util,
)

extracted_text = PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=default_profile,
file_path=file_path,
org_id=org_id,
document_id=document_id,
run_id=run_id,
enable_highlight=tool.enable_highlight,
)
extracted_text = extract_result.text
if tool.summarize_context:
summarize_file_path = PromptStudioHelper.summarize(
file_name, org_id, run_id, tool
Expand Down Expand Up @@ -1626,7 +1651,7 @@ def _execute_single_prompt(
# Validation responses are user-facing; DRF renders them as-is.
raise
except Exception as e:
logger.error(
logger.exception(
f"[{tool.tool_id}] Error while fetching response for "
f"prompt {id} and doc {document_id}: {e}"
)
Expand Down Expand Up @@ -1694,7 +1719,7 @@ def _execute_prompts_in_single_pass(
# Validation responses are user-facing; DRF renders them as-is.
raise
except Exception as e:
logger.error(
logger.exception(
f"[{tool.tool_id}] Error while fetching single pass response: {e}"
)
PromptStudioHelper._publish_log(
Expand Down Expand Up @@ -1856,14 +1881,15 @@ def _fetch_response(
tool=util,
)
logger.info(f"Extracting text from {file_path} for {doc_id}")
extracted_text = PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=profile_manager,
file_path=file_path,
org_id=org_id,
document_id=document_id,
run_id=run_id,
enable_highlight=tool.enable_highlight,
)
extracted_text = extract_result.text
logger.info(f"Extracted text from {file_path} for {doc_id}")
if is_summary:
profile_manager.chunk_size = 0
Expand Down Expand Up @@ -1974,6 +2000,9 @@ def _fetch_response(
tool_settings[TSPKeys.WORD_CONFIDENCE_POSTAMBLE] = getattr(
settings, TSPKeys.WORD_CONFIDENCE_POSTAMBLE.upper(), ""
)
PromptStudioHelper._inject_signature_data_into_tool_settings(
tool_settings, extract_result
)
file_hash = fs_instance.get_hash_from_file(path=doc_path)

payload = {
Expand Down Expand Up @@ -2184,7 +2213,7 @@ def dynamic_indexer(
msg = e.actual_err.response.json().get("error", str(e))

msg = f"Error while indexing '{filename}'. {msg}"
logger.error(msg, stack_info=True, exc_info=True)
logger.exception(msg, stack_info=True)
PromptStudioHelper._publish_log(
{"tool_id": tool_id, "run_id": run_id, "doc_name": filename},
LogLevels.ERROR,
Expand Down Expand Up @@ -2235,7 +2264,7 @@ def _fetch_single_pass_response(
file_path = os.path.join(
directory, "extract", os.path.splitext(filename)[0] + ".txt"
)
PromptStudioHelper.dynamic_extractor(
extract_result = PromptStudioHelper.dynamic_extractor(
profile_manager=default_profile,
file_path=input_file_path,
org_id=org_id,
Expand Down Expand Up @@ -2273,6 +2302,9 @@ def _fetch_single_pass_response(
default_profile.retrieval_strategy or TSPKeys.SIMPLE
)
tool_settings[TSPKeys.SIMILARITY_TOP_K] = default_profile.similarity_top_k
PromptStudioHelper._inject_signature_data_into_tool_settings(
tool_settings, extract_result
)
for prompt in prompts:
if not prompt.prompt:
raise EmptyPromptError()
Expand Down Expand Up @@ -2332,6 +2364,83 @@ def get_tool_from_tool_id(tool_id: str) -> CustomTool | None:
except CustomTool.DoesNotExist:
return None

@staticmethod
def _log_signature_capture(
    signature_metadata: dict[str, Any] | None,
    signature_page_references: dict[str, Any] | None,
    document_id: str,
) -> None:
    """Emit an info log when an extract dispatch returned signature data.

    Does nothing when both ``signature_metadata`` and
    ``signature_page_references`` are empty or ``None``.
    """
    if not signature_metadata and not signature_page_references:
        return
    # Only the dictionary keys are logged, never the payloads themselves.
    page_keys = list(signature_metadata.keys()) if signature_metadata else []
    ref_keys = (
        list(signature_page_references.keys()) if signature_page_references else []
    )
    logger.info(
        "DOC_INSIGHTS dynamic_extractor: captured signature data "
        "(pages=%s, refs=%s) for document %s",
        page_keys,
        ref_keys,
        document_id,
    )

@staticmethod
def _inject_signature_data_into_tool_settings(
    tool_settings: dict[str, Any],
    extract_result: "ExtractResult",
) -> None:
    """Copy signature data from ``extract_result`` into ``tool_settings``.

    ``tool_settings`` is mutated in place. A key is written only when the
    corresponding field on ``extract_result`` is truthy, so the call is a
    no-op when the extraction produced no signature data.
    """
    candidates = (
        (TSPKeys.SIGNATURE_METADATA, extract_result.signature_metadata),
        (
            TSPKeys.SIGNATURE_PAGE_REFERENCES,
            extract_result.signature_page_references,
        ),
    )
    for settings_key, payload in candidates:
        if payload:
            tool_settings[settings_key] = payload

@staticmethod
def _signature_sidecar_path(extract_file_path: str) -> str:
    """Return the ``.doc_insights.json`` sidecar path for an extract file.

    The final suffix of ``extract_file_path`` is dropped and
    ``.doc_insights.json`` is appended in its place.
    """
    without_suffix = Path(extract_file_path).with_suffix("")
    return f"{without_suffix}.doc_insights.json"

@staticmethod
def _load_signature_sidecar(
    extract_file_path: str,
    fs_instance: Any,
) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
    """Return ``(signature_metadata, signature_page_references)`` from the
    sidecar, or ``(None, None)`` if the sidecar is missing or unusable.

    Signature data is only written by the executor when a document
    contains signatures in document_insights mode; cache-hit calls
    for documents extracted in other modes legitimately have no
    sidecar, so absence is not an error.

    Read failures, unparseable JSON and JSON whose top-level value is not
    an object are logged as warnings and treated like a missing sidecar,
    so a bad sidecar never breaks the cache-hit path.

    Args:
        extract_file_path: Path of the cached extracted-text file; the
            sidecar path is derived from it.
        fs_instance: File-storage object exposing ``read(path=, mode=)``.

    Returns:
        A 2-tuple of the signature metadata and signature page references.
        Each element is ``None`` when absent or empty in the sidecar.
    """
    sidecar_path = PromptStudioHelper._signature_sidecar_path(extract_file_path)
    try:
        raw = fs_instance.read(path=sidecar_path, mode="r")
    except FileNotFoundError:
        # No sidecar is the normal case for documents without signatures.
        return None, None
    except Exception as e:
        # Best effort: signature data is optional, so never fail the extract.
        logger.warning(
            "DOC_INSIGHTS sidecar: failed to read %s: %s",
            sidecar_path,
            e,
        )
        return None, None
    try:
        data = json.loads(raw)
    except (TypeError, ValueError) as e:
        logger.warning(
            "DOC_INSIGHTS sidecar: failed to parse %s: %s",
            sidecar_path,
            e,
        )
        return None, None
    # json.loads may legally return a list, str, number or None; only an
    # object supports the key lookups below.
    if not isinstance(data, dict):
        logger.warning(
            "DOC_INSIGHTS sidecar: unexpected top-level %s in %s; ignoring",
            type(data).__name__,
            sidecar_path,
        )
        return None, None
    # Normalise empty containers to None so callers can test truthiness.
    sig_meta = data.get("signature_metadata") or None
    sig_refs = data.get("signature_page_references") or None
    return sig_meta, sig_refs
Comment thread
coderabbitai[bot] marked this conversation as resolved.

@staticmethod
def dynamic_extractor(
file_path: str,
Expand All @@ -2340,7 +2449,7 @@ def dynamic_extractor(
org_id: str,
profile_manager: ProfileManager,
document_id: str,
) -> str:
) -> ExtractResult:
# Guard against None metadata (when adapter_metadata_b is None)
metadata = profile_manager.x2text.metadata or {}
x2text_config_hash = ToolUtils.hash_str(json.dumps(metadata, sort_keys=True))
Expand Down Expand Up @@ -2370,7 +2479,15 @@ def dynamic_extractor(
try:
extracted_text = fs_instance.read(path=extract_file_path, mode="r")
logger.info("Extracted text found. Reading from file..")
return extracted_text
sig_meta, sig_refs = PromptStudioHelper._load_signature_sidecar(
extract_file_path=extract_file_path,
fs_instance=fs_instance,
)
return ExtractResult(
text=extracted_text,
signature_metadata=sig_meta,
signature_page_references=sig_refs,
)
except FileNotFoundError as e:
logger.warning(
f"File not found for extraction. {extract_file_path}. {e}"
Expand Down Expand Up @@ -2424,6 +2541,11 @@ def dynamic_extractor(
)

extracted_text = result.data.get("extracted_text", "")
signature_metadata = result.data.get("signature_metadata")
signature_page_references = result.data.get("signature_page_references")
PromptStudioHelper._log_signature_capture(
signature_metadata, signature_page_references, document_id
)
success = PromptStudioIndexHelper.mark_extraction_status(
document_id=document_id,
profile_manager=profile_manager,
Expand All @@ -2436,7 +2558,11 @@ def dynamic_extractor(
f"Extraction completed but status not saved."
)

return extracted_text
return ExtractResult(
text=extracted_text,
signature_metadata=signature_metadata,
signature_page_references=signature_page_references,
)

@staticmethod
def export_project_settings(tool: CustomTool) -> dict:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,14 +415,22 @@ const TextResult = ({

const confidence = getConfidenceForText();

return enableHighlight ? (
// Make the answer clickable when the tool has highlighting enabled OR
// when the backend produced highlight_data (e.g. signature page refs
// from LLMWhisperer's document_insights mode), so signature highlights
// still work without requiring the separate enable_highlight toggle.
const hasHighlightData =
Array.isArray(highlightData) && highlightData.length > 0;
const isClickable = enableHighlight || hasHighlightData;

return isClickable ? (
<Typography.Text
wrap
onClick={() =>
onSelectHighlight(highlightData, promptId, profileId, confidence)
}
className={`prompt-output-result json-value ${
highlightData ? "clickable" : ""
hasHighlightData ? "clickable" : ""
} ${selectedHighlight?.highlightedPrompt === promptId ? "selected" : ""}`}
>
{parsedOutput}
Expand Down
Loading