From ad8db2e87c77978928d046c95565e9e60c1b1f4e Mon Sep 17 00:00:00 2001
From: suguanyang <wangbinqi77@gmail.com>
Date: Fri, 15 May 2026 05:15:19 -0700
Subject: [PATCH] feat: sync SDK with current worker ZIP contract and agentic
 retrieval API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add typed ChunkMetadata model for known worker metadata fields while
preserving forward compatibility via extra="allow".

Issue #21 — ParseResult ZIP models
Issue #22 — Agentic retrieval API
---
 docs/usage.md                       |  77 ++++---
 src/knowhere/lib/result_parser.py   |  77 ++-----
 src/knowhere/resources/retrieval.py |   6 +
 src/knowhere/types/result.py        | 145 +++++++++----
 src/knowhere/types/retrieval.py     |  13 +-
 tests/test_models.py                |  57 +-----
 tests/test_result_parser.py         | 305 ++++++++++++++++++----------
 tests/test_retrieval.py             |  93 +++++++++
 8 files changed, 490 insertions(+), 283 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index e1c0714..d982cb0 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1,5 +1,9 @@
 # Knowhere Python SDK — Usage Guide
 
+> **Recent changes:** Chunk metadata fields (`tokens`, `keywords`, `summary`,
+> `length`, etc.) are no longer flattened to the chunk surface. Access them
+> through `chunk.metadata` instead. See [Chunk Types](#chunk-types).
+
 Comprehensive reference for every feature, parameter, and pattern in the SDK.
 
 ## Table of Contents
@@ -219,8 +223,13 @@ result.table_chunks               # List[TableChunk]
 # Lookup by ID
 chunk = result.getChunk("chunk_42")
 
-# Hierarchy data (document structure tree, if available)
-result.hierarchy
+# Document navigation tree (from doc_nav.json, current worker output)
+result.doc_nav                # DocNav | None (None when the ZIP has no doc_nav.json)
+result.doc_nav.sections       # List[DocNavSection] — tree of titles/paths/levels
+result.doc_nav.resources      # DocNavResources | None — image/table resource summaries
+
+# Legacy hierarchy (from hierarchy.json, older worker output)
+result.hierarchy              # Any | None
 
 # Raw ZIP bytes (for archival)
 result.raw_zip
@@ -239,49 +248,48 @@ result.save("./output/report/")
 
 ## Chunk Types
 
-Every chunk shares a base set of fields (`chunk_id`, `type`, `content`, `path`). Each type adds its own fields.
+Every chunk shares a base set of fields (`chunk_id`, `type`, `content`, `path`,
+`metadata`). Worker metadata is kept in `metadata`, a typed `ChunkMetadata`
+model — it is **not** flattened to top-level chunk properties.
 
-### TextChunk
+### Base fields (all chunk types)
 
 | Field | Type | Description |
 |-------|------|-------------|
 | `chunk_id` | `str` | Unique identifier |
-| `type` | `str` | Always `"text"` |
-| `content` | `str` | The text content |
-| `path` | `str \| None` | Document structure path (e.g. `"Section 1 > Subsection 2"`) |
-| `length` | `int` | Character count |
-| `tokens` | `List[str] \| None` | Tokenized words returned by the parser pipeline |
-| `keywords` | `List[str] \| None` | Extracted keywords (requires `summary_txt: True`) |
-| `summary` | `str \| None` | AI-generated summary (requires `summary_txt: True`) |
-| `relationships` | `List \| None` | Relationships to other chunks |
+| `type` | `str` | `"text"`, `"image"`, or `"table"` |
+| `content` | `str` | Text content or placeholder |
+| `path` | `str \| None` | Document structure path |
+| `metadata` | `ChunkMetadata` | Typed worker metadata (`tokens`, `keywords`, `summary`, `length`, `page_nums`, etc.); unknown fields are preserved |
+
+### TextChunk
 
 ```python
 for chunk in result.text_chunks:
     print(f"[{chunk.chunk_id}] {chunk.content[:60]}...")
-    if chunk.keywords:
-        print(f"  Keywords: {', '.join(chunk.keywords)}")
-    if chunk.summary:
-        print(f"  Summary: {chunk.summary}")
+    # Metadata is in chunk.metadata, not flattened:
+    keywords = chunk.metadata.keywords or []
+    summary = chunk.metadata.summary
+    if keywords:
+        print(f"  Keywords: {', '.join(keywords)}")
+    if summary:
+        print(f"  Summary: {summary}")
 ```
 
 ### ImageChunk
 
 | Field | Type | Description |
 |-------|------|-------------|
-| `chunk_id` | `str` | Unique identifier |
-| `type` | `str` | Always `"image"` |
-| `content` | `str` | Text content associated with the image |
 | `file_path` | `str \| None` | Path within the ZIP |
-| `original_name` | `str \| None` | Original filename |
-| `summary` | `str \| None` | AI-generated image description (requires `summary_image: True`) |
 | `data` | `bytes` | Raw image bytes (loaded from ZIP) |
 | `format` | `str \| None` | Image format inferred from extension (property) |
 
 ```python
 for img in result.image_chunks:
     print(f"{img.file_path} ({len(img.data)} bytes, {img.format})")
-    if img.summary:
-        print(f"  Description: {img.summary}")
+    summary = img.metadata.summary
+    if summary:
+        print(f"  Description: {summary}")
     img.save("./output/images/")  # writes to disk
 ```
 
@@ -289,13 +297,7 @@ for img in result.image_chunks:
 
 | Field | Type | Description |
 |-------|------|-------------|
-| `chunk_id` | `str` | Unique identifier |
-| `type` | `str` | Always `"table"` |
-| `content` | `str` | Text representation of the table |
 | `file_path` | `str \| None` | Path within the ZIP |
-| `original_name` | `str \| None` | Original filename |
-| `table_type` | `str \| None` | Table classification |
-| `summary` | `str \| None` | AI-generated table summary (requires `summary_table: True`) |
 | `html` | `str` | Full HTML of the table (loaded from ZIP) |
 
 ```python
@@ -471,6 +473,19 @@ response = client.retrieval.query(
     top_k=5,
 )
 
+# Agentic mode (LLM navigation + answer synthesis)
+response = client.retrieval.query(
+    namespace="support-center",
+    query="How do I pair a Bluetooth headset?",
+    use_agentic=True,
+    top_k=5,
+)
+print(response.answer_text)          # LLM-generated natural-language answer
+print(response.router_used)          # "workflow_single_step", "small_kb_all", etc.
+for ref in response.referenced_chunks:
+    print(ref.get("chunk_id"), ref.get("asset_url"))
+
+# Legacy results are always available
 for result in response.results:
     print(result.content)
     print(result.score)
@@ -479,6 +494,10 @@ for result in response.results:
     print(result.source.section_path)
 ```
 
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `use_agentic` | `bool \| None` | `None` | Force agentic (`True`) or legacy (`False`) retrieval. `None` uses server default. |
+
 Retrieval results expose `content`, not the older parse-result `text` field.
 Media results may include `asset_url` when the server can sign the referenced
 artifact.
diff --git a/src/knowhere/lib/result_parser.py b/src/knowhere/lib/result_parser.py
index 95f6241..eac4579 100644
--- a/src/knowhere/lib/result_parser.py
+++ b/src/knowhere/lib/result_parser.py
@@ -13,13 +13,13 @@
 from knowhere._logging import getLogger
 from knowhere.types.result import (
     Chunk,
+    DocNav,
     ImageChunk,
     Manifest,
     ParseResult,
     SlimChunk,
     TableChunk,
     TextChunk,
-    TextChunkTokens,
 )
 
 _logger = getLogger()
@@ -81,38 +81,6 @@ def _extractFilePath(raw: Dict[str, Any]) -> Optional[str]:
     return fallback
 
 
-def _normalizeTokenList(raw_tokens: List[Any]) -> List[str]:
-    """Return a string-only token list with empty values removed."""
-    normalized_tokens: List[str] = []
-    for raw_token in raw_tokens:
-        token_text: str = str(raw_token).strip()
-        if token_text:
-            normalized_tokens.append(token_text)
-    return normalized_tokens
-
-
-def _parseTextChunkTokens(
-    raw_tokens: Any,
-    *,
-    chunk_id: str,
-) -> Optional[TextChunkTokens]:
-    """Normalize text chunk tokens from the current backend payload."""
-    if raw_tokens is None:
-        return None
-    if isinstance(raw_tokens, bool):
-        raise KnowhereError(
-            f"Invalid tokens payload for text chunk '{chunk_id}': expected list[str], got bool."
-        )
-    if isinstance(raw_tokens, list):
-        return _normalizeTokenList(raw_tokens)
-
-    raise KnowhereError(
-        "Invalid tokens payload for text chunk "
-        f"'{chunk_id}': expected list[str], "
-        f"got {type(raw_tokens).__name__}."
-    )
-
-
 def _buildChunks(
     raw_chunks: List[Dict[str, Any]],
     zf: zipfile.ZipFile,
@@ -125,58 +93,39 @@ def _buildChunks(
 
         if chunk_type == "image":
             image_data: bytes = b""
-            # file_path may be at top level, inside metadata, or use path as fallback
             file_path: Optional[str] = _extractFilePath(raw)
             if file_path:
                 image_data = _readZipBytes(zf, file_path) or b""
-            metadata: Dict[str, Any] = raw.get("metadata", {})
             chunk: Chunk = ImageChunk(
                 chunk_id=raw.get("chunk_id", ""),
                 type="image",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
-                page_nums=metadata.get("page_nums", raw.get("page_nums")),
-                length=metadata.get("length", raw.get("length", 0)),
                 file_path=file_path,
-                original_name=metadata.get("original_name", raw.get("original_name")),
-                summary=metadata.get("summary", raw.get("summary")),
                 data=image_data,
+                metadata=raw.get("metadata", {}),
             )
         elif chunk_type == "table":
             table_html: str = ""
             file_path = _extractFilePath(raw)
             if file_path:
                 table_html = _readZipText(zf, file_path) or ""
-            metadata = raw.get("metadata", {})
             chunk = TableChunk(
                 chunk_id=raw.get("chunk_id", ""),
                 type="table",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
-                page_nums=metadata.get("page_nums", raw.get("page_nums")),
-                length=metadata.get("length", raw.get("length", 0)),
                 file_path=file_path,
-                original_name=metadata.get("original_name", raw.get("original_name")),
-                table_type=metadata.get("table_type", raw.get("table_type")),
-                summary=metadata.get("summary", raw.get("summary")),
                 html=table_html,
+                metadata=raw.get("metadata", {}),
             )
         else:
-            metadata = raw.get("metadata", {})
-            chunk_id: str = raw.get("chunk_id", "")
-            raw_tokens: Any = metadata.get("tokens", raw.get("tokens"))
             chunk = TextChunk(
-                chunk_id=chunk_id,
+                chunk_id=raw.get("chunk_id", ""),
                 type="text",
                 content=raw.get("content", ""),
                 path=raw.get("path"),
-                page_nums=metadata.get("page_nums", raw.get("page_nums")),
-                length=metadata.get("length", raw.get("length", 0)),
-                tokens=_parseTextChunkTokens(raw_tokens, chunk_id=chunk_id),
-                keywords=metadata.get("keywords", raw.get("keywords")),
-                summary=metadata.get("summary", raw.get("summary")),
-                connect_to=metadata.get("connect_to", raw.get("connect_to")),
-                relationships=metadata.get("relationships", raw.get("relationships")),
+                metadata=raw.get("metadata", {}),
             )
 
         chunks.append(chunk)
@@ -229,7 +178,15 @@ def parseResultZip(
     # -- Full markdown --
     full_markdown: str = _readZipText(zf, "full.md") or ""
 
-    # -- Hierarchy --
+    # -- DocNav (current worker output) --
+    doc_nav_text: Optional[str] = _readZipText(zf, "doc_nav.json")
+    doc_nav: Optional[DocNav] = (
+        DocNav.model_validate(json.loads(doc_nav_text))
+        if doc_nav_text
+        else None
+    )
+
+    # -- Hierarchy (legacy — current worker no longer emits this) --
     hierarchy_text: Optional[str] = _readZipText(zf, "hierarchy.json")
     hierarchy: Optional[Any] = (
         json.loads(hierarchy_text) if hierarchy_text else None
@@ -263,11 +220,13 @@ def parseResultZip(
     return ParseResult(
         manifest=manifest,
         chunks=chunks,
-        chunks_slim=chunks_slim,
         full_markdown=full_markdown,
+        raw_zip=zip_bytes,
+        doc_nav=doc_nav,
+        # Legacy — the current worker no longer emits these files
+        chunks_slim=chunks_slim,
         hierarchy=hierarchy,
         toc_hierarchies=toc_hierarchies,
         kb_csv=kb_csv,
         hierarchy_view_html=hierarchy_view_html,
-        raw_zip=zip_bytes,
     )
diff --git a/src/knowhere/resources/retrieval.py b/src/knowhere/resources/retrieval.py
index 3b6b36c..9100be8 100644
--- a/src/knowhere/resources/retrieval.py
+++ b/src/knowhere/resources/retrieval.py
@@ -22,6 +22,7 @@ def query(
         query: str,
         namespace: Optional[str] = None,
         top_k: Optional[int] = None,
+        use_agentic: Optional[bool] = None,
         data_type: Optional[int] = None,
         signal_paths: Optional[list[str]] = None,
         filter_mode: Optional[RetrievalFilterMode] = None,
@@ -39,6 +40,8 @@ def query(
             body["namespace"] = namespace
         if top_k is not None:
             body["top_k"] = top_k
+        if use_agentic is not None:
+            body["use_agentic"] = use_agentic
         if data_type is not None:
             body["data_type"] = data_type
         if signal_paths is not None:
@@ -77,6 +80,7 @@ async def query(
         query: str,
         namespace: Optional[str] = None,
         top_k: Optional[int] = None,
+        use_agentic: Optional[bool] = None,
         data_type: Optional[int] = None,
         signal_paths: Optional[list[str]] = None,
         filter_mode: Optional[RetrievalFilterMode] = None,
@@ -94,6 +98,8 @@ async def query(
             body["namespace"] = namespace
         if top_k is not None:
             body["top_k"] = top_k
+        if use_agentic is not None:
+            body["use_agentic"] = use_agentic
         if data_type is not None:
             body["data_type"] = data_type
         if signal_paths is not None:
diff --git a/src/knowhere/types/result.py b/src/knowhere/types/result.py
index df83c19..0cce2da 100644
--- a/src/knowhere/types/result.py
+++ b/src/knowhere/types/result.py
@@ -9,7 +9,6 @@
 from typing import Any, Dict, List, Optional, Union
 
 from pydantic import BaseModel, Field
-from typing_extensions import TypeAlias
 
 from knowhere._exceptions import ValidationError
 
@@ -138,6 +137,44 @@ class Manifest(BaseModel):
     checksum: Optional[Checksum] = None
     statistics: Optional[Statistics] = None
     files: Optional[FileIndex] = None
+    hierarchy: Optional[Any] = Field(default=None, alias="HIERARCHY")
+
+
+# ---------------------------------------------------------------------------
+# DocNav models
+# ---------------------------------------------------------------------------
+
+
+class DocNavResourceItem(BaseModel):
+    """A single image or table resource entry in ``doc_nav.json``."""
+
+    path: str
+    summary: Optional[str] = None
+
+
+class DocNavResources(BaseModel):
+    """Image and table resource summaries from ``doc_nav.json``."""
+
+    images: List[DocNavResourceItem] = Field(default_factory=list)
+    tables: List[DocNavResourceItem] = Field(default_factory=list)
+
+
+class DocNavSection(BaseModel):
+    """A document section entry in the ``doc_nav.json`` navigation tree."""
+
+    title: str
+    path: str
+    level: int
+    summary: Optional[str] = None
+    chunk_count: int = 0
+    children: List["DocNavSection"] = Field(default_factory=list)
+
+
+class DocNav(BaseModel):
+    """Top-level document navigation structure from ``doc_nav.json``."""
+
+    sections: List[DocNavSection] = Field(default_factory=list)
+    resources: Optional[DocNavResources] = None
 
 
 # ---------------------------------------------------------------------------
@@ -145,6 +182,27 @@ class Manifest(BaseModel):
 # ---------------------------------------------------------------------------
 
 
+class ChunkMetadata(BaseModel):
+    """Known worker metadata fields for a chunk.
+
+    All fields are optional.  Unknown fields added by future worker
+    versions are preserved because ``model_config`` sets ``extra="allow"``.
+    """
+
+    model_config = {"extra": "allow"}
+
+    length: Optional[int] = None
+    page_nums: Optional[List[int]] = None
+    tokens: Optional[List[str]] = None
+    keywords: Optional[List[str]] = None
+    summary: Optional[str] = None
+    connect_to: Optional[List[Dict[str, Any]]] = None
+    file_path: Optional[str] = None
+    original_name: Optional[str] = None
+    table_type: Optional[str] = None
+    document_top_summary: Optional[str] = None
+
+
 class BaseChunk(BaseModel):
     """Fields shared by every chunk type."""
 
@@ -152,32 +210,20 @@ class BaseChunk(BaseModel):
     type: str
     content: str = ""
     path: Optional[str] = None
-    page_nums: Optional[List[int]] = None
-
-
-TextChunkTokens: TypeAlias = List[str]
+    metadata: ChunkMetadata = Field(default_factory=ChunkMetadata)
 
 
 class TextChunk(BaseChunk):
     """A text chunk extracted from the document."""
 
     type: str = "text"
-    length: int = 0
-    tokens: Optional[TextChunkTokens] = None
-    keywords: Optional[List[str]] = None
-    summary: Optional[str] = None
-    connect_to: Optional[List[Dict[str, Any]]] = None
-    relationships: Optional[List[Union[Dict[str, Any], str]]] = None
 
 
 class ImageChunk(BaseChunk):
     """An image chunk — carries raw bytes loaded from the ZIP."""
 
     type: str = "image"
-    length: int = 0
     file_path: Optional[str] = None
-    original_name: Optional[str] = None
-    summary: Optional[str] = None
     data: bytes = Field(default=b"", exclude=True)
 
     model_config = {"arbitrary_types_allowed": True}
@@ -193,13 +239,13 @@ def format(self) -> Optional[str]:
     def save(self, directory: Union[str, Path]) -> Path:
         """Write the image bytes to *directory*, returning the output path.
 
-        The filename is derived from ``original_name`` or ``file_path``,
-        sanitised for cross-platform safety.
+        The filename is the basename of ``file_path`` (or ``<chunk_id>.bin``
+        when unset), sanitised for cross-platform safety.
         """
         dir_path: Path = Path(directory)
         dir_path.mkdir(parents=True, exist_ok=True)
 
-        raw_name: str = self.original_name or os.path.basename(
+        raw_name: str = os.path.basename(
             self.file_path or f"{self.chunk_id}.bin"
         )
         safe_name: str = _sanitizeFilename(raw_name)
@@ -214,11 +260,7 @@ class TableChunk(BaseChunk):
     """A table chunk — carries HTML loaded from the ZIP."""
 
     type: str = "table"
-    length: int = 0
     file_path: Optional[str] = None
-    original_name: Optional[str] = None
-    table_type: Optional[str] = None
-    summary: Optional[str] = None
     html: str = Field(default="", exclude=True)
 
     def save(self, directory: Union[str, Path]) -> Path:
@@ -226,7 +268,7 @@ def save(self, directory: Union[str, Path]) -> Path:
         dir_path: Path = Path(directory)
         dir_path.mkdir(parents=True, exist_ok=True)
 
-        raw_name: str = self.original_name or os.path.basename(
+        raw_name: str = os.path.basename(
             self.file_path or f"{self.chunk_id}.html"
         )
         safe_name: str = _sanitizeFilename(raw_name)
@@ -242,12 +284,11 @@ def save(self, directory: Union[str, Path]) -> Path:
 
 
 class SlimChunk(BaseModel):
-    """Minimal chunk entry emitted in chunks_slim.json."""
+    """Minimal chunk entry emitted in chunks_slim.json (legacy)."""
 
     type: str
     path: Optional[str] = None
     content: str = ""
-    summary: Optional[str] = None
 
 
 # ---------------------------------------------------------------------------
@@ -259,48 +300,59 @@ class ParseResult:
     """Eagerly-loaded result of a document parsing job.
 
     Contains the manifest, all chunks (with image bytes and table HTML
-    already loaded), the full markdown, hierarchy data, and the raw ZIP
-    bytes for archival purposes.
+    already loaded), the full markdown, the document navigation tree,
+    and the raw ZIP bytes for archival purposes.
+
+    Legacy fields (``chunks_slim``, ``hierarchy``, ``toc_hierarchies``,
+    ``kb_csv``, ``hierarchy_view_html``) are kept for backward
+    compatibility with older result ZIPs.  The current worker does not
+    emit ``chunks_slim.json`` or ``hierarchy.json``.
     """
 
     manifest: Manifest
     chunks: List[Chunk]
-    chunks_slim: Optional[List[SlimChunk]]
     full_markdown: str
+    raw_zip: bytes
+    namespace: Optional[str]
+    document_id: Optional[str]
+    # Current worker output
+    doc_nav: Optional[DocNav]
+    # Legacy — the current worker no longer emits these files
+    chunks_slim: Optional[List[SlimChunk]]
     hierarchy: Optional[Any]
     toc_hierarchies: Optional[Any]
     kb_csv: Optional[str]
     hierarchy_view_html: Optional[str]
-    raw_zip: bytes
-    namespace: Optional[str]
-    document_id: Optional[str]
 
     def __init__(
         self,
         *,
         manifest: Manifest,
         chunks: List[Chunk],
-        chunks_slim: Optional[List[SlimChunk]],
         full_markdown: str,
-        hierarchy: Optional[Any],
-        toc_hierarchies: Optional[Any],
-        kb_csv: Optional[str],
-        hierarchy_view_html: Optional[str],
         raw_zip: bytes,
+        doc_nav: Optional[DocNav] = None,
         namespace: Optional[str] = None,
         document_id: Optional[str] = None,
+        # Legacy — the current worker no longer emits these files
+        chunks_slim: Optional[List[SlimChunk]] = None,
+        hierarchy: Optional[Any] = None,
+        toc_hierarchies: Optional[Any] = None,
+        kb_csv: Optional[str] = None,
+        hierarchy_view_html: Optional[str] = None,
     ) -> None:
         self.manifest = manifest
         self.chunks = chunks
-        self.chunks_slim = chunks_slim
         self.full_markdown = full_markdown
+        self.raw_zip = raw_zip
+        self.doc_nav = doc_nav
+        self.namespace = namespace
+        self.document_id = document_id
+        self.chunks_slim = chunks_slim
         self.hierarchy = hierarchy
         self.toc_hierarchies = toc_hierarchies
         self.kb_csv = kb_csv
         self.hierarchy_view_html = hierarchy_view_html
-        self.raw_zip = raw_zip
-        self.namespace = namespace
-        self.document_id = document_id
 
     # -- convenience properties --
 
@@ -344,11 +396,17 @@ def save(self, directory: Union[str, Path]) -> Path:
         """Save the full result to *directory*.
 
         Creates the directory if needed and writes:
+        * ``manifest.json`` — result manifest
+        * ``chunks.json`` — all chunks
+        * ``doc_nav.json`` — document navigation tree (if present)
         * ``full.md`` — the full markdown
         * ``images/`` — all image chunks
         * ``tables/`` — all table chunks
         * ``result.zip`` — the raw ZIP archive
 
+        Legacy files (``chunks_slim.json``, ``hierarchy.json``, etc.) are
+        also written when present for backward compatibility.
+
         Returns the resolved directory path.
         """
         dir_path: Path = Path(directory)
@@ -357,7 +415,7 @@ def save(self, directory: Union[str, Path]) -> Path:
         # Manifest / chunks
         manifest_path: Path = dir_path / "manifest.json"
         manifest_path.write_text(
-            self.manifest.model_dump_json(indent=2),
+            self.manifest.model_dump_json(indent=2, by_alias=True),
             encoding="utf-8",
         )
 
@@ -367,6 +425,13 @@ def save(self, directory: Union[str, Path]) -> Path:
             encoding="utf-8",
         )
 
+        if self.doc_nav is not None:
+            doc_nav_path: Path = dir_path / "doc_nav.json"
+            doc_nav_path.write_text(
+                self.doc_nav.model_dump_json(indent=2),
+                encoding="utf-8",
+            )
+
         if self.chunks_slim is not None:
             chunks_slim_path: Path = dir_path / "chunks_slim.json"
             chunks_slim_path.write_text(
diff --git a/src/knowhere/types/retrieval.py b/src/knowhere/types/retrieval.py
index 47b07a8..ebb13cf 100644
--- a/src/knowhere/types/retrieval.py
+++ b/src/knowhere/types/retrieval.py
@@ -2,9 +2,9 @@
 
 from __future__ import annotations
 
-from typing import Literal, Optional, TypedDict
+from typing import Any, Dict, List, Literal, Optional, TypedDict
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 
 RetrievalChannel = Literal["path", "content", "term"]
@@ -37,9 +37,16 @@ class RetrievalResult(BaseModel):
 
 
 class RetrievalQueryResponse(BaseModel):
-    """Response from ``POST /v1/retrieval/query``."""
+    """Response from ``POST /v1/retrieval/query``.
+
+    Agentic fields (``answer_text``, ``referenced_chunks``) are only
+    populated for agentic retrieval (e.g. ``use_agentic=True``).  In legacy
+    retrieval mode they default to ``None`` and ``[]`` respectively.
+    """
 
     namespace: str
     query: str
     router_used: Optional[str] = None
+    answer_text: Optional[str] = None
+    referenced_chunks: List[Dict[str, Any]] = Field(default_factory=list)
     results: list[RetrievalResult]
diff --git a/tests/test_models.py b/tests/test_models.py
index 92b9732..4314cfa 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -414,13 +414,17 @@ def test_defaults(self) -> None:
         chunk: BaseChunk = BaseChunk(chunk_id="chunk_2", type="text")
         assert chunk.content == ""
         assert chunk.path is None
-        assert chunk.page_nums is None
+        assert chunk.metadata.length is None
+        assert chunk.metadata.tokens is None
 
-    def test_page_nums_supported(self) -> None:
+    def test_metadata_accessible(self) -> None:
         chunk: BaseChunk = BaseChunk(
-            chunk_id="chunk_3", type="text", page_nums=[1, 2]
+            chunk_id="chunk_3",
+            type="text",
+            metadata={"tokens": ["a", "b"], "length": 10},
         )
-        assert chunk.page_nums == [1, 2]
+        assert chunk.metadata.tokens == ["a", "b"]
+        assert chunk.metadata.length == 10
 
 
 # ---------------------------------------------------------------------------
@@ -436,48 +440,21 @@ def test_from_dict(self) -> None:
             chunk_id="text_1",
             content="Some text content",
             path="doc/section1",
-            length=17,
-            page_nums=[1, 2],
-            tokens=["Some", "text", "content"],
-            keywords=["text", "content"],
-            summary="A text chunk",
-            connect_to=[{"target": "img_1", "relation": "embeds"}],
-            relationships=[{"target": "text_2", "type": "follows"}],
         )
         assert chunk.chunk_id == "text_1"
         assert chunk.type == "text"
         assert chunk.content == "Some text content"
-        assert chunk.length == 17
-        assert chunk.page_nums == [1, 2]
-        assert chunk.tokens == ["Some", "text", "content"]
-        assert chunk.keywords == ["text", "content"]
-        assert chunk.summary == "A text chunk"
-        assert chunk.connect_to is not None
-        assert len(chunk.connect_to) == 1
-        assert chunk.relationships is not None
-        assert len(chunk.relationships) == 1
 
     def test_defaults(self) -> None:
         chunk: TextChunk = TextChunk(chunk_id="text_2")
         assert chunk.type == "text"
-        assert chunk.length == 0
-        assert chunk.tokens is None
-        assert chunk.keywords is None
-        assert chunk.summary is None
-        assert chunk.connect_to is None
-        assert chunk.relationships is None
+        assert chunk.content == ""
+        assert chunk.path is None
 
     def test_is_instance_of_base_chunk(self) -> None:
         chunk: TextChunk = TextChunk(chunk_id="text_3")
         assert isinstance(chunk, BaseChunk)
 
-    def test_accepts_tokens_list(self) -> None:
-        chunk: TextChunk = TextChunk(
-            chunk_id="text_4",
-            tokens=["attention", "transformer"],
-        )
-        assert chunk.tokens == ["attention", "transformer"]
-
 
 # ---------------------------------------------------------------------------
 # ImageChunk model
@@ -492,24 +469,18 @@ def test_from_dict(self) -> None:
             chunk_id="IMG_1",
             content="A photo of a cat",
             file_path="images/IMG_1.jpg",
-            original_name="cat.jpg",
-            summary="Cat photo",
             data=b"\xff\xd8\xff\xe0",
         )
         assert chunk.chunk_id == "IMG_1"
         assert chunk.type == "image"
         assert chunk.content == "A photo of a cat"
         assert chunk.file_path == "images/IMG_1.jpg"
-        assert chunk.original_name == "cat.jpg"
         assert chunk.data == b"\xff\xd8\xff\xe0"
 
     def test_defaults(self) -> None:
         chunk: ImageChunk = ImageChunk(chunk_id="IMG_2")
         assert chunk.type == "image"
-        assert chunk.length == 0
         assert chunk.file_path is None
-        assert chunk.original_name is None
-        assert chunk.summary is None
         assert chunk.data == b""
 
     def test_format_property_from_file_path(self) -> None:
@@ -547,22 +518,16 @@ def test_from_dict(self) -> None:
             chunk_id="TBL_1",
             content="Revenue table",
             file_path="tables/TBL_1.html",
-            original_name="revenue.html",
-            table_type="financial",
-            summary="Revenue data",
             html="<table><tr><td>100</td></tr></table>",
         )
         assert chunk.chunk_id == "TBL_1"
         assert chunk.type == "table"
-        assert chunk.table_type == "financial"
         assert chunk.html == "<table><tr><td>100</td></tr></table>"
 
     def test_defaults(self) -> None:
         chunk: TableChunk = TableChunk(chunk_id="TBL_2")
         assert chunk.type == "table"
-        assert chunk.length == 0
         assert chunk.file_path is None
-        assert chunk.table_type is None
         assert chunk.html == ""
 
     def test_is_instance_of_base_chunk(self) -> None:
@@ -602,7 +567,6 @@ def _build_parse_result(
         TextChunk(
             chunk_id="text_1",
             content="Hello world",
-            length=11,
         ),
         ImageChunk(
             chunk_id="img_1",
@@ -624,7 +588,6 @@ def _build_parse_result(
                 type="text",
                 path="doc/section1",
                 content="Hello world",
-                summary="Greeting",
             )
         ],
         full_markdown="# Test\n\nHello world",
diff --git a/tests/test_result_parser.py b/tests/test_result_parser.py
index dfa276c..3ff3f24 100644
--- a/tests/test_result_parser.py
+++ b/tests/test_result_parser.py
@@ -14,6 +14,7 @@
 from knowhere._exceptions import ChecksumError, KnowhereError
 from knowhere.lib.result_parser import parseResultZip
 from knowhere.types.result import (
+    DocNav,
     ImageChunk,
     Manifest,
     ParseResult,
@@ -34,30 +35,56 @@
         "type": "text",
         "content": "Hello world",
         "path": "test/section1",
-        "length": 11,
-        "tokens": ["Hello", "world"],
-        "keywords": ["hello"],
-        "summary": "A greeting",
-        "relationships": [],
     },
     {
         "chunk_id": "IMAGE_test1",
         "type": "image",
         "content": "A test image",
         "path": "test/images",
-        "length": 12,
         "file_path": "images/IMAGE_test1.jpg",
-        "original_name": "test-image.jpg",
-        "summary": "Test image",
     },
 ]
 
-TEXT_TOKENS_LIST: List[str] = ["Ashish", "Vaswani", "attention", "transformer"]
-
 MARKDOWN: str = "# Test\n\nHello world"
 IMAGE_BYTES: bytes = b"\xff\xd8\xff\xe0"
 TABLE_HTML: str = "<table><tr><td>Optimized</td></tr></table>"
 
+DOC_NAV_JSON: Dict[str, Any] = {  # serialized as doc_nav.json in the tests below
+    "sections": [  # tree: one level-1 section holding one level-2 child
+        {
+            "title": "Introduction",
+            "path": "Default_Root/test.pdf-->Introduction",
+            "level": 1,
+            "summary": "Overview of the topic",
+            "chunk_count": 3,
+            "children": [  # nested entries repeat the same keys as their parent
+                {
+                    "title": "Background",
+                    "path": "Default_Root/test.pdf-->Introduction-->Background",
+                    "level": 2,
+                    "summary": "Historical context",
+                    "chunk_count": 2,
+                    "children": [],
+                }
+            ],
+        }
+    ],
+    "resources": {  # paths match file_path of the optimized image/table chunks
+        "images": [
+            {
+                "path": "images/IMAGE_test1.jpg",
+                "summary": "Test image summary",
+            }
+        ],
+        "tables": [
+            {
+                "path": "tables/table-optimized.html",
+                "summary": "Optimized table",
+            }
+        ],
+    },
+}
+
 
 def _build_zip(
     manifest: Dict[str, Any],
@@ -160,48 +187,20 @@ def _make_optimized_chunks() -> List[Dict[str, Any]]:
             "type": "text",
             "content": "Text chunk with embedded resources.",
             "path": "Default_Root/optimized.pdf-->Section 1",
-            "metadata": {
-                "length": 35,
-                "summary": "",
-                "page_nums": [1, 2],
-                "tokens": ["Text", "chunk"],
-                "keywords": ["optimized"],
-                "connect_to": [
-                    {
-                        "target": "image_chunk_optimized",
-                        "relation": "embeds",
-                        "ref": "[images/IMAGE_test1.jpg]",
-                    }
-                ],
-            },
         },
         {
             "chunk_id": "image_chunk_optimized",
             "type": "image",
             "content": "[images/IMAGE_test1.jpg]",
             "path": "images/IMAGE_test1.jpg",
-            "metadata": {
-                "length": 1,
-                "summary": "Optimized image chunk",
-                "page_nums": [2],
-                "file_path": "images/IMAGE_test1.jpg",
-                "keywords": [],
-                "tokens": [],
-            },
+            "file_path": "images/IMAGE_test1.jpg",
         },
         {
             "chunk_id": "table_chunk_optimized",
             "type": "table",
             "content": TABLE_HTML,
             "path": "tables/table-optimized.html",
-            "metadata": {
-                "length": 1,
-                "summary": "Optimized table chunk",
-                "page_nums": [3],
-                "file_path": "tables/table-optimized.html",
-                "keywords": ["optimized"],
-                "tokens": [],
-            },
+            "file_path": "tables/table-optimized.html",
         },
     ]
 
@@ -235,20 +234,18 @@ def test_loads_text_chunks(self) -> None:
         assert text_chunks[0].chunk_id == "text_chunk_1"
         assert text_chunks[0].content == "Hello world"
 
-    def test_accepts_text_chunk_tokens_as_list(self) -> None:
+    def test_metadata_accessible_on_chunks(self) -> None:
         manifest: Dict[str, Any] = _make_manifest()
         chunks: List[Dict[str, Any]] = [
             {
-                "chunk_id": "text_chunk_tokens_list",
+                "chunk_id": "text_with_meta",
                 "type": "text",
-                "content": "Attention is all you need",
-                "path": "paper/abstract",
+                "content": "Text with metadata",
+                "path": "doc/section1",
                 "metadata": {
-                    "length": 25,
-                    "tokens": TEXT_TOKENS_LIST,
-                    "keywords": ["attention", "transformer"],
-                    "summary": "Transformer introduction",
-                    "relationships": [],
+                    "length": 42,
+                    "tokens": ["hello", "world"],
+                    "summary": "A summary",
                 },
             }
         ]
@@ -256,52 +253,10 @@ def test_accepts_text_chunk_tokens_as_list(self) -> None:
 
         result: ParseResult = parseResultZip(zip_bytes, verify_checksum=False)
 
-        assert len(result.text_chunks) == 1
-        assert result.text_chunks[0].tokens == TEXT_TOKENS_LIST
-
-    def test_rejects_legacy_text_chunk_tokens_string(self) -> None:
-        manifest: Dict[str, Any] = _make_manifest()
-        chunks: List[Dict[str, Any]] = [
-            {
-                "chunk_id": "text_chunk_tokens_string",
-                "type": "text",
-                "content": "Attention is all you need",
-                "path": "paper/abstract",
-                "metadata": {
-                    "length": 25,
-                    "tokens": "Ashish;Vaswani;attention;transformer",
-                    "keywords": ["attention", "transformer"],
-                    "summary": "Transformer introduction",
-                    "relationships": [],
-                },
-            }
-        ]
-        zip_bytes: bytes = _build_zip(manifest, chunks=chunks)
-
-        with pytest.raises(KnowhereError, match="expected list\\[str\\]"):
-            parseResultZip(zip_bytes, verify_checksum=False)
-
-    def test_rejects_integer_text_chunk_tokens(self) -> None:
-        manifest: Dict[str, Any] = _make_manifest()
-        chunks: List[Dict[str, Any]] = [
-            {
-                "chunk_id": "text_chunk_tokens_int",
-                "type": "text",
-                "content": "Attention is all you need",
-                "path": "paper/abstract",
-                "metadata": {
-                    "length": 25,
-                    "tokens": 4,
-                    "keywords": ["attention", "transformer"],
-                    "summary": "Transformer introduction",
-                    "relationships": [],
-                },
-            }
-        ]
-        zip_bytes: bytes = _build_zip(manifest, chunks=chunks)
-
-        with pytest.raises(KnowhereError, match="expected list\\[str\\]"):
-            parseResultZip(zip_bytes, verify_checksum=False)
+        chunk = result.text_chunks[0]
+        assert chunk.metadata.length == 42
+        assert chunk.metadata.tokens == ["hello", "world"]
+        assert chunk.metadata.summary == "A summary"
 
     def test_loads_image_chunks_with_data(self) -> None:
         manifest: Dict[str, Any] = _make_manifest()
@@ -381,16 +336,9 @@ def test_exposes_optimized_payload_metadata_and_sidecar_assets(self) -> None:
         assert result.manifest.processing.billing_status == "charged"
         assert result.manifest.processing.cost is not None
         assert result.manifest.processing.cost.micro_dollars == 60000
-        assert result.text_chunks[0].page_nums == [1, 2]
-        assert result.image_chunks[0].page_nums == [2]
-        assert result.table_chunks[0].page_nums == [3]
-        assert result.text_chunks[0].connect_to == [
-            {
-                "target": "image_chunk_optimized",
-                "relation": "embeds",
-                "ref": "[images/IMAGE_test1.jpg]",
-            }
-        ]
+        assert result.text_chunks[0].chunk_id == "text_chunk_optimized"
+        assert result.image_chunks[0].chunk_id == "image_chunk_optimized"
+        assert result.table_chunks[0].chunk_id == "table_chunk_optimized"
         assert result.chunks_slim is not None
         assert len(result.chunks_slim) == 1
         assert result.kb_csv == "chunk_id,type\ntext_chunk_optimized,text\n"
@@ -449,6 +397,153 @@ def test_save_preserves_optimized_sidecar_files(self, tmp_path: Path) -> None:
         assert (output_dir / "result.zip").exists()
 
 
+# ---------------------------------------------------------------------------
+# Current worker contract tests (doc_nav, HIERARCHY)
+# ---------------------------------------------------------------------------
+
+
+def _make_current_contract_manifest() -> Dict[str, Any]:
+    """Build a manifest dict shaped like current worker output (has HIERARCHY)."""
+    section_titles: List[str] = ["Introduction", "Methods", "Results"]
+    hierarchy: Dict[str, Any] = {
+        "Default_Root": {"current.pdf": {"sections": section_titles}},
+    }
+    statistics: Dict[str, Any] = {
+        "total_chunks": 2,
+        "text_chunks": 1,
+        "image_chunks": 1,
+        "table_chunks": 0,
+        "total_pages": None,
+    }
+    manifest: Dict[str, Any] = {
+        "version": "2.0",
+        "job_id": "job_current123",
+        "data_id": None,
+        "source_file_name": "current.pdf",
+        "processing_date": "2026-05-01T00:00:00Z",
+        "HIERARCHY": hierarchy,
+        "statistics": statistics,
+    }
+    return manifest
+
+
+class TestCurrentWorkerContract:
+    """Tests against the current worker output contract."""
+
+    # -- doc_nav.json: exposed as result.doc_nav and written back by save() --
+
+    def test_parses_doc_nav(self) -> None:
+        manifest = _make_optimized_manifest()
+        chunks = _make_optimized_chunks()
+        zip_bytes = _build_zip(
+            manifest,
+            chunks=chunks,
+            extra_entries={
+                "doc_nav.json": json.dumps(DOC_NAV_JSON).encode("utf-8"),
+                "tables/table-optimized.html": TABLE_HTML.encode("utf-8"),
+            },
+        )
+
+        result = parseResultZip(zip_bytes, verify_checksum=False)
+
+        assert result.doc_nav is not None
+        doc_nav: DocNav = result.doc_nav
+        assert len(doc_nav.sections) == 1  # single root section in DOC_NAV_JSON
+        assert doc_nav.sections[0].title == "Introduction"
+        assert doc_nav.sections[0].level == 1
+        assert doc_nav.sections[0].chunk_count == 3
+        assert len(doc_nav.sections[0].children) == 1  # nested "Background" entry
+        assert doc_nav.sections[0].children[0].title == "Background"
+        assert doc_nav.resources is not None
+        assert len(doc_nav.resources.images) == 1
+        assert doc_nav.resources.images[0].path == "images/IMAGE_test1.jpg"
+        assert len(doc_nav.resources.tables) == 1
+        assert doc_nav.resources.tables[0].path == "tables/table-optimized.html"
+
+    def test_doc_nav_none_when_missing(self) -> None:
+        manifest = _make_optimized_manifest()
+        zip_bytes = _build_zip(manifest)  # no doc_nav.json entry is supplied
+
+        result = parseResultZip(zip_bytes, verify_checksum=False)
+
+        assert result.doc_nav is None
+
+    def test_save_writes_doc_nav(self, tmp_path: Path) -> None:
+        manifest = _make_optimized_manifest()
+        chunks = _make_optimized_chunks()
+        zip_bytes = _build_zip(
+            manifest,
+            chunks=chunks,
+            extra_entries={
+                "doc_nav.json": json.dumps(DOC_NAV_JSON).encode("utf-8"),
+                "tables/table-optimized.html": TABLE_HTML.encode("utf-8"),
+            },
+        )
+
+        result = parseResultZip(zip_bytes, verify_checksum=False)
+        output_dir = tmp_path / "with-doc-nav"
+        result.save(output_dir)
+
+        assert (output_dir / "doc_nav.json").exists()  # save() wrote the sidecar
+
+    # -- Manifest "HIERARCHY" key, read back via result.manifest.hierarchy --
+
+    def test_manifest_hierarchy_alias(self) -> None:
+        manifest = _make_current_contract_manifest()
+        zip_bytes = _build_zip(manifest)
+
+        result = parseResultZip(zip_bytes, verify_checksum=False)
+
+        assert result.manifest.hierarchy is not None
+        assert "Default_Root" in result.manifest.hierarchy
+
+    def test_manifest_without_hierarchy(self) -> None:
+        manifest = _make_optimized_manifest()
+        zip_bytes = _build_zip(manifest)
+
+        result = parseResultZip(zip_bytes, verify_checksum=False)
+
+        assert result.manifest.hierarchy is None
+
+    # -- Missing legacy files (chunks_slim.json, hierarchy.json) are tolerated --
+
+    def test_parses_without_chunks_slim(self) -> None:
+        """Current worker doesn't emit chunks_slim.json — parse must succeed."""
+        manifest = _make_optimized_manifest()
+        chunks = _make_optimized_chunks()
+        zip_bytes = _build_zip(
+            manifest,
+            chunks=chunks,
+            extra_entries={
+                "tables/table-optimized.html": TABLE_HTML.encode("utf-8"),
+            },
+            # No chunks_slim.json in extra_entries
+        )
+
+        result = parseResultZip(zip_bytes, verify_checksum=False)
+
+        assert result.chunks_slim is None
+        assert len(result.chunks) == 3  # text, image and table chunks
+
+    def test_parses_without_hierarchy_json(self) -> None:
+        """Current worker doesn't emit hierarchy.json — parse must succeed."""
+        manifest = _make_optimized_manifest()
+        chunks = _make_optimized_chunks()
+        zip_bytes = _build_zip(
+            manifest,
+            chunks=chunks,
+            extra_entries={
+                "tables/table-optimized.html": TABLE_HTML.encode("utf-8"),
+            },
+            # No hierarchy.json in extra_entries
+        )
+
+        result = parseResultZip(zip_bytes, verify_checksum=False)
+
+        assert result.hierarchy is None
+        assert result.manifest is not None  # rest of the result still parses
+
+
 # ---------------------------------------------------------------------------
 # Checksum verification
 # ---------------------------------------------------------------------------
diff --git a/tests/test_retrieval.py b/tests/test_retrieval.py
index 4925e30..2029120 100644
--- a/tests/test_retrieval.py
+++ b/tests/test_retrieval.py
@@ -20,6 +20,35 @@ def _make_retrieval_response() -> Dict[str, Any]:
         "namespace": "support-center",
         "query": "refund policy",
         "router_used": "discovery+agent",
+        "answer_text": "Annual plans may be refunded within 30 days of purchase.",
+        "referenced_chunks": [
+            {
+                "chunk_id": "chunk_001",
+                "document_id": "doc_123",
+                "asset_url": "https://example.com/assets/chunk_001",
+            }
+        ],
+        "results": [
+            {
+                "chunk_type": "text",
+                "content": "Annual plans may be refunded within 30 days.",
+                "score": 1.0,
+                "source": {
+                    "document_id": "doc_123",
+                    "source_file_name": "refund-policy.md",
+                    "section_path": "Policies / Billing / Refunds",
+                },
+            }
+        ],
+    }
+
+
+def _make_legacy_retrieval_response() -> Dict[str, Any]:
+    """Legacy-mode response without agentic fields (backward compatibility)."""
+    return {
+        "namespace": "support-center",
+        "query": "refund policy",
+        "router_used": "discovery+legacy",
         "results": [
             {
                 "chunk_type": "text",
@@ -93,6 +122,11 @@ def test_query_sends_request_and_returns_results(self, sync_client: Any) -> None
         assert response.results[0].source.document_id == "doc_123"
         assert response.results[0].source.source_file_name == "refund-policy.md"
         assert response.results[0].source.section_path == "Policies / Billing / Refunds"
+        assert response.answer_text == (
+            "Annual plans may be refunded within 30 days of purchase."
+        )
+        assert len(response.referenced_chunks) == 1
+        assert response.referenced_chunks[0]["chunk_id"] == "chunk_001"
         assert not hasattr(response.results[0], "citation")
         assert not hasattr(response.results[0], "chunk_id")
         assert not hasattr(response.results[0], "section_id")
@@ -127,3 +161,62 @@ async def test_async_query_sends_request_and_returns_results(
         assert route.called
         assert response.router_used == "discovery+agent"
         assert response.results[0].source.document_id == "doc_123"
+
+    @respx.mock
+    def test_use_agentic_sends_parameter(self, sync_client: Any) -> None:
+        """An explicit ``use_agentic=True`` must appear in the JSON request body."""
+        canned = httpx.Response(200, json=_make_retrieval_response())
+        query_route = respx.post(RETRIEVAL_QUERY_URL).mock(return_value=canned)
+
+        sync_client.retrieval.query(query="refund policy", use_agentic=True)
+
+        sent_request = query_route.calls[0].request
+        payload: Dict[str, Any] = json.loads(sent_request.read())
+        assert payload["use_agentic"] is True
+
+    @respx.mock
+    def test_use_agentic_omitted_when_none(self, sync_client: Any) -> None:
+        """Leaving ``use_agentic`` unset keeps the key out of the request body."""
+        canned = httpx.Response(200, json=_make_retrieval_response())
+        query_route = respx.post(RETRIEVAL_QUERY_URL).mock(return_value=canned)
+
+        sync_client.retrieval.query(query="refund policy")
+
+        sent_request = query_route.calls[0].request
+        payload: Dict[str, Any] = json.loads(sent_request.read())
+        assert "use_agentic" not in payload
+
+    @respx.mock
+    def test_agentic_response_fields(self, sync_client: Any) -> None:
+        """Agentic response exposes answer_text and referenced_chunks."""
+        respx.post(RETRIEVAL_QUERY_URL).mock(  # route handle is not needed here
+            return_value=httpx.Response(200, json=_make_retrieval_response())
+        )
+
+        response = sync_client.retrieval.query(
+            query="refund policy",
+            use_agentic=True,
+        )
+
+        assert response.answer_text == (
+            "Annual plans may be refunded within 30 days of purchase."
+        )
+        assert len(response.referenced_chunks) == 1
+        assert response.referenced_chunks[0]["chunk_id"] == "chunk_001"
+        assert response.referenced_chunks[0]["asset_url"] == (
+            "https://example.com/assets/chunk_001"
+        )
+
+    @respx.mock
+    def test_legacy_response_without_agentic_fields(self, sync_client: Any) -> None:
+        """Legacy-mode response (no agentic fields) parses without error."""
+        respx.post(RETRIEVAL_QUERY_URL).mock(  # route handle is not needed here
+            return_value=httpx.Response(
+                200, json=_make_legacy_retrieval_response()
+            )
+        )
+
+        response = sync_client.retrieval.query(query="refund policy")
+
+        assert response.answer_text is None
+        assert response.referenced_chunks == []