From c454a57e50a15e6b48de8d7ad4d4dc10909d7cdc Mon Sep 17 00:00:00 2001 From: suguanyang Date: Fri, 15 May 2026 05:15:43 -0700 Subject: [PATCH] feat: sync SDK with current worker ZIP contract and agentic retrieval API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add typed ChunkMetadata interface for known worker metadata fields with index signature for forward compatibility. Issue #80 — ParseResult ZIP types Issue #81 — Agentic retrieval API --- .../sync-worker-contract-and-agentic-api.md | 12 + README.md | 11 +- src/__tests__/client.test.ts | 3 +- src/lib/__tests__/result-parser.test.ts | 229 +++++++++++++++--- src/lib/result-parser.ts | 113 +++------ src/resources/__tests__/retrieval.test.ts | 67 +++++ src/types/result.ts | 117 ++++++--- src/types/retrieval.ts | 12 + 8 files changed, 398 insertions(+), 166 deletions(-) create mode 100644 .changeset/sync-worker-contract-and-agentic-api.md diff --git a/.changeset/sync-worker-contract-and-agentic-api.md b/.changeset/sync-worker-contract-and-agentic-api.md new file mode 100644 index 0000000..9956f90 --- /dev/null +++ b/.changeset/sync-worker-contract-and-agentic-api.md @@ -0,0 +1,12 @@ +--- +'@ontos-ai/knowhere-sdk': minor +--- + +Sync SDK with current worker ZIP contract and agentic retrieval API: + +- Add `DocNav` types for `doc_nav.json` with section tree and resource summaries +- Expose `HIERARCHY` field on manifest from current worker output +- Add `documentTopSummary` to the `metadata` of all chunk types +- Move worker metadata fields (`length`, `pageNums`, `tokens`, `keywords`, `summary`, `connectTo`, `tableType`) from the chunk top level to `chunk.metadata`, and mark legacy outputs (`chunksSlim`, `hierarchy`, `tocHierarchies`, `kbCsv`, `hierarchyViewHtml`) as deprecated +- Add `useAgentic` parameter to retrieval query +- Add `answerText` and `referencedChunks` to retrieval response diff --git a/README.md b/README.md index 269207a..984f064 100644 --- a/README.md +++ b/README.md @@ -47,11 +47,11 @@ console.log(`Found ${result.textChunks.length} text chunks`); console.log(`Found ${result.imageChunks.length} images`); console.log(`Found ${result.tableChunks.length} tables`); -// 
Work with chunks +// Work with chunks — worker metadata is in chunk.metadata result.textChunks.forEach((chunk) => { console.log(chunk.content); - console.log(chunk.keywords); - console.log(chunk.summary); + console.log(chunk.metadata.keywords); + console.log(chunk.metadata.summary); }); // Save results to disk @@ -188,12 +188,17 @@ if (!documentId) { console.log(documentId); +// Agentic mode (LLM navigation + answer synthesis) const response = await client.retrieval.query({ namespace: 'support-center', query: 'How do I reset Bluetooth pairing?', topK: 5, + useAgentic: true, }); +console.log(response.answerText); // LLM-generated answer +console.log(response.referencedChunks); // cited evidence chunks + for (const result of response.results) { console.log(result.content); console.log(result.score); diff --git a/src/__tests__/client.test.ts b/src/__tests__/client.test.ts index 5e6158e..5fc5282 100644 --- a/src/__tests__/client.test.ts +++ b/src/__tests__/client.test.ts @@ -133,8 +133,7 @@ describe('Knowhere Client', () => { type: 'text', content: 'Sample content', path: 'page-1', - length: 100, - tokens: ['Sample', 'content'], + metadata: {}, }; const mockParseResult: ParseResult = { diff --git a/src/lib/__tests__/result-parser.test.ts b/src/lib/__tests__/result-parser.test.ts index b79e48c..0980407 100644 --- a/src/lib/__tests__/result-parser.test.ts +++ b/src/lib/__tests__/result-parser.test.ts @@ -383,7 +383,7 @@ describe('Result Parser', () => { expect(result.chunks[0].chunkId).toBe('chunk-001'); }); - it('should extract chunk metadata fields from nested metadata', async () => { + it('should extract chunk file paths from nested metadata', async () => { const mockZipBuffer = await createMockResultZip({ includeImages: true, includeTables: true, @@ -394,34 +394,8 @@ describe('Result Parser', () => { const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); - expect(result.textChunks[0].summary).toBe('Sample text chunk'); - 
expect(result.textChunks[0].tokens).toEqual(['token-a', 'token-b']); - expect(result.textChunks[0].relationships).toEqual(['chunk-002']); expect(result.imageChunks[0].filePath).toBe('images/image-001.jpg'); - expect(result.imageChunks[0].summary).toBe('Test image'); expect(result.tableChunks[0].filePath).toBe('tables/table-001.html'); - expect(result.tableChunks[0].tableType).toBe('data'); - expect(result.tableChunks[0].summary).toBe('Test table'); - }); - - it('should parse text chunk tokens as string arrays from the current payload shape', async () => { - const mockZipBuffer = await createMockResultZip(); - mockHttpClient.download.mockResolvedValue(mockZipBuffer); - - const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); - - expect(result.textChunks[0].tokens).toEqual(['token-a', 'token-b']); - }); - - it('should drop legacy numeric text chunk tokens instead of leaking the wrong runtime type', async () => { - const mockZipBuffer = await createMockResultZip({ - useLegacyNumericTokens: true, - }); - mockHttpClient.download.mockResolvedValue(mockZipBuffer); - - const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); - - expect(result.textChunks[0].tokens).toBeUndefined(); }); it('should extract image chunks with data', async () => { @@ -635,17 +609,10 @@ describe('Result Parser', () => { expect(result.manifest.version).toBe('2.0'); expect(result.manifest.processing?.pageCount).toBe(12); expect(result.manifest.processing?.timing?.startedAt).toBeInstanceOf(Date); - expect(result.textChunks[0].pageNums).toEqual([1, 2]); - expect(result.imageChunks[0].pageNums).toEqual([2]); - expect(result.tableChunks[0].pageNums).toEqual([3]); - expect(result.chunksSlim).toEqual([ - { - type: 'text', - path: 'Default_Root/optimized.pdf-->Section 1', - content: 'Text chunk with embedded resources.', - summary: '', - }, - ]); + expect(result.chunksSlim).toBeDefined(); + expect(result.chunksSlim!.length).toBe(1); + 
expect(result.chunksSlim![0].type).toBe('text'); + expect(result.chunksSlim![0].content).toBe('Text chunk with embedded resources.'); expect(result.kbCsv).toContain('chunk_id,type'); expect(result.tocHierarchies).toEqual([{ tocRange: [1, 3], scanRange: [1, 10] }]); expect(result.hierarchyViewHtml).toContain('Optimized hierarchy view'); @@ -876,6 +843,192 @@ describe('Result Parser', () => { }); }); + describe('Current worker contract (doc_nav, HIERARCHY)', () => { + async function createCurrentContractZip(): Promise { + const zip = new JSZip(); + + zip.file( + 'manifest.json', + JSON.stringify({ + version: '2.0', + job_id: 'job-current-123', + source_file_name: 'current.pdf', + processing_date: '2026-05-01T00:00:00Z', + HIERARCHY: { + Default_Root: { + 'current.pdf': { + sections: ['Introduction', 'Methods'], + }, + }, + }, + statistics: { + total_chunks: 2, + text_chunks: 1, + image_chunks: 1, + table_chunks: 0, + total_pages: null, + }, + }), + ); + + zip.file( + 'chunks.json', + JSON.stringify({ + chunks: [ + { + chunk_id: 'text-with-dts', + type: 'text', + content: 'Section overview.', + path: 'Default_Root/current.pdf-->Introduction', + metadata: { + length: 15, + summary: 'Intro text', + page_nums: [1], + tokens: ['overview'], + keywords: [], + }, + }, + { + chunk_id: 'image-with-dts', + type: 'image', + content: '[images/diagram.png]', + path: 'images/diagram.png', + metadata: { + length: 1, + summary: 'Architecture diagram', + page_nums: [2], + file_path: 'images/diagram.png', + }, + }, + ], + }), + ); + + zip.file( + 'doc_nav.json', + JSON.stringify({ + sections: [ + { + title: 'Introduction', + path: 'Default_Root/current.pdf-->Introduction', + level: 1, + summary: 'Overview of the topic', + chunk_count: 2, + children: [ + { + title: 'Background', + path: 'Default_Root/current.pdf-->Introduction-->Background', + level: 2, + summary: 'Historical context', + chunk_count: 1, + children: [], + }, + ], + }, + ], + resources: { + images: [{ path: 
'images/diagram.png', summary: 'Architecture overview' }], + tables: [], + }, + }), + ); + + zip.file('images/diagram.png', Buffer.from('fake-png-data')); + zip.file('full.md', '# Current Result\n\nBody'); + + return await zip.generateAsync({ type: 'nodebuffer' }); + } + + it('should parse doc_nav.json', async () => { + const zipBuffer = await createCurrentContractZip(); + mockHttpClient.download.mockResolvedValue(zipBuffer); + + const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); + + expect(result.docNav).toBeDefined(); + expect(result.docNav?.sections).toHaveLength(1); + expect(result.docNav?.sections[0].title).toBe('Introduction'); + expect(result.docNav?.sections[0].level).toBe(1); + expect(result.docNav?.sections[0].chunkCount).toBe(2); + expect(result.docNav?.sections[0].children).toHaveLength(1); + expect(result.docNav?.sections[0].children[0].title).toBe('Background'); + expect(result.docNav?.resources?.images).toHaveLength(1); + expect(result.docNav?.resources?.images[0].path).toBe('images/diagram.png'); + }); + + it('should leave docNav undefined when doc_nav.json is missing', async () => { + const mockZipBuffer = await createMockResultZip(); + mockHttpClient.download.mockResolvedValue(mockZipBuffer); + + const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); + + expect(result.docNav).toBeUndefined(); + }); + + it('should write doc_nav.json in save()', async () => { + const zipBuffer = await createCurrentContractZip(); + mockHttpClient.download.mockResolvedValue(zipBuffer); + const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); + + await result.save(testOutputDir); + + const docNavExists = await fs + .access(join(testOutputDir, 'doc_nav.json')) + .then(() => true) + .catch(() => false); + expect(docNavExists).toBe(true); + }); + + it('should expose manifest HIERARCHY field', async () => { + const zipBuffer = await createCurrentContractZip(); + 
mockHttpClient.download.mockResolvedValue(zipBuffer); + + const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); + + expect(result.manifest.HIERARCHY).toBeDefined(); + expect(result.manifest.HIERARCHY?.Default_Root).toBeDefined(); + }); + + it('should parse successfully without chunks_slim.json', async () => { + const mockZipBuffer = await createMockResultZip({ + includeImages: true, + includeTables: true, + useMetadata: true, + wrapChunks: true, + }); + mockHttpClient.download.mockResolvedValue(mockZipBuffer); + + const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); + + expect(result.chunksSlim).toBeUndefined(); + expect(result.chunks.length).toBeGreaterThan(0); + }); + + it('should expose raw metadata on chunks', async () => { + const mockZipBuffer = await createMockResultZip({ + useMetadata: true, + wrapChunks: true, + }); + mockHttpClient.download.mockResolvedValue(mockZipBuffer); + + const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); + + expect(result.textChunks[0].metadata).toBeDefined(); + expect(result.textChunks[0].metadata.length).toBe(250); + expect(result.textChunks[0].metadata.tokens).toEqual(['token-a', 'token-b']); + }); + + it('should parse successfully without hierarchy.json', async () => { + const mockZipBuffer = await createMockResultZip(); + mockHttpClient.download.mockResolvedValue(mockZipBuffer); + + const result = await parseResult(mockHttpClient, 'https://s3.example.com/result.zip'); + + expect(result.hierarchy).toBeUndefined(); + expect(result.manifest).toBeDefined(); + }); + }); + describe('verifyChecksum', () => { it('should verify valid checksum', () => { const data = Buffer.from('test data'); diff --git a/src/lib/result-parser.ts b/src/lib/result-parser.ts index 9257f8e..300d402 100644 --- a/src/lib/result-parser.ts +++ b/src/lib/result-parser.ts @@ -11,44 +11,20 @@ import type { ImageChunk, TableChunk, Statistics, - ConnectTo, 
SlimChunk, + DocNav, } from '../types/result.js'; import type { LoadOptions } from '../types/params.js'; import { ChecksumError, KnowhereError } from '../errors/index.js'; import { sanitizePath, getFileExtension, parseDates, keysToCamel } from './utils.js'; -type ChunkMetadata = { - length?: number; - pageNums?: unknown; - tokens?: unknown; - keywords?: string[]; - summary?: string; - /** schema v2.1: primary relationship field */ - connectTo?: ConnectTo[]; - /** @deprecated legacy field, no longer emitted by API */ - relationships?: string[]; - filePath?: string; - tableType?: string; -}; - type RawChunk = { chunkId?: string; type?: string; content?: string; path?: string; - length?: number; - pageNums?: unknown; - tokens?: unknown; - keywords?: string[]; - summary?: string; - /** schema v2.1: primary relationship field (camelCased from connect_to) */ - connectTo?: ConnectTo[]; - /** @deprecated legacy field */ - relationships?: string[]; filePath?: string; - tableType?: string; - metadata?: ChunkMetadata; + metadata?: Record; }; type ChunkPayload = RawChunk[] | { chunks?: RawChunk[] }; @@ -111,6 +87,15 @@ export async function parseResult( fullMarkdown = await fullMdFile.async('string'); } + // DocNav (current worker output) + let docNav: DocNav | undefined; + const docNavFile = zip.file('doc_nav.json'); + if (docNavFile) { + const docNavContent = await docNavFile.async('string'); + const rawDocNav: unknown = JSON.parse(docNavContent); + docNav = keysToCamel(rawDocNav); + } + let hierarchy: unknown; const hierarchyFile = zip.file('hierarchy.json'); if (hierarchyFile) { @@ -150,13 +135,15 @@ export async function parseResult( const result: ParseResult = { manifest, chunks, - chunksSlim, + docNav, fullMarkdown, + rawZip: zipBuffer, + // Legacy + chunksSlim, hierarchy, tocHierarchies, kbCsv, hierarchyViewHtml, - rawZip: zipBuffer, get textChunks(): TextChunk[] { return chunks.filter((c): c is TextChunk => c.type === 'text'); @@ -189,6 +176,11 @@ export async 
function parseResult( // Save manifest await fs.writeFile(join(directory, 'manifest.json'), JSON.stringify(manifest, null, 2)); + // Save doc_nav + if (docNav) { + await fs.writeFile(join(directory, 'doc_nav.json'), JSON.stringify(docNav, null, 2)); + } + // Save chunks await fs.writeFile(join(directory, 'chunks.json'), JSON.stringify(chunks, null, 2)); @@ -268,70 +260,27 @@ function extractSlimChunks(payload: SlimChunkPayload): SlimChunk[] { return []; } -function getChunkMetadata(chunkData: RawChunk): ChunkMetadata { - if (!chunkData.metadata) { - return {}; - } - - return chunkData.metadata; -} - function getChunkFilePath(chunkData: RawChunk): string | undefined { - const metadata = getChunkMetadata(chunkData); - return chunkData.filePath ?? metadata.filePath ?? chunkData.path; + const metadata = chunkData.metadata; + return chunkData.filePath ?? (metadata?.filePath as string | undefined) ?? chunkData.path; } -function normalizePageNums(pageNums: unknown): number[] | undefined { - if (!Array.isArray(pageNums)) { - return undefined; - } - - const normalized = pageNums.filter((pageNum): pageNum is number => typeof pageNum === 'number'); - return normalized.length > 0 ? normalized : undefined; -} - -function normalizeTokens(tokens: unknown): string[] | undefined { - if (!Array.isArray(tokens)) { - return undefined; - } - - if (!tokens.every((token) => typeof token === 'string')) { - return undefined; - } - - return tokens; -} - -function normalizeTextChunk(chunkData: RawChunk): TextChunk { - const metadata = getChunkMetadata(chunkData); - - // schema v2.1: prefer connect_to (camelCased to connectTo after keysToCamel) - // Fall back to legacy relationships for backward compatibility - const connectTo = metadata.connectTo ?? chunkData.connectTo; - const relationships = metadata.relationships ?? chunkData.relationships; - +function buildTextChunk(chunkData: RawChunk): TextChunk { return { chunkId: chunkData.chunkId ?? '', type: 'text', content: chunkData.content ?? 
'', path: chunkData.path ?? '', - pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums), - length: metadata.length ?? chunkData.length ?? 0, - tokens: normalizeTokens(metadata.tokens ?? chunkData.tokens), - keywords: metadata.keywords ?? chunkData.keywords, - summary: metadata.summary ?? chunkData.summary, - ...(connectTo !== undefined && { connectTo }), - ...(relationships !== undefined && { relationships }), + metadata: chunkData.metadata ?? {}, }; } async function processChunk(zip: JSZip, chunkData: RawChunk): Promise { if (chunkData.type === 'text') { - return normalizeTextChunk(chunkData); + return buildTextChunk(chunkData); } if (chunkData.type === 'image') { - const metadata = getChunkMetadata(chunkData); const filePath = getChunkFilePath(chunkData); if (!filePath) { @@ -353,11 +302,9 @@ async function processChunk(zip: JSZip, chunkData: RawChunk): Promise { type: 'image', content: chunkData.content ?? '', path: chunkData.path ?? '', - pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums), - length: metadata.length ?? chunkData.length ?? 0, filePath, - summary: metadata.summary ?? chunkData.summary, data: imageBuffer, + metadata: chunkData.metadata ?? {}, get format(): string { return getFileExtension(this.filePath); @@ -376,7 +323,6 @@ async function processChunk(zip: JSZip, chunkData: RawChunk): Promise { } if (chunkData.type === 'table') { - const metadata = getChunkMetadata(chunkData); const filePath = getChunkFilePath(chunkData); if (!filePath) { @@ -398,12 +344,9 @@ async function processChunk(zip: JSZip, chunkData: RawChunk): Promise { type: 'table', content: chunkData.content ?? '', path: chunkData.path ?? '', - pageNums: normalizePageNums(metadata.pageNums ?? chunkData.pageNums), - length: metadata.length ?? chunkData.length ?? 0, filePath, - tableType: metadata.tableType ?? chunkData.tableType, - summary: metadata.summary ?? chunkData.summary, html, + metadata: chunkData.metadata ?? 
{}, async save(directory: string): Promise { const outputPath = join(directory, sanitizePath(this.filePath)); @@ -417,7 +360,7 @@ async function processChunk(zip: JSZip, chunkData: RawChunk): Promise { return enrichedChunk; } - return normalizeTextChunk(chunkData); + return buildTextChunk(chunkData); } /** diff --git a/src/resources/__tests__/retrieval.test.ts b/src/resources/__tests__/retrieval.test.ts index 91dc414..70ff825 100644 --- a/src/resources/__tests__/retrieval.test.ts +++ b/src/resources/__tests__/retrieval.test.ts @@ -104,4 +104,71 @@ describe('Retrieval Resource', () => { query: 'refund policy', }); }); + + it('should send useAgentic parameter', async () => { + mockHttpClient.post.mockResolvedValue({ + namespace: 'default', + query: 'test', + routerUsed: 'workflow_single_step', + answerText: 'Generated answer', + referencedChunks: [{ chunkId: 'chunk-1', assetUrl: 'https://example.com' }], + results: [], + }); + + await retrieval.query({ query: 'test', useAgentic: true }); + + expect(mockHttpClient.post).toHaveBeenCalledWith('/v1/retrieval/query', { + query: 'test', + useAgentic: true, + }); + }); + + it('should handle agentic response fields', async () => { + mockHttpClient.post.mockResolvedValue({ + namespace: 'default', + query: 'test', + routerUsed: 'workflow_single_step', + answerText: 'LLM-generated answer', + referencedChunks: [ + { chunkId: 'chunk-1', documentId: 'doc-1', assetUrl: 'https://example.com/1' }, + ], + results: [], + }); + + const response = await retrieval.query({ query: 'test', useAgentic: true }); + + expect(response.answerText).toBe('LLM-generated answer'); + expect(response.referencedChunks).toHaveLength(1); + expect(response.referencedChunks?.[0]?.chunkId).toBe('chunk-1'); + }); + + it('should handle legacy response without agentic fields', async () => { + mockHttpClient.post.mockResolvedValue({ + namespace: 'default', + query: 'refund policy', + results: [], + }); + + const response = await retrieval.query({ query: 'refund 
policy' }); + + expect(response.answerText).toBeUndefined(); + expect(response.referencedChunks).toBeUndefined(); + expect(response.results).toEqual([]); + }); + + it('should handle null answerText', async () => { + mockHttpClient.post.mockResolvedValue({ + namespace: 'default', + query: 'test', + routerUsed: 'small_kb_all', + answerText: null, + referencedChunks: [], + results: [], + }); + + const response = await retrieval.query({ query: 'test', useAgentic: true }); + + expect(response.answerText).toBeNull(); + expect(response.referencedChunks).toEqual([]); + }); }); diff --git a/src/types/result.ts b/src/types/result.ts index 375a282..c645db5 100644 --- a/src/types/result.ts +++ b/src/types/result.ts @@ -60,7 +60,6 @@ export interface Manifest { dataId?: string; /** Original source file name */ sourceFileName: string; - /** Processing completion date */ /** Processing completion date (optional: only present if emitted by the worker) */ processingDate?: Date; /** Worker-side processing metadata emitted by manifest v2 */ @@ -69,6 +68,13 @@ export interface Manifest { statistics: Statistics; /** Legacy file index from earlier ZIP manifests */ files?: FileIndex; + /** + * Document hierarchy emitted by the current worker. + * + * The key remains all-caps at runtime because ``keysToCamel()`` only + * transforms snake_case keys. + */ + HIERARCHY?: Record; } /** @@ -87,6 +93,63 @@ export interface ConnectTo { keywords?: string[]; } +/** + * A single image or table resource entry in ``doc_nav.json``. + */ +export interface DocNavResourceItem { + path: string; + summary?: string; +} + +/** + * Image and table resource summaries from ``doc_nav.json``. + */ +export interface DocNavResources { + images: DocNavResourceItem[]; + tables: DocNavResourceItem[]; +} + +/** + * A document section in the ``doc_nav.json`` navigation tree. 
+ */ +export interface DocNavSection { + title: string; + path: string; + level: number; + summary?: string; + chunkCount: number; + children: DocNavSection[]; +} + +/** + * Top-level document navigation structure from ``doc_nav.json``. + */ +export interface DocNav { + sections: DocNavSection[]; + resources?: DocNavResources; +} + +/** + * Known worker metadata fields for a chunk. + * + * All fields are optional. Unknown fields added by future worker + * versions are accessible through the index signature. + */ +export interface ChunkMetadata { + length?: number; + pageNums?: number[]; + tokens?: string[]; + keywords?: string[]; + summary?: string; + connectTo?: ConnectTo[]; + filePath?: string; + originalName?: string; + tableType?: string; + documentTopSummary?: string; + /** Allow forward-compatible access to unknown fields. */ + [key: string]: unknown; +} + /** * Base chunk properties */ @@ -99,18 +162,17 @@ export interface BaseChunk { content: string; /** Relative path in ZIP */ path: string; - /** Page numbers spanned by this chunk when provided by the backend */ - pageNums?: number[]; + /** Worker metadata for this chunk */ + metadata: ChunkMetadata; } /** - * Minimal chunk representation emitted in chunks_slim.json + * Minimal chunk representation emitted in chunks_slim.json (legacy). */ export interface SlimChunk { type: 'text' | 'image' | 'table'; path: string; content: string; - summary?: string; } /** @@ -118,21 +180,6 @@ export interface SlimChunk { */ export interface TextChunk extends BaseChunk { type: 'text'; - /** Content length */ - length: number; - /** Extracted tokens from the current backend payload */ - tokens?: string[]; - /** Extracted keywords */ - keywords?: string[]; - /** Generated summary */ - summary?: string; - /** Chunk relationships (schema v2.1: metadata.connect_to) */ - connectTo?: ConnectTo[]; - /** - * @deprecated Use connectTo instead. Retained for backward compatibility. 
- * Previously populated from metadata.relationships which is no longer emitted by the API. - */ - relationships?: string[]; } /** @@ -140,12 +187,8 @@ export interface TextChunk extends BaseChunk { */ export interface ImageChunk extends BaseChunk { type: 'image'; - /** Content length */ - length: number; /** Relative file path in ZIP */ filePath: string; - /** Generated summary */ - summary?: string; /** Image data buffer */ data: Buffer; /** Image format (derived from file extension) */ @@ -159,14 +202,8 @@ export interface ImageChunk extends BaseChunk { */ export interface TableChunk extends BaseChunk { type: 'table'; - /** Content length */ - length: number; /** Relative file path in ZIP */ filePath: string; - /** Table type */ - tableType?: string; - /** Generated summary */ - summary?: string; /** HTML representation */ html: string; /** Save table HTML to disk */ @@ -186,20 +223,24 @@ export interface ParseResult { manifest: Manifest; /** All chunks */ chunks: Chunk[]; - /** Minimal chunk projection from chunks_slim.json (if available) */ - chunksSlim?: SlimChunk[]; + /** Document navigation tree from doc_nav.json (current worker output) */ + docNav?: DocNav; /** Full document as Markdown (if available) */ fullMarkdown?: string; - /** Document hierarchy (if available) */ + /** Raw ZIP buffer */ + rawZip: Buffer; + + // Legacy — the current worker no longer emits these files + /** @deprecated Current worker no longer emits chunks_slim.json */ + chunksSlim?: SlimChunk[]; + /** @deprecated Current worker no longer emits hierarchy.json */ hierarchy?: unknown; - /** Table-of-contents hierarchy hints (if available) */ + /** @deprecated Table-of-contents hierarchy hints (if available) */ tocHierarchies?: unknown; - /** Knowledge-base CSV export (if available) */ + /** @deprecated Knowledge-base CSV export (if available) */ kbCsv?: string; - /** Pre-rendered hierarchy HTML view (if available) */ + /** @deprecated Pre-rendered hierarchy HTML view (if available) */ 
hierarchyViewHtml?: string; - /** Raw ZIP buffer */ - rawZip: Buffer; /** Text chunks only */ readonly textChunks: TextChunk[]; diff --git a/src/types/retrieval.ts b/src/types/retrieval.ts index 92a4485..e804e3a 100644 --- a/src/types/retrieval.ts +++ b/src/types/retrieval.ts @@ -28,6 +28,14 @@ export interface RetrievalQueryParams { namespace?: string; /** Maximum number of results to return */ topK?: number; + /** + * Force retrieval mode. + * + * - ``true`` — agentic (LLM navigation + answer synthesis) + * - ``false`` — legacy 3-channel RRF only + * - ``undefined`` / omitted — server default + */ + useAgentic?: boolean; /** Chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table */ dataType?: 1 | 2 | 3 | 4 | 5 | 6; /** Path keywords for include/exclude filtering */ @@ -88,6 +96,10 @@ export interface RetrievalQueryResponse { query: string; /** Retrieval router path used by the API for this query */ routerUsed?: string; + /** LLM-generated natural-language answer (agentic mode only) */ + answerText?: string | null; + /** Cited evidence chunks with asset URLs (agentic mode only) */ + referencedChunks?: Array> | null; /** Ranked retrieval results */ results: RetrievalResult[]; }