diff --git a/graphile/graphile-search/src/__tests__/unified-search.test.ts b/graphile/graphile-search/src/__tests__/unified-search.test.ts index 86973af45..8f491e5b0 100644 --- a/graphile/graphile-search/src/__tests__/unified-search.test.ts +++ b/graphile/graphile-search/src/__tests__/unified-search.test.ts @@ -594,7 +594,7 @@ describe('graphile-search (unified search plugin)', () => { const allResult = await query(` query { allDocuments( - where: { unifiedSearch: "machine learning" } + where: { unifiedSearch: { text: "machine learning" } } orderBy: BODY_BM25_SCORE_ASC ) { nodes { rowId title bodyBm25Score } @@ -609,7 +609,7 @@ describe('graphile-search (unified search plugin)', () => { const limitResult = await query(` query { allDocuments( - where: { unifiedSearch: "machine learning" } + where: { unifiedSearch: { text: "machine learning" } } orderBy: BODY_BM25_SCORE_ASC first: 1 ) { @@ -732,38 +732,26 @@ describe('graphile-search (unified search plugin)', () => { } }); - it('mega query v2: unifiedSearch + searchScore with composite ordering', async () => { - // Mega Query v2 — New-style: uses the unified `unifiedSearch` composite - // filter that fans out to all text-compatible algorithms (tsvector, BM25, trgm) - // with a single string, plus a manual pgvector filter for semantic search. - // Orders by composite searchScore (highest overall relevance first). + it('mega query v2: unifiedSearch { text } + separate vectorEmbedding filter', async () => { + // Mega Query v2 — uses the unified `unifiedSearch` input type with text field + // for text-compatible algorithms, plus a separate pgvector filter. 
const result = await query(` query MegaQueryV2_UnifiedSearch { allDocuments( where: { - # unifiedSearch: single string fans out to tsvector + BM25 + trgm - # automatically — no need to specify each algorithm separately - unifiedSearch: "machine learning" - - # pgvector still needs its own filter (vectors aren't text) + unifiedSearch: { text: "machine learning" } vectorEmbedding: { vector: [1, 0, 0], metric: COSINE } } - # Order by composite searchScore (higher = more relevant across all algorithms), - # then by vector distance as tiebreaker (lower = semantically closer) orderBy: [SEARCH_SCORE_DESC, EMBEDDING_VECTOR_DISTANCE_ASC] ) { nodes { rowId title body - - # Per-adapter scores — populated by unifiedSearch for text algorithms tsvRank bodyBm25Score titleTrgmSimilarity embeddingVectorDistance - - # Composite normalized score — the single number that blends everything searchScore } } @@ -776,16 +764,70 @@ describe('graphile-search (unified search plugin)', () => { expect(nodes!.length).toBeGreaterThan(0); for (const node of nodes!) { - // searchScore should be populated (composite of active algorithms) expect(typeof node.searchScore).toBe('number'); expect(node.searchScore).toBeGreaterThanOrEqual(0); expect(node.searchScore).toBeLessThanOrEqual(1); - // Vector distance should be populated (manual filter) expect(typeof node.embeddingVectorDistance).toBe('number'); expect(node.embeddingVectorDistance).toBeGreaterThanOrEqual(0); } }); + + it('mega query v3: true hybrid via unifiedSearch { text, vector }', async () => { + // Mega Query v3 — true hybrid: text + vector in a SINGLE unifiedSearch input. + // WHERE clauses are OR-combined (match text OR vector). + // searchScore blends all adapters into a single 0..1 relevance number. 
+ const result = await query(` + query MegaQueryV3_HybridUnified { + allDocuments( + where: { + unifiedSearch: { + text: "machine learning" + vector: [1, 0, 0] + metric: COSINE + } + } + orderBy: SEARCH_SCORE_DESC + ) { + nodes { + rowId + title + tsvRank + bodyBm25Score + titleTrgmSimilarity + embeddingVectorDistance + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes; + expect(nodes).toBeDefined(); + expect(nodes!.length).toBeGreaterThan(0); + + for (const node of nodes!) { + // searchScore blends ALL active signals (text + vector) + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + + // At least one node should have vector distance populated (from the vector path) + const hasVectorScore = nodes!.some( + (n) => n.embeddingVectorDistance != null && n.embeddingVectorDistance >= 0 + ); + expect(hasVectorScore).toBe(true); + + // At least one node should have a text score populated (from the text path) + const hasTextScore = nodes!.some( + (n) => (n.tsvRank != null && n.tsvRank > 0) || + (n.bodyBm25Score != null) || + (n.titleTrgmSimilarity != null && n.titleTrgmSimilarity > 0) + ); + expect(hasTextScore).toBe(true); + }); }); // ─── unifiedSearch composite filter ──────────────────────────────────── @@ -795,7 +837,7 @@ describe('graphile-search (unified search plugin)', () => { const result = await query(` query { allDocuments(where: { - unifiedSearch: "learning" + unifiedSearch: { text: "learning" } }) { nodes { title @@ -813,7 +855,7 @@ describe('graphile-search (unified search plugin)', () => { const result = await query(` query { allDocuments(where: { - unifiedSearch: "machine learning" + unifiedSearch: { text: "machine learning" } }) { nodes { title @@ -843,7 +885,7 @@ describe('graphile-search (unified search plugin)', () => { const result = await query(` query { allDocuments(where: 
{ - unifiedSearch: "learning" + unifiedSearch: { text: "learning" } tsvTsv: "machine" }) { nodes { @@ -864,7 +906,7 @@ describe('graphile-search (unified search plugin)', () => { const result = await query(` query { allDocuments(where: { - unifiedSearch: "xyzzy_nonexistent_term_12345" + unifiedSearch: { text: "xyzzy_nonexistent_term_12345" } }) { nodes { title @@ -877,6 +919,104 @@ describe('graphile-search (unified search plugin)', () => { const nodes = result.data?.allDocuments?.nodes ?? []; expect(nodes.length).toBe(0); }); + + it('vector-only via unifiedSearch: { vector } (no text)', async () => { + const result = await query(` + query { + allDocuments(where: { + unifiedSearch: { + vector: [1, 0, 0] + metric: COSINE + distance: 1.5 + } + }) { + nodes { + rowId + title + embeddingVectorDistance + searchScore + } + } + } + `); + + expect(result.errors).toBeUndefined(); + const nodes = result.data?.allDocuments?.nodes ?? []; + expect(nodes.length).toBeGreaterThan(0); + + for (const node of nodes) { + // Vector distance should be populated + expect(typeof node.embeddingVectorDistance).toBe('number'); + expect(node.embeddingVectorDistance).toBeGreaterThanOrEqual(0); + expect(node.embeddingVectorDistance).toBeLessThanOrEqual(1.5); + + // searchScore should blend the vector signal + expect(typeof node.searchScore).toBe('number'); + expect(node.searchScore).toBeGreaterThanOrEqual(0); + expect(node.searchScore).toBeLessThanOrEqual(1); + } + }); + + it('hybrid text + vector: returns broader results than either alone', async () => { + // Text-only results + const textResult = await query(` + query { + allDocuments(where: { + unifiedSearch: { text: "quantum" } + }) { + nodes { rowId } + } + } + `); + + // Vector-only results (close to [0, 0, 1]) + const vectorResult = await query(` + query { + allDocuments(where: { + unifiedSearch: { vector: [0, 0, 1], metric: COSINE, distance: 0.5 } + }) { + nodes { rowId } + } + } + `); + + // Hybrid results (text OR vector) + const 
hybridResult = await query(` + query { + allDocuments(where: { + unifiedSearch: { + text: "quantum" + vector: [0, 0, 1] + metric: COSINE + distance: 0.5 + } + }) { + nodes { rowId } + } + } + `); + + expect(textResult.errors).toBeUndefined(); + expect(vectorResult.errors).toBeUndefined(); + expect(hybridResult.errors).toBeUndefined(); + + const textIds = new Set((textResult.data?.allDocuments?.nodes ?? []).map((n) => n.rowId)); + const vectorIds = new Set((vectorResult.data?.allDocuments?.nodes ?? []).map((n) => n.rowId)); + const hybridIds = new Set((hybridResult.data?.allDocuments?.nodes ?? []).map((n) => n.rowId)); + + // Hybrid results should be a superset of both text and vector results + // (since WHERE is OR-combined: match text OR match vector) + for (const id of textIds) { + expect(hybridIds.has(id)).toBe(true); + } + for (const id of vectorIds) { + expect(hybridIds.has(id)).toBe(true); + } + + // Hybrid should have at least as many results as either alone + expect(hybridIds.size).toBeGreaterThanOrEqual(textIds.size); + expect(hybridIds.size).toBeGreaterThanOrEqual(vectorIds.size); + }); }); // ─── Pagination ───────────────────────────────────────────────────────── diff --git a/graphile/graphile-search/src/plugin.ts b/graphile/graphile-search/src/plugin.ts index a35f6a232..4d7d371dd 100644 --- a/graphile/graphile-search/src/plugin.ts +++ b/graphile/graphile-search/src/plugin.ts @@ -344,6 +344,63 @@ export function createUnifiedSearchPlugin( adapter.registerTypes(build); } + // Register UnifiedSearchInput — accepts text, optional vector, metric, and distance. + // When both text and vector are provided, all adapters participate (true hybrid search). 
+        if (enableUnifiedSearch) {
+          const hasVectorAdapter = adapters.some((a) => a.name === 'vector'); // vector-only fields below are added only when an adapter named 'vector' is configured
+          const {
+            graphql: { GraphQLString: GQLString, GraphQLFloat: GQLFloat, GraphQLList, GraphQLNonNull, GraphQLBoolean },
+          } = build;
+
+          build.registerInputObjectType(
+            'UnifiedSearchInput',
+            {},
+            () => {
+              const fields: Record<string, any> = { // field name -> GraphQL input field config; `Record` needs both type arguments to compile
+                text: {
+                  type: GQLString,
+                  description:
+                    'Text query dispatched to all text-compatible adapters ' +
+                    '(tsvector, BM25, pg_trgm). At least one of text or vector must be provided.',
+                },
+              };
+
+              if (hasVectorAdapter) {
+                const VectorMetricEnum = build.getTypeByName('VectorMetric') as any; // looked up by name; presumably registered by the vector adapter's registerTypes — verify
+                fields.vector = {
+                  type: new GraphQLList(new GraphQLNonNull(GQLFloat)),
+                  description:
+                    'Query vector for semantic similarity search via pgvector. ' +
+                    'When provided alongside text, both text and vector results are OR-combined ' +
+                    'for true hybrid retrieval. When a Graphile LLM plugin is loaded, ' +
+                    'this field may be auto-populated from the text input.',
+                };
+                fields.metric = {
+                  type: VectorMetricEnum,
+                  description: 'Similarity metric for vector search (default: COSINE).',
+                };
+                fields.distance = {
+                  type: GQLFloat,
+                  description: 'Maximum vector distance threshold. Only rows within this distance are included.',
+                };
+                fields.includeChunks = {
+                  type: GraphQLBoolean,
+                  description:
+                    'When true (default), vector search includes chunks for tables with @hasChunks.',
+                };
+              }
+
+              return {
+                description:
+                  'Unified search input. Provide text for keyword search, vector for semantic search, ' +
+                  'or both for hybrid retrieval. WHERE clauses from all active adapters are OR-combined.',
+                fields: () => fields, // thunk returning the field map assembled above
+              };
+            },
+            'UnifiedSearchPlugin registering UnifiedSearchInput type'
+          );
+        }
+
         // Register StringTrgmFilter — a variant of StringFilter that includes
         // trgm operators (similarTo, wordSimilarTo). Only string columns on
         // tables that qualify for trgm will use this type instead of StringFilter.
@@ -858,106 +915,166 @@ export function createUnifiedSearchPlugin( } // ── unifiedSearch composite filter ── - // Adds a single `unifiedSearch: String` field that fans out the same - // text query to all adapters where supportsTextSearch is true. - // WHERE clauses are combined with OR (match ANY algorithm). + // Accepts UnifiedSearchInput { text, vector, metric, distance, includeChunks }. + // Text is dispatched to all text-compatible adapters (tsvector, BM25, trgm). + // Vector is dispatched to pgvector. WHERE clauses are OR-combined. if (enableUnifiedSearch) { // Collect text-compatible adapters and their columns for this codec const textAdapterColumns = adapterColumns.filter( (ac) => ac.adapter.supportsTextSearch && ac.adapter.buildTextSearchInput ); + // Collect vector adapter and its columns for this codec + const vectorAdapterColumns = adapterColumns.filter( + (ac) => ac.adapter.name === 'vector' + ); - if (textAdapterColumns.length > 0) { + // Need at least one text or vector adapter column to show the field + if (textAdapterColumns.length > 0 || vectorAdapterColumns.length > 0) { const fieldName = 'unifiedSearch'; + const UnifiedSearchInputType = build.getTypeByName('UnifiedSearchInput') as any; - newFields = build.extend( - newFields, - { - [fieldName]: fieldWithHooks( - { - fieldName, - isPgConnectionFilterField: true, - } as any, - { - description: build.wrapDescription( - 'Composite unified search. Provide a search string and it will be dispatched ' + - 'to all text-compatible search algorithms (tsvector, BM25, pg_trgm) simultaneously. ' + - 'Rows matching ANY algorithm are returned. All matching score fields are populated.', - 'field' - ), - type: build.graphql.GraphQLString as any, - apply: function plan($condition: any, val: any) { - if (val == null || (typeof val === 'string' && val.trim().length === 0)) return; - - const text = typeof val === 'string' ? 
val : String(val); - const qb = getQueryBuilder(build, $condition); - - // Collect all WHERE clauses (combined with OR) - const whereClauses: any[] = []; - - for (const { adapter, columns } of textAdapterColumns) { - for (const column of columns) { - // Convert text to adapter-specific filter input - const filterInput = adapter.buildTextSearchInput!(text); - - const result = adapter.buildFilterApply( - sql, - $condition.alias, - column, - filterInput, - build, - ); - if (!result) continue; - - // Collect WHERE clause for OR combination - if (result.whereClause) { - whereClauses.push(result.whereClause); + if (UnifiedSearchInputType) { + newFields = build.extend( + newFields, + { + [fieldName]: fieldWithHooks( + { + fieldName, + isPgConnectionFilterField: true, + } as any, + { + description: build.wrapDescription( + 'Unified hybrid search. Provide text for keyword search (dispatched to tsvector, BM25, pg_trgm), ' + + 'vector for semantic search (dispatched to pgvector), or both for true hybrid retrieval. ' + + 'WHERE clauses from all active adapters are OR-combined. 
' + + 'All matching score fields (tsvRank, bm25Score, trgmSimilarity, vectorDistance) are populated.', + 'field' + ), + type: UnifiedSearchInputType, + apply: function plan($condition: any, val: any) { + if (val == null) return; + + const { text, vector, metric, distance, includeChunks } = val; + const hasText = typeof text === 'string' && text.trim().length > 0; + const hasVector = Array.isArray(vector) && vector.length > 0; + + if (!hasText && !hasVector) return; + + const qb = getQueryBuilder(build, $condition); + const whereClauses: any[] = []; + + // ── Text path: dispatch to all text-compatible adapters ── + if (hasText) { + for (const { adapter, columns } of textAdapterColumns) { + for (const column of columns) { + const filterInput = adapter.buildTextSearchInput!(text); + const result = adapter.buildFilterApply( + sql, + $condition.alias, + column, + filterInput, + build, + ); + if (!result) continue; + + if (result.whereClause) { + whereClauses.push(result.whereClause); + } + + if (qb && qb.mode === 'normal') { + const baseFieldName = inflection.attribute({ + codec: pgCodec as any, + attributeName: column.attributeName, + }); + const scoreMetaKey = `__unified_search_${adapter.name}_${baseFieldName}`; + const wrappedScoreSql = sql`${sql.parens(result.scoreExpression)}::text`; + const scoreIndex = qb.selectAndReturnIndex(wrappedScoreSql); + qb.setMeta(scoreMetaKey, { + selectIndex: scoreIndex, + } as SearchScoreDetails); + + const orderKey = `unified_order_${adapter.name}_${baseFieldName}`; + const dirs = _pendingOrderDirections.get($condition.alias); + const explicitDir = dirs?.[orderKey]; + if (explicitDir) { + qb.orderBy({ + fragment: result.scoreExpression, + codec: TYPES.float, + direction: explicitDir, + }); + } + } + } } + } - // Still inject score into SELECT so score fields are populated - if (qb && qb.mode === 'normal') { - const baseFieldName = inflection.attribute({ - codec: pgCodec as any, - attributeName: column.attributeName, - }); - const 
scoreMetaKey = `__unified_search_${adapter.name}_${baseFieldName}`; - const wrappedScoreSql = sql`${sql.parens(result.scoreExpression)}::text`; - const scoreIndex = qb.selectAndReturnIndex(wrappedScoreSql); - qb.setMeta(scoreMetaKey, { - selectIndex: scoreIndex, - } as SearchScoreDetails); - - // ORDER BY: read the direction stored by the orderBy - // enum (which ran first) via the shared alias key. - const orderKey = `unified_order_${adapter.name}_${baseFieldName}`; - const dirs = _pendingOrderDirections.get($condition.alias); - const explicitDir = dirs?.[orderKey]; - if (explicitDir) { - qb.orderBy({ - fragment: result.scoreExpression, - codec: TYPES.float, - direction: explicitDir, - }); + // ── Vector path: dispatch to pgvector adapter ── + if (hasVector) { + for (const { adapter, columns } of vectorAdapterColumns) { + for (const column of columns) { + const vectorFilterInput = { + vector, + metric: metric || undefined, + distance: distance ?? undefined, + includeChunks: includeChunks ?? 
undefined, + }; + + const result = adapter.buildFilterApply( + sql, + $condition.alias, + column, + vectorFilterInput, + build, + ); + if (!result) continue; + + if (result.whereClause) { + whereClauses.push(result.whereClause); + } + + if (qb && qb.mode === 'normal') { + const baseFieldName = inflection.attribute({ + codec: pgCodec as any, + attributeName: column.attributeName, + }); + const scoreMetaKey = `__unified_search_${adapter.name}_${baseFieldName}`; + const wrappedScoreSql = sql`${sql.parens(result.scoreExpression)}::text`; + const scoreIndex = qb.selectAndReturnIndex(wrappedScoreSql); + qb.setMeta(scoreMetaKey, { + selectIndex: scoreIndex, + } as SearchScoreDetails); + + const orderKey = `unified_order_${adapter.name}_${baseFieldName}`; + const dirs = _pendingOrderDirections.get($condition.alias); + const explicitDir = dirs?.[orderKey]; + if (explicitDir) { + qb.orderBy({ + fragment: result.scoreExpression, + codec: TYPES.float, + direction: explicitDir, + }); + } + } } } } - } - // Apply combined WHERE with OR - if (whereClauses.length > 0) { - if (whereClauses.length === 1) { - $condition.where(whereClauses[0]); - } else { - const combined = sql.fragment`(${sql.join(whereClauses, ' OR ')})`; - $condition.where(combined); + // Apply combined WHERE with OR (true hybrid: match ANY path) + if (whereClauses.length > 0) { + if (whereClauses.length === 1) { + $condition.where(whereClauses[0]); + } else { + const combined = sql.fragment`(${sql.join(whereClauses, ' OR ')})`; + $condition.where(combined); + } } - } - }, - } - ), - }, - `UnifiedSearchPlugin adding unifiedSearch composite filter on '${codec.name}'` - ); + }, + } + ), + }, + `UnifiedSearchPlugin adding unifiedSearch composite filter on '${codec.name}'` + ); + } } }