From ed10ce5b01fd56d2c0fb44bfe303e02a496ce9ac Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Mon, 15 Jun 2026 03:15:47 +0000 Subject: [PATCH 1/3] fix(lookup): skip empty cells in approximate MATCH/lookup search (HF-223) Excel and Google Sheets ignore genuinely empty cells (but not empty strings) when computing the lower/upper bound for an approximate match. HyperFormula instead landed on an empty cell during binary search and returned #N/A (its EmptyValue Symbol never matched the key), or, in descending mode, reported the wrong position. findLastOccurrenceInOrderedRange now runs the ordered search over a compacted list of non-empty cell indices and maps the result back to the original index space, so empty cells keep their slots and the matched non-empty cell's original 1-based position is reported unchanged. When the range has no non-empty cells the function returns NOT_FOUND, so an all-empty range yields #N/A for every direction/bound instead of falling through to the offset-0 branches. The in-memory ordered path in AdvancedFind.findNormalizedValue skips EmptyValue for the same reason. Empty strings are unaffected (text is ranked above numbers, so they still terminate a numeric run). Exact match (matchType 0) is untouched. Shared by approximate MATCH(+/-1), sorted VLOOKUP/HLOOKUP, and XLOOKUP(searchMode +/-2). Tests for the public test suite are tracked separately in handsontable/hyperformula-tests. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 4 ++++ src/Lookup/AdvancedFind.ts | 8 +++++++ src/interpreter/binarySearch.ts | 37 +++++++++++++++++++++++++++------ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 683e1da144..7732d3a276 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added an Indonesian (Bahasa Indonesia) language pack. [#1674](https://github.com/handsontable/hyperformula/pull/1674) +### Fixed + +- Fixed approximate `MATCH`, `VLOOKUP`, `HLOOKUP`, and `XLOOKUP` incorrectly returning `#N/A` (or a wrong position) when the search range contained empty cells; empty cells are now skipped during approximate matching, consistent with Excel and Google Sheets. [HF-223] + ## [3.3.0] - 2026-05-20 ### Added diff --git a/src/Lookup/AdvancedFind.ts b/src/Lookup/AdvancedFind.ts index 0c5f0d73bb..037629679c 100644 --- a/src/Lookup/AdvancedFind.ts +++ b/src/Lookup/AdvancedFind.ts @@ -5,6 +5,7 @@ import {DependencyGraph} from '../DependencyGraph' import { + EmptyValue, getRawValue, InternalScalarValue, RawInterpreterValue, @@ -88,6 +89,13 @@ export abstract class AdvancedFind { return i } + // Skip empty cells in the approximate search, consistent with findLastOccurrenceInOrderedRange: + // Excel/Google Sheets ignore genuinely empty cells (but not empty strings) when looking for the + // lower/upper bound. EmptyValue would otherwise be ranked below every value by compare(). + if (value === EmptyValue) { + continue + } + if (compareFn(value, searchKey) > 0) { continue } diff --git a/src/interpreter/binarySearch.ts b/src/interpreter/binarySearch.ts index 2823011a45..c332cad9e4 100644 --- a/src/interpreter/binarySearch.ts +++ b/src/interpreter/binarySearch.ts @@ -20,6 +20,12 @@ const NOT_FOUND = -1 * * If the search range contains duplicates, returns the last matching value. If no value found in the range satisfies the above, returns -1. * + * Empty cells (EmptyValue) are skipped: they are not treated as ordered values during the + * approximate search. This mirrors Excel/Google Sheets, where MATCH/VLOOKUP/HLOOKUP/XLOOKUP + * ignore genuinely empty cells (but not empty strings) when looking for the lower/upper bound. + * The returned offset is always relative to the original range, so empty cells keep their slots + * and the position of the matched non-empty cell is reported unchanged. + * * Note: this function does not normalize input strings. */ export function findLastOccurrenceInOrderedRange( @@ -35,12 +41,31 @@ export function findLastOccurrenceInOrderedRange( ? (index: number) => getRawValue(dependencyGraph.getCellValue(simpleCellAddress(range.sheet, index, range.start.row))) : (index: number) => getRawValue(dependencyGraph.getCellValue(simpleCellAddress(range.sheet, range.start.col, index))) + // Collect the original indices of the non-empty cells, preserving their order. Empty cells break + // the sort invariant binary search relies on (compare() ranks EmptyValue below every other value), + // so the search runs over the compacted, empty-free index list and the result is mapped back to the + // original index space afterwards. + const nonEmptyIndices: number[] = [] + for (let index = start; index <= end; index++) { + if (getValueFromIndexFn(index) !== EmptyValue) { + nonEmptyIndices.push(index) + } + } + + // With no non-empty cells there is nothing to match against. Return early so the + // ifNoMatch branches below (which treat NOT_FOUND as "key past the edge of a non-empty + // list" and may return offset 0) are not reached for an all-empty range. + if (nonEmptyIndices.length === 0) { + return NOT_FOUND + } + const compareFn = orderingDirection === 'asc' ? (left: RawNoErrorScalarValue, right: RawInterpreterValue) => compare(left, right) : (left: RawNoErrorScalarValue, right: RawInterpreterValue) => -compare(left, right) - const foundIndex = findLastMatchingIndex(index => compareFn(searchKey, getValueFromIndexFn(index)) >= 0, start, end) - const foundValue = getValueFromIndexFn(foundIndex) + const foundCompactedIndex = findLastMatchingIndex(compactedIndex => compareFn(searchKey, getValueFromIndexFn(nonEmptyIndices[compactedIndex])) >= 0, 0, nonEmptyIndices.length - 1) + const foundIndex = foundCompactedIndex === NOT_FOUND ? NOT_FOUND : nonEmptyIndices[foundCompactedIndex] + const foundValue = foundIndex === NOT_FOUND ? EmptyValue : getValueFromIndexFn(foundIndex) if (foundValue === searchKey) { return foundIndex - start @@ -61,8 +86,8 @@ export function findLastOccurrenceInOrderedRange( } // orderingDirection === 'desc' - const nextIndex = foundIndex+1 - return nextIndex <= end ? nextIndex - start : NOT_FOUND + const nextIndex = nonEmptyIndices[foundCompactedIndex + 1] + return nextIndex !== undefined ? nextIndex - start : NOT_FOUND } if (ifNoMatch === 'returnUpperBound') { @@ -80,8 +105,8 @@ export function findLastOccurrenceInOrderedRange( } // orderingDirection === 'asc' - const nextIndex = foundIndex+1 - return nextIndex <= end ? nextIndex - start : NOT_FOUND + const nextIndex = nonEmptyIndices[foundCompactedIndex + 1] + return nextIndex !== undefined ? nextIndex - start : NOT_FOUND } // ifNoMatch === 'returnNotFound' From a8c72faf0815afb6d32b887631739e033d20874c Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Tue, 23 Jun 2026 07:47:48 +0000 Subject: [PATCH 2/3] docs(lookup): document O(n) compaction trade-off + findNormalizedValue JSDoc (HF-223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address prep-flip review notes: explain why the empty-cell compaction is O(n) (correctness requires it; empties make the predicate non-monotonic) and add a JSDoc block to AdvancedFind.findNormalizedValue for family consistency with findLastOccurrenceInOrderedRange. Comment/JSDoc only — no behavior change. Co-Authored-By: Claude Opus 4.8 --- src/Lookup/AdvancedFind.ts | 7 +++++++ src/interpreter/binarySearch.ts | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/src/Lookup/AdvancedFind.ts b/src/Lookup/AdvancedFind.ts index 037629679c..5c5943cde5 100644 --- a/src/Lookup/AdvancedFind.ts +++ b/src/Lookup/AdvancedFind.ts @@ -62,6 +62,13 @@ export abstract class AdvancedFind { ) } + /** + * Linear search over an in-memory array for the value equal to `searchKey`, or — when `ifNoMatch` + * is `returnLowerBound`/`returnUpperBound` — the closest non-exceeding/non-preceding value. + * Genuinely empty cells (`EmptyValue`) are skipped, consistent with `findLastOccurrenceInOrderedRange` + * and with Excel/Google Sheets, which ignore empty cells (but not empty strings) in approximate search. + * Returns the 0-based index into `searchArray`, or `NOT_FOUND` (-1) when nothing matches. + */ protected findNormalizedValue(searchKey: RawNoErrorScalarValue, searchArray: InternalScalarValue[], ifNoMatch: 'returnLowerBound' | 'returnUpperBound' | 'returnNotFound' = 'returnNotFound', returnOccurrence: 'first' | 'last' = 'first'): number { const normalizedArray = searchArray .map(getRawValue) diff --git a/src/interpreter/binarySearch.ts b/src/interpreter/binarySearch.ts index c332cad9e4..9930e7f9f4 100644 --- a/src/interpreter/binarySearch.ts +++ b/src/interpreter/binarySearch.ts @@ -45,6 +45,11 @@ export function findLastOccurrenceInOrderedRange( // the sort invariant binary search relies on (compare() ranks EmptyValue below every other value), // so the search runs over the compacted, empty-free index list and the result is mapped back to the // original index space afterwards. + // + // This pre-scan is O(n) over the range, which trades away the binary search's O(log n) guarantee. + // It is required for correctness: with empty cells interspersed the search predicate is no longer + // monotonic, so the binary search cannot run directly on the original range. A future optimization + // could skip the compaction (and keep O(log n)) when the range is statically known to be empty-free. const nonEmptyIndices: number[] = [] for (let index = start; index <= end; index++) { if (getValueFromIndexFn(index) !== EmptyValue) { From dde3c8a5619041e230feabdcaefe2bf80b5b856b Mon Sep 17 00:00:00 2001 From: marcin-kordas-hoc Date: Thu, 25 Jun 2026 10:35:31 +0000 Subject: [PATCH 3/3] fix(lookup): keep exact match O(log n) and not skipping empty cells (HF-223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review: the empty-skipping compaction must apply to approximate (bound) lookups only. Run exact-match mode (ifNoMatch === 'returnNotFound') directly over the original range so it keeps its O(log n) guarantee and its pre-empty-skip behaviour — an exact search neither matches a blank nor is redirected past one. CHANGELOG reworded to the house style (PR link). Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 2 +- src/interpreter/binarySearch.ts | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7732d3a276..8d3204cbd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed -- Fixed approximate `MATCH`, `VLOOKUP`, `HLOOKUP`, and `XLOOKUP` incorrectly returning `#N/A` (or a wrong position) when the search range contained empty cells; empty cells are now skipped during approximate matching, consistent with Excel and Google Sheets. [HF-223] +- Fixed approximate `MATCH`, `VLOOKUP`, `HLOOKUP`, and `XLOOKUP` incorrectly returning `#N/A` (or a wrong position) when the search range contained empty cells. [#1697](https://github.com/handsontable/hyperformula/pull/1697) ## [3.3.0] - 2026-05-20 diff --git a/src/interpreter/binarySearch.ts b/src/interpreter/binarySearch.ts index 9930e7f9f4..bca91b8d6e 100644 --- a/src/interpreter/binarySearch.ts +++ b/src/interpreter/binarySearch.ts @@ -41,15 +41,29 @@ export function findLastOccurrenceInOrderedRange( ? (index: number) => getRawValue(dependencyGraph.getCellValue(simpleCellAddress(range.sheet, index, range.start.row))) : (index: number) => getRawValue(dependencyGraph.getCellValue(simpleCellAddress(range.sheet, range.start.col, index))) + const compareFn = orderingDirection === 'asc' + ? (left: RawNoErrorScalarValue, right: RawInterpreterValue) => compare(left, right) + : (left: RawNoErrorScalarValue, right: RawInterpreterValue) => -compare(left, right) + + // Exact-match mode (returnNotFound) does not skip empty cells: HF-223 changes only the approximate + // (bound) lookups, and an exact search must neither match a blank nor be redirected by one. Run the + // binary search directly over the original range — this keeps the mode O(log n) and preserves its + // pre-HF-223 behaviour: an exact hit is returned, everything else is NOT_FOUND. (The empty-skipping + // compaction below would otherwise let an exact search land on, and report, a non-empty cell that + // sits past an interspersed blank — silently changing exact-match results.) + if (ifNoMatch === 'returnNotFound') { + const exactIndex = findLastMatchingIndex(index => compareFn(searchKey, getValueFromIndexFn(index)) >= 0, start, end) + return exactIndex !== NOT_FOUND && getValueFromIndexFn(exactIndex) === searchKey ? exactIndex - start : NOT_FOUND + } + // Collect the original indices of the non-empty cells, preserving their order. Empty cells break // the sort invariant binary search relies on (compare() ranks EmptyValue below every other value), - // so the search runs over the compacted, empty-free index list and the result is mapped back to the - // original index space afterwards. + // so the bound search runs over the compacted, empty-free index list and the result is mapped back + // to the original index space afterwards. // // This pre-scan is O(n) over the range, which trades away the binary search's O(log n) guarantee. - // It is required for correctness: with empty cells interspersed the search predicate is no longer - // monotonic, so the binary search cannot run directly on the original range. A future optimization - // could skip the compaction (and keep O(log n)) when the range is statically known to be empty-free. + // It is required for correctness in the bound modes: with empty cells interspersed the search + // predicate is no longer monotonic, so the binary search cannot run directly on the original range. const nonEmptyIndices: number[] = [] for (let index = start; index <= end; index++) { if (getValueFromIndexFn(index) !== EmptyValue) { @@ -64,10 +78,6 @@ export function findLastOccurrenceInOrderedRange( return NOT_FOUND } - const compareFn = orderingDirection === 'asc' - ? (left: RawNoErrorScalarValue, right: RawInterpreterValue) => compare(left, right) - : (left: RawNoErrorScalarValue, right: RawInterpreterValue) => -compare(left, right) - const foundCompactedIndex = findLastMatchingIndex(compactedIndex => compareFn(searchKey, getValueFromIndexFn(nonEmptyIndices[compactedIndex])) >= 0, 0, nonEmptyIndices.length - 1) const foundIndex = foundCompactedIndex === NOT_FOUND ? NOT_FOUND : nonEmptyIndices[foundCompactedIndex] const foundValue = foundIndex === NOT_FOUND ? EmptyValue : getValueFromIndexFn(foundIndex)