From 1fa241738629e9fc55bd4c1efa87c88b93efdce6 Mon Sep 17 00:00:00 2001 From: buriedpot <1091378351@qq.com> Date: Wed, 1 Apr 2026 10:48:56 +0800 Subject: [PATCH 1/3] fix: avoid using uninitialized memory During a RUM index scan, if a concurrent VACUUM completes and removes all items from a posting tree leaf page or a posting list, entry->nlist can become zero. In this case, entry->curItem may be assigned from uninitialized memory, leading to a crash. This commit fixes the bug by skipping empty posting lists and empty posting tree pages during the scan. --- Makefile | 2 +- expected/rum_vacuum.out | 154 ++++++++++++++++++++++++++++++++++++++++ sql/rum_vacuum.sql | 54 ++++++++++++++ src/rumget.c | 97 +++++++++++++++++++++++-- 4 files changed, 299 insertions(+), 8 deletions(-) create mode 100644 expected/rum_vacuum.out create mode 100644 sql/rum_vacuum.sql diff --git a/Makefile b/Makefile index 12af6d0c6c..de00b1860f 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp \ int2 int4 int8 float4 float8 money oid \ time timetz date interval \ macaddr inet cidr text varchar char bytea bit varbit \ - numeric rum_weight expr array + numeric rum_weight expr array rum_vacuum TAP_TESTS = 1 diff --git a/expected/rum_vacuum.out b/expected/rum_vacuum.out new file mode 100644 index 0000000000..a3d1293c96 --- /dev/null +++ b/expected/rum_vacuum.out @@ -0,0 +1,154 @@ +-- Test RUM index scan correctness after concurrent VACUUM removes all +-- posting tree entry items. 
+SET enable_seqscan TO off; +SET enable_indexscan TO off; +SET enable_bitmapscan TO on; +CREATE TABLE test_rum_vacuum (id int, body tsvector); +ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false); +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i; +CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops); +DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5; +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999; +-- test normal result +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + id | body +----+------------------- + 6 | 'ann':2 'great':1 +(1 row) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + id | body +-------+-------------------- + 10000 | 'great':1 'john':2 +(1 row) + +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; + id | body +-------+-------------------- + 6 | 'ann':2 'great':1 + 10000 | 'great':1 'john':2 + 10001 | 'great':1 'jame':2 + 10002 | 'great':1 'jame':2 + 10003 | 'great':1 'jame':2 +(5 rows) + +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; + id | body +-------+-------------------- + 10000 | 'great':1 'john':2 + 6 | 'ann':2 'great':1 + 10001 | 'great':1 'jame':2 + 10002 | 'great':1 'jame':2 + 10003 | 'great':1 'jame':2 +(5 rows) + +VACUUM test_rum_vacuum; +-- this shouldn't cause a core dump +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + id | body +----+------------------- + 6 | 'ann':2 'great':1 +(1 row) + +SELECT * FROM test_rum_vacuum WHERE 
body @@ to_tsquery('john'); + id | body +-------+-------------------- + 10000 | 'great':1 'john':2 +(1 row) + +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; + id | body +-------+-------------------- + 6 | 'ann':2 'great':1 + 10000 | 'great':1 'john':2 + 10001 | 'great':1 'jame':2 + 10002 | 'great':1 'jame':2 + 10003 | 'great':1 'jame':2 +(5 rows) + +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; + id | body +-------+-------------------- + 10000 | 'great':1 'john':2 + 6 | 'ann':2 'great':1 + 10001 | 'great':1 'jame':2 + 10002 | 'great':1 'jame':2 + 10003 | 'great':1 'jame':2 +(5 rows) + +-- test that data can still be found after reinsertion +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i; +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + count +------- + 9998 +(1 row) + +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999; +VACUUM test_rum_vacuum; +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + id | body +----+------------------- + 6 | 'ann':2 'great':1 +(1 row) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + id | body +-------+-------------------- + 20000 | 'great':1 'john':2 +(1 row) + +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; + id | body +-------+-------------------- + 6 | 'ann':2 'great':1 + 10001 | 'great':1 'jame':2 + 10002 | 'great':1 'jame':2 + 10003 | 'great':1 'jame':2 + 20000 | 'great':1 'john':2 +(5 rows) + +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> 
to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; + id | body +-------+-------------------- + 20000 | 'great':1 'john':2 + 6 | 'ann':2 'great':1 + 10001 | 'great':1 'jame':2 + 10002 | 'great':1 'jame':2 + 10003 | 'great':1 'jame':2 +(5 rows) + +-- test if do while loop works when an entry has no non-empty posting tree pages +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum; +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + id | body +----+------------------- + 6 | 'ann':2 'great':1 +(1 row) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + id | body +----+------ +(0 rows) + +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; + id | body +-------+-------------------- + 6 | 'ann':2 'great':1 + 10001 | 'great':1 'jame':2 + 10002 | 'great':1 'jame':2 + 10003 | 'great':1 'jame':2 +(4 rows) + +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; + id | body +-------+-------------------- + 6 | 'ann':2 'great':1 + 10001 | 'great':1 'jame':2 + 10002 | 'great':1 'jame':2 + 10003 | 'great':1 'jame':2 +(4 rows) diff --git a/sql/rum_vacuum.sql b/sql/rum_vacuum.sql new file mode 100644 index 0000000000..cc9d087a01 --- /dev/null +++ b/sql/rum_vacuum.sql @@ -0,0 +1,54 @@ +-- Test RUM index scan correctness after concurrent VACUUM removes all +-- posting tree entry items. 
+ +SET enable_seqscan TO off; +SET enable_indexscan TO off; +SET enable_bitmapscan TO on; + +CREATE TABLE test_rum_vacuum (id int, body tsvector); +ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false); + +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i; + +CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops); + +DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5; +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999; + +-- test normal result +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; + +VACUUM test_rum_vacuum; + +-- this shouldn't cause a core dump +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; + +-- test that data can still be found after reinsertion +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i; +SELECT count(*) FROM test_rum_vacuum WHERE body @@ 
to_tsquery('john'); +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999; + +VACUUM test_rum_vacuum; + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; + +-- test if do while loop works when an entry has no non-empty posting tree pages +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum; + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; +SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; diff --git a/src/rumget.c b/src/rumget.c index a36229f59f..bfd02e2907 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -684,9 +684,23 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) } LockBuffer(entry->buffer, RUM_UNLOCK); - entry->isFinished = setListPositionScanEntry(rumstate, entry); - if (!entry->isFinished) - entry->curItem = entry->list[entry->offset]; + + /* + * If the current page is empty (nlist == 0), we cannot assume the + * scan is complete, as subsequent pages may exist. 
Therefore, we + * set isFinished = false and leave entry->nlist = 0 and + * entry->offset = 0 to ensure that entryGetItem advances to the + * next page on the next call. Otherwise, initialize curItem to + * the first valid item. + */ + if (entry->nlist == 0) + entry->isFinished = false; + else + { + entry->isFinished = setListPositionScanEntry(rumstate, entry); + if (!entry->isFinished) + entry->curItem = entry->list[entry->offset]; + } } else if (RumGetNPosting(itup) > 0) { @@ -699,6 +713,16 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) if (!entry->isFinished) entry->curItem = entry->list[entry->offset]; } + /* + * Else, the posting list for this entry has been entirely vacuumed + * away (nlist == 0 after setListPositionScanEntry). We cannot assume + * the scan is complete, as subsequent pages may exist. Therefore, we + * set isFinished = false and leave entry->nlist = 0 and entry->offset + * = 0 to ensure that entryGetItem advances to the next page on the + * next call. + */ + else + entry->isFinished = false; if (entry->queryCategory == RUM_CAT_EMPTY_QUERY && entry->scanWithAddInfo) @@ -1011,6 +1035,13 @@ entryGetNextItem(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) LockBuffer(entry->buffer, RUM_UNLOCK); + /* + * No valid item if VACUUM removed all items concurrently. Go on + * next page. + */ + if (entry->nlist == 0) + break; + if (entry->offset < 0) { if (ScanDirectionIsForward(entry->scanDirection) && @@ -1044,6 +1075,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) RumItemSetMin(&entry->curItem); entry->offset = InvalidOffsetNumber; entry->list = NULL; + entry->nlist = 0; if (entry->gdi) { freeRumBtreeStack(entry->gdi->stack); @@ -1151,6 +1183,18 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) LockBuffer(entry->buffer, RUM_UNLOCK); entry->isFinished = false; + + /* + * Posting tree's first leaf page is empty due to concurrent VACUUM. 
+ * Advance through empty pages until we find one with items or exhaust + * the tree. entryGetItem does not re-invoke entryGetNextItem after we + * return, so we must do it here to ensure curItem is valid on return. + */ + if (entry->nlist == 0) + { + entryGetNextItem(rumstate, entry, snapshot); + goto entry_done; + } } else if (RumGetNPosting(itup) > 0) { @@ -1161,12 +1205,21 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); entry->isFinished = setListPositionScanEntry(rumstate, entry); } + /* Posting list has been vacuumed. Go to the next entry. */ + else + { + ItemPointerSetInvalid(&entry->curItem.iptr); + entry->isFinished = true; + goto entry_done; + } Assert(entry->nlist > 0 && entry->list); entry->curItem = entry->list[entry->offset]; entry->offset += entry->scanDirection; +entry_done: + SCAN_ENTRY_GET_KEY(entry, rumstate, itup); /* @@ -1340,8 +1393,24 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps } else if (entry->stack) { - entry->offset++; - if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList) + /* + * We are responsible for ensuring that we keep advancing through + * ItemLists until we find one that contains at least one valid + * item. This is necessary because concurrent VACUUM may have + * removed all items from a page, leaving an empty ItemList. In + * such cases, we must continue to the next ItemList. 
+ */ + bool success; + + Assert(!entry->isFinished); + + do + { + entry->isFinished = false; + success = entryGetNextItemList(rumstate, entry, snapshot); + } while (success && entry->nlist == 0); + + if (success && nextEntryList) *nextEntryList = true; } else @@ -1361,8 +1430,22 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps dropItem(entry)); if (entry->stack && entry->isFinished) { - entry->isFinished = false; - if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList) + /* + * We are responsible for ensuring that we keep advancing through + * ItemLists until we find one that contains at least one valid + * item. This is necessary because concurrent VACUUM may have + * removed all items from a page, leaving an empty ItemList. In + * such cases, we must continue to the next ItemList. + */ + bool success; + + do + { + entry->isFinished = false; + success = entryGetNextItemList(rumstate, entry, snapshot); + } while (success && entry->nlist == 0); + + if (success && nextEntryList) *nextEntryList = true; } } From f8f3b6b1f5fcad3fb6f00df2cea1b918df32fe47 Mon Sep 17 00:00:00 2001 From: Arseny Kositsyn Date: Tue, 26 May 2026 14:19:47 +0300 Subject: [PATCH 2/3] Review fixes. 1. Fix incorrect handling of empty pages in the scanPage() function when scanning in the backward direction. 2. Refactor the regression test to focus on index scan correctness after VACUUM removes all items from posting lists and posting trees: - Add backward scan tests to verify empty-page handling in scanPage(), which previously caused an out-of-bounds array access. - Simplify queries by removing subqueries. - Add detailed comments explaining each test phase (setup, vacuum, scan verification for RumFastScan/RumFullScan). 
--- expected/rum_vacuum.out | 403 ++++++++++++++++++++++++++++------------ sql/rum_vacuum.sql | 164 ++++++++++++---- src/rumget.c | 12 +- 3 files changed, 428 insertions(+), 151 deletions(-) diff --git a/expected/rum_vacuum.out b/expected/rum_vacuum.out index a3d1293c96..b65e9043d7 100644 --- a/expected/rum_vacuum.out +++ b/expected/rum_vacuum.out @@ -1,154 +1,325 @@ --- Test RUM index scan correctness after concurrent VACUUM removes all --- posting tree entry items. -SET enable_seqscan TO off; -SET enable_indexscan TO off; -SET enable_bitmapscan TO on; +-- The test verifies that the scan in different directions in the RUM index +-- correctly handles empty posting lists and empty pages of the posting tree +-- that have been vacuumed. +SET enable_seqscan = off; +SET enable_indexscan = off; +SET enable_bitmapscan = on; +-- Prepare the data. Creating a posting list for ann and a posting tree for john. CREATE TABLE test_rum_vacuum (id int, body tsvector); ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false); -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i; -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('ann') FROM generate_series(1, 5) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('john') FROM generate_series(6, 10000) i; CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops); -DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5; +-- Delete all the items from the posting list and all but one item from the +-- posting tree. +DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery; DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999; --- test normal result +-- Check the scan before vacuum. 
+-- Scan with the extraction of results from the posting list (RumFastScan mode). +EXPLAIN (costs off) SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); - id | body -----+------------------- - 6 | 'ann':2 'great':1 -(1 row) + QUERY PLAN +------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('ann'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('ann'::text)) +(4 rows) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + id | body +----+------ +(0 rows) + +-- Scan with the extraction of results from the posting tree (RumFastScan mode). +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + QUERY PLAN +-------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('john'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('john'::text)) +(4 rows) SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); - id | body --------+-------------------- - 10000 | 'great':1 'john':2 + id | body +-------+---------- + 10000 | 'john':1 (1 row) -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; - id | body --------+-------------------- - 6 | 'ann':2 'great':1 - 10000 | 'great':1 'john':2 - 10001 | 'great':1 'jame':2 - 10002 | 'great':1 'jame':2 - 10003 | 'great':1 'jame':2 -(5 rows) +-- Full-pass index scan (RumFullScan mode). 
+EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using test_rum_vacuum_body_idx on test_rum_vacuum + Order By: (body <=> to_tsquery('john'::text)) +(2 rows) -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; - id | body --------+-------------------- - 10000 | 'great':1 'john':2 - 6 | 'ann':2 'great':1 - 10001 | 'great':1 'jame':2 - 10002 | 'great':1 'jame':2 - 10003 | 'great':1 'jame':2 -(5 rows) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + id | body | distance +-------+----------+---------- + 10000 | 'john':1 | 16.44934 +(1 row) +-- Remove the elements from the posting tree and the posting list and make the +-- same checks. VACUUM test_rum_vacuum; --- this shouldn't cause a core dump +-- Scan of an empty posting list. +EXPLAIN (costs off) SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); - id | body -----+------------------- - 6 | 'ann':2 'great':1 -(1 row) + QUERY PLAN +------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('ann'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('ann'::text)) +(4 rows) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + id | body +----+------ +(0 rows) + +-- After the vacuum, the left and right leaf pages will remain in the posting +-- tree (because the outermost pages are not deleted). The left page is empty, +-- and the right page has one element. Check that the scan skips an left empty +-- page and receives an item from the right non-empty page. 
+EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + QUERY PLAN +-------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('john'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('john'::text)) +(4 rows) SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); - id | body --------+-------------------- - 10000 | 'great':1 'john':2 + id | body +-------+---------- + 10000 | 'john':1 (1 row) -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; - id | body --------+-------------------- - 6 | 'ann':2 'great':1 - 10000 | 'great':1 'john':2 - 10001 | 'great':1 'jame':2 - 10002 | 'great':1 'jame':2 - 10003 | 'great':1 'jame':2 +-- Check that in RumFullScan mode, the scan correctly switches from an empty +-- posting list to a non-empty posting tree and receives 1 element. +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using test_rum_vacuum_body_idx on test_rum_vacuum + Order By: (body <=> to_tsquery('john'::text)) +(2 rows) + +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + id | body | distance +-------+----------+---------- + 10000 | 'john':1 | 16.44934 +(1 row) + +-- Delete the last item from the posting tree. +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum; +-- Check that the scan correctly skips all empty posting tree pages, reaches +-- the far right and ends. 
+EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + QUERY PLAN +-------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('john'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('john'::text)) +(4 rows) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + id | body +----+------ +(0 rows) + +-- Check that in RumFullScan mode, the scan correctly skips an empty posting +-- list and an empty posting tree. +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using test_rum_vacuum_body_idx on test_rum_vacuum + Order By: (body <=> to_tsquery('john'::text)) +(2 rows) + +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + id | body | distance +----+------+---------- +(0 rows) + +-- Check that after the reinsertion, the new data can be found in the posting +-- list and posting tree. 
+INSERT INTO test_rum_vacuum SELECT i, to_tsvector('ann') FROM generate_series(10001, 10005) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('john') FROM generate_series(10006, 20000) i; +EXPLAIN (costs off) +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + QUERY PLAN +------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('ann'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('ann'::text)) (5 rows) -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; - id | body --------+-------------------- - 10000 | 'great':1 'john':2 - 6 | 'ann':2 'great':1 - 10001 | 'great':1 'jame':2 - 10002 | 'great':1 'jame':2 - 10003 | 'great':1 'jame':2 +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + count +------- + 5 +(1 row) + +EXPLAIN (costs off) +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + QUERY PLAN +-------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('john'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('john'::text)) (5 rows) --- test that data can still be found after reinsertion -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i; SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); count ------- - 9998 + 9995 (1 row) -DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999; -VACUUM test_rum_vacuum; -SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); - id | body -----+------------------- - 6 | 'ann':2 'great':1 -(1 row) +DROP TABLE test_rum_vacuum; +-- Check the backward scan direction. 
+SET enable_bitmapscan = off; +SET enable_indexscan = on; +-- Prepare the data. Creating a posting list for ann and a posting tree for john. +CREATE TABLE test_rum_vacuum_backward (id int, body tsvector); +ALTER TABLE test_rum_vacuum_backward SET (autovacuum_enabled = false); +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('ann') FROM generate_series(1, 5) i; +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('john') FROM generate_series(6, 10000) i; +CREATE INDEX ON test_rum_vacuum_backward USING rum (body rum_tsvector_addon_ops, id) WITH (attach='id', to='body', order_by_attach='true'); +-- Delete all the items from the posting list and all but one item from the +-- posting tree. +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery; +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery AND id <= 9999; +-- Check the backward scan before vacuum. +-- A scan with the results extracted from the posting list (in the backward direction). +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''ann'''::tsquery) + Order By: (id <=| 2) +(3 rows) -SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); - id | body --------+-------------------- - 20000 | 'great':1 'john':2 +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + id | body | distance +----+------+---------- +(0 rows) + +-- A scan with the results extracted from the posting tree (in the backward direction). 
+EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''john'''::tsquery) + Order By: (id <=| 2) +(3 rows) + +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + id | body | distance +-------+----------+---------- + 10000 | 'john':1 | Infinity (1 row) -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; - id | body --------+-------------------- - 6 | 'ann':2 'great':1 - 10001 | 'great':1 'jame':2 - 10002 | 'great':1 'jame':2 - 10003 | 'great':1 'jame':2 - 20000 | 'great':1 'john':2 -(5 rows) +-- Remove the elements from the posting tree and the posting list and make the +-- same checks. 
+VACUUM test_rum_vacuum_backward; +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''ann'''::tsquery) + Order By: (id <=| 2) +(3 rows) -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; - id | body --------+-------------------- - 20000 | 'great':1 'john':2 - 6 | 'ann':2 'great':1 - 10001 | 'great':1 'jame':2 - 10002 | 'great':1 'jame':2 - 10003 | 'great':1 'jame':2 -(5 rows) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + id | body | distance +----+------+---------- +(0 rows) --- test if do while loop works when an entry has no non-empty posting tree pages -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; -DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; -VACUUM test_rum_vacuum; -SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); - id | body -----+------------------- - 6 | 'ann':2 'great':1 +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''john'''::tsquery) + Order By: (id <=| 2) +(3 rows) + +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + id | body | distance +-------+----------+---------- + 10000 | 'john':1 | Infinity (1 row) -SELECT * FROM test_rum_vacuum WHERE body @@ 
to_tsquery('john'); - id | body -----+------ +-- Delete the last item from the posting tree and make the same check. +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum_backward; +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''john'''::tsquery) + Order By: (id <=| 2) +(3 rows) + +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + id | body | distance +----+------+---------- (0 rows) -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; - id | body --------+-------------------- - 6 | 'ann':2 'great':1 - 10001 | 'great':1 'jame':2 - 10002 | 'great':1 'jame':2 - 10003 | 'great':1 'jame':2 +-- Check that after the reinsertion, the new data can be found in the posting +-- list and posting tree. 
+INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('ann') FROM generate_series(10001, 10005) i; +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('john') FROM generate_series(10006, 20000) i; +EXPLAIN (costs off) +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY id <=| 2); + QUERY PLAN +----------------------------------------------------------------------------------------- + Aggregate + -> Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''ann'''::tsquery) + Order By: (id <=| 2) (4 rows) -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; - id | body --------+-------------------- - 6 | 'ann':2 'great':1 - 10001 | 'great':1 'jame':2 - 10002 | 'great':1 'jame':2 - 10003 | 'great':1 'jame':2 +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY id <=| 2); + count +------- + 5 +(1 row) + +EXPLAIN (costs off) +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY id <=| 2); + QUERY PLAN +----------------------------------------------------------------------------------------- + Aggregate + -> Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''john'''::tsquery) + Order By: (id <=| 2) (4 rows) + +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY id <=| 2); + count +------- + 9995 +(1 row) + +DROP TABLE test_rum_vacuum_backward; diff --git a/sql/rum_vacuum.sql b/sql/rum_vacuum.sql index cc9d087a01..2953f876cf 100644 --- a/sql/rum_vacuum.sql +++ b/sql/rum_vacuum.sql @@ -1,54 +1,154 @@ --- Test RUM index scan correctness after concurrent VACUUM removes all --- posting tree entry items. 
+-- The test verifies that the scan in different directions in the RUM index +-- correctly handles empty posting lists and empty pages of the posting tree +-- that have been vacuumed. -SET enable_seqscan TO off; -SET enable_indexscan TO off; -SET enable_bitmapscan TO on; +SET enable_seqscan = off; +SET enable_indexscan = off; +SET enable_bitmapscan = on; +-- Prepare the data. Creating a posting list for ann and a posting tree for john. CREATE TABLE test_rum_vacuum (id int, body tsvector); ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false); - -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i; -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i; - +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('ann') FROM generate_series(1, 5) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('john') FROM generate_series(6, 10000) i; CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops); -DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5; +-- Delete all the items from the posting list and all but one item from the +-- posting tree. +DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery; DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999; --- test normal result -SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); -SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; - -VACUUM test_rum_vacuum; +-- Check the scan before vacuum. 
--- this shouldn't cause a core dump +-- Scan with the extraction of results from the posting list (RumFastScan mode). +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + +-- Scan with the extraction of results from the posting tree (RumFastScan mode). +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; --- test that data can still be found after reinsertion -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i; -SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); -DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999; +-- Full-pass index scan (RumFullScan mode). +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; +-- Remove the elements from the posting tree and the posting list and make the +-- same checks. VACUUM test_rum_vacuum; +-- Scan of an empty posting list. +EXPLAIN (costs off) SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + +-- After the vacuum, the left and right leaf pages will remain in the posting +-- tree (because the outermost pages are not deleted). The left page is empty, +-- and the right page has one element. 
Check that the scan skips an left empty +-- page and receives an item from the right non-empty page. +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; --- test if do while loop works when an entry has no non-empty posting tree pages -INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i; +-- Check that in RumFullScan mode, the scan correctly switches from an empty +-- posting list to a non-empty posting tree and receives 1 element. +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + +-- Delete the last item from the posting tree. DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; VACUUM test_rum_vacuum; -SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +-- Check that the scan correctly skips all empty posting tree pages, reaches +-- the far right and ends. 
+EXPLAIN (costs off) SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC; -SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC; +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + +-- Check that in RumFullScan mode, the scan correctly skips an empty posting +-- list and an empty posting tree. +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + +-- Check that after the reinsertion, the new data can be found in the posting +-- list and posting tree. +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('ann') FROM generate_series(10001, 10005) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('john') FROM generate_series(10006, 20000) i; + +EXPLAIN (costs off) +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + +EXPLAIN (costs off) +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + +DROP TABLE test_rum_vacuum; + +-- Check the backward scan direction. + +SET enable_bitmapscan = off; +SET enable_indexscan = on; + +-- Prepare the data. Creating a posting list for ann and a posting tree for john. 
+CREATE TABLE test_rum_vacuum_backward (id int, body tsvector); +ALTER TABLE test_rum_vacuum_backward SET (autovacuum_enabled = false); +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('ann') FROM generate_series(1, 5) i; +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('john') FROM generate_series(6, 10000) i; +CREATE INDEX ON test_rum_vacuum_backward USING rum (body rum_tsvector_addon_ops, id) WITH (attach='id', to='body', order_by_attach='true'); + +-- Delete all the items from the posting list and all but one item from the +-- posting tree. +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery; +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery AND id <= 9999; + +-- Check the backward scan before vacuum. + +-- A scan with the results extracted from the posting list (in the backward direction). +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + +-- A scan with the results extracted from the posting tree (in the backward direction). +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + +-- Remove the elements from the posting tree and the posting list and make the +-- same checks. 
+VACUUM test_rum_vacuum_backward; + +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + +-- Delete the last item from the posting tree and make the same check. +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum_backward; + +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + +-- Check that after the reinsertion, the new data can be found in the posting +-- list and posting tree. 
+INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('ann') FROM generate_series(10001, 10005) i; +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('john') FROM generate_series(10006, 20000) i; + +EXPLAIN (costs off) +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY id <=| 2); +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY id <=| 2); + +EXPLAIN (costs off) +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY id <=| 2); +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY id <=| 2); + +DROP TABLE test_rum_vacuum_backward; diff --git a/src/rumget.c b/src/rumget.c index bfd02e2907..2e6fffe9e3 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -1892,6 +1892,15 @@ scanPage(RumState * rumstate, RumScanEntry entry, RumItem *item, bool equalOk) Page page = BufferGetPage(entry->buffer); RumItemPointerSetMin(&iter_item.iptr); + ptr = RumDataPageGetData(page); + maxoff = RumPageGetOpaque(page)->maxoff; + + /* + * If there are no items on the page (it was vacuumed), immediately return + * false. + */ + if (maxoff < FirstOffsetNumber) + return false; if (ScanDirectionIsForward(entry->scanDirection) && !RumPageRightMost(page)) { @@ -1901,9 +1910,6 @@ scanPage(RumState * rumstate, RumScanEntry entry, RumItem *item, bool equalOk) return false; } - ptr = RumDataPageGetData(page); - maxoff = RumPageGetOpaque(page)->maxoff; - for (j = 0; j < RumDataLeafIndexCount; j++) { RumDataLeafItemIndex *index = &RumPageGetIndexes(page)[j]; From 06a36be173df2c9ec753af05c1fece2c8794bca3 Mon Sep 17 00:00:00 2001 From: Arseny Kositsyn Date: Tue, 26 May 2026 14:24:16 +0300 Subject: [PATCH 3/3] Review fixes. 
--- src/rumget.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rumget.c b/src/rumget.c index 2e6fffe9e3..7621a7f9cd 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -714,12 +714,12 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) entry->curItem = entry->list[entry->offset]; } /* - * Else, the posting list for this entry has been entirely vacuumed - * away (nlist == 0 after setListPositionScanEntry). We cannot assume - * the scan is complete, as subsequent pages may exist. Therefore, we - * set isFinished = false and leave entry->nlist = 0 and entry->offset - * = 0 to ensure that entryGetItem advances to the next page on the - * next call. + * Else, the posting list for this IndexTuple has been entirely vacuumed + * away. We cannot assume that the scan is finished, as subsequent + * IndexTuples or pages may still contain valid results. Therefore, we + * set isFinished = false and keep entry->nlist = 0 and entry->offset = 0 + * to ensure that entryGetItem advances to the next page or IndexTuple + * on the next call. */ else entry->isFinished = false;