diff --git a/Makefile b/Makefile index 12af6d0c6c..de00b1860f 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp \ int2 int4 int8 float4 float8 money oid \ time timetz date interval \ macaddr inet cidr text varchar char bytea bit varbit \ - numeric rum_weight expr array + numeric rum_weight expr array rum_vacuum TAP_TESTS = 1 diff --git a/expected/rum_vacuum.out b/expected/rum_vacuum.out new file mode 100644 index 0000000000..b65e9043d7 --- /dev/null +++ b/expected/rum_vacuum.out @@ -0,0 +1,325 @@ +-- The test verifies that the scan in different directions in the RUM index +-- correctly handles empty posting lists and empty pages of the posting tree +-- that have been vacuumed. +SET enable_seqscan = off; +SET enable_indexscan = off; +SET enable_bitmapscan = on; +-- Prepare the data. Creating a posting list for ann and a posting tree for john. +CREATE TABLE test_rum_vacuum (id int, body tsvector); +ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false); +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('ann') FROM generate_series(1, 5) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('john') FROM generate_series(6, 10000) i; +CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops); +-- Delete all the items from the posting list and all but one item from the +-- posting tree. +DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery; +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999; +-- Check the scan before vacuum. +-- Scan with the extraction of results from the posting list (RumFastScan mode). 
+EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + QUERY PLAN +------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('ann'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('ann'::text)) +(4 rows) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + id | body +----+------ +(0 rows) + +-- Scan with the extraction of results from the posting tree (RumFastScan mode). +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + QUERY PLAN +-------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('john'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('john'::text)) +(4 rows) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + id | body +-------+---------- + 10000 | 'john':1 +(1 row) + +-- Full-pass index scan (RumFullScan mode). +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using test_rum_vacuum_body_idx on test_rum_vacuum + Order By: (body <=> to_tsquery('john'::text)) +(2 rows) + +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + id | body | distance +-------+----------+---------- + 10000 | 'john':1 | 16.44934 +(1 row) + +-- Remove the elements from the posting tree and the posting list and make the +-- same checks. +VACUUM test_rum_vacuum; +-- Scan of an empty posting list. 
+EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + QUERY PLAN +------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('ann'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('ann'::text)) +(4 rows) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + id | body +----+------ +(0 rows) + +-- After the vacuum, the left and right leaf pages will remain in the posting +-- tree (because the outermost pages are not deleted). The left page is empty, +-- and the right page has one element. Check that the scan skips an left empty +-- page and receives an item from the right non-empty page. +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + QUERY PLAN +-------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('john'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('john'::text)) +(4 rows) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + id | body +-------+---------- + 10000 | 'john':1 +(1 row) + +-- Check that in RumFullScan mode, the scan correctly switches from an empty +-- posting list to a non-empty posting tree and receives 1 element. +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using test_rum_vacuum_body_idx on test_rum_vacuum + Order By: (body <=> to_tsquery('john'::text)) +(2 rows) + +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + id | body | distance +-------+----------+---------- + 10000 | 'john':1 | 16.44934 +(1 row) + +-- Delete the last item from the posting tree. 
+DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum; +-- Check that the scan correctly skips all empty posting tree pages, reaches +-- the far right and ends. +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + QUERY PLAN +-------------------------------------------------------- + Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('john'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('john'::text)) +(4 rows) + +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + id | body +----+------ +(0 rows) + +-- Check that in RumFullScan mode, the scan correctly skips an empty posting +-- list and an empty posting tree. +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + QUERY PLAN +-------------------------------------------------------------- + Index Scan using test_rum_vacuum_body_idx on test_rum_vacuum + Order By: (body <=> to_tsquery('john'::text)) +(2 rows) + +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + id | body | distance +----+------+---------- +(0 rows) + +-- Check that after the reinsertion, the new data can be found in the posting +-- list and posting tree. 
+INSERT INTO test_rum_vacuum SELECT i, to_tsvector('ann') FROM generate_series(10001, 10005) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('john') FROM generate_series(10006, 20000) i; +EXPLAIN (costs off) +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + QUERY PLAN +------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('ann'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('ann'::text)) +(5 rows) + +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + count +------- + 5 +(1 row) + +EXPLAIN (costs off) +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + QUERY PLAN +-------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on test_rum_vacuum + Recheck Cond: (body @@ to_tsquery('john'::text)) + -> Bitmap Index Scan on test_rum_vacuum_body_idx + Index Cond: (body @@ to_tsquery('john'::text)) +(5 rows) + +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + count +------- + 9995 +(1 row) + +DROP TABLE test_rum_vacuum; +-- Check the backward scan direction. +SET enable_bitmapscan = off; +SET enable_indexscan = on; +-- Prepare the data. Creating a posting list for ann and a posting tree for john. +CREATE TABLE test_rum_vacuum_backward (id int, body tsvector); +ALTER TABLE test_rum_vacuum_backward SET (autovacuum_enabled = false); +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('ann') FROM generate_series(1, 5) i; +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('john') FROM generate_series(6, 10000) i; +CREATE INDEX ON test_rum_vacuum_backward USING rum (body rum_tsvector_addon_ops, id) WITH (attach='id', to='body', order_by_attach='true'); +-- Delete all the items from the posting list and all but one item from the +-- posting tree. 
+DELETE FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery; +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery AND id <= 9999; +-- Check the backward scan before vacuum. +-- A scan with the results extracted from the posting list (in the backward direction). +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''ann'''::tsquery) + Order By: (id <=| 2) +(3 rows) + +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + id | body | distance +----+------+---------- +(0 rows) + +-- A scan with the results extracted from the posting tree (in the backward direction). +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''john'''::tsquery) + Order By: (id <=| 2) +(3 rows) + +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + id | body | distance +-------+----------+---------- + 10000 | 'john':1 | Infinity +(1 row) + +-- Remove the elements from the posting tree and the posting list and make the +-- same checks. 
+VACUUM test_rum_vacuum_backward; +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''ann'''::tsquery) + Order By: (id <=| 2) +(3 rows) + +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + id | body | distance +----+------+---------- +(0 rows) + +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''john'''::tsquery) + Order By: (id <=| 2) +(3 rows) + +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + id | body | distance +-------+----------+---------- + 10000 | 'john':1 | Infinity +(1 row) + +-- Delete the last item from the posting tree and make the same check. 
+DELETE FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum_backward; +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + QUERY PLAN +----------------------------------------------------------------------------------- + Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''john'''::tsquery) + Order By: (id <=| 2) +(3 rows) + +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + id | body | distance +----+------+---------- +(0 rows) + +-- Check that after the reinsertion, the new data can be found in the posting +-- list and posting tree. +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('ann') FROM generate_series(10001, 10005) i; +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('john') FROM generate_series(10006, 20000) i; +EXPLAIN (costs off) +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY id <=| 2); + QUERY PLAN +----------------------------------------------------------------------------------------- + Aggregate + -> Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''ann'''::tsquery) + Order By: (id <=| 2) +(4 rows) + +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY id <=| 2); + count +------- + 5 +(1 row) + +EXPLAIN (costs off) +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY id <=| 2); + QUERY PLAN +----------------------------------------------------------------------------------------- + Aggregate + -> Index Scan using test_rum_vacuum_backward_body_id_idx on test_rum_vacuum_backward + Index Cond: (body @@ '''john'''::tsquery) + Order By: (id <=| 2) +(4 rows) + +SELECT count(*) FROM (SELECT * FROM 
test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY id <=| 2); + count +------- + 9995 +(1 row) + +DROP TABLE test_rum_vacuum_backward; diff --git a/sql/rum_vacuum.sql b/sql/rum_vacuum.sql new file mode 100644 index 0000000000..2953f876cf --- /dev/null +++ b/sql/rum_vacuum.sql @@ -0,0 +1,154 @@ +-- The test verifies that the scan in different directions in the RUM index +-- correctly handles empty posting lists and empty pages of the posting tree +-- that have been vacuumed. + +SET enable_seqscan = off; +SET enable_indexscan = off; +SET enable_bitmapscan = on; + +-- Prepare the data. Creating a posting list for ann and a posting tree for john. +CREATE TABLE test_rum_vacuum (id int, body tsvector); +ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false); +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('ann') FROM generate_series(1, 5) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('john') FROM generate_series(6, 10000) i; +CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops); + +-- Delete all the items from the posting list and all but one item from the +-- posting tree. +DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery; +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999; + +-- Check the scan before vacuum. + +-- Scan with the extraction of results from the posting list (RumFastScan mode). +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + +-- Scan with the extraction of results from the posting tree (RumFastScan mode). +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + +-- Full-pass index scan (RumFullScan mode). 
+EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + +-- Remove the elements from the posting tree and the posting list and make the +-- same checks. +VACUUM test_rum_vacuum; + +-- Scan of an empty posting list. +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + +-- After the vacuum, the left and right leaf pages will remain in the posting +-- tree (because the outermost pages are not deleted). The left page is empty, +-- and the right page has one element. Check that the scan skips an left empty +-- page and receives an item from the right non-empty page. +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + +-- Check that in RumFullScan mode, the scan correctly switches from an empty +-- posting list to a non-empty posting tree and receives 1 element. +EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + +-- Delete the last item from the posting tree. +DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum; + +-- Check that the scan correctly skips all empty posting tree pages, reaches +-- the far right and ends. +EXPLAIN (costs off) +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + +-- Check that in RumFullScan mode, the scan correctly skips an empty posting +-- list and an empty posting tree. 
+EXPLAIN (costs off) +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; +SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY distance; + +-- Check that after the reinsertion, the new data can be found in the posting +-- list and posting tree. +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('ann') FROM generate_series(10001, 10005) i; +INSERT INTO test_rum_vacuum SELECT i, to_tsvector('john') FROM generate_series(10006, 20000) i; + +EXPLAIN (costs off) +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('ann'); + +EXPLAIN (costs off) +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); +SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john'); + +DROP TABLE test_rum_vacuum; + +-- Check the backward scan direction. + +SET enable_bitmapscan = off; +SET enable_indexscan = on; + +-- Prepare the data. Creating a posting list for ann and a posting tree for john. +CREATE TABLE test_rum_vacuum_backward (id int, body tsvector); +ALTER TABLE test_rum_vacuum_backward SET (autovacuum_enabled = false); +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('ann') FROM generate_series(1, 5) i; +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('john') FROM generate_series(6, 10000) i; +CREATE INDEX ON test_rum_vacuum_backward USING rum (body rum_tsvector_addon_ops, id) WITH (attach='id', to='body', order_by_attach='true'); + +-- Delete all the items from the posting list and all but one item from the +-- posting tree. +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery; +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery AND id <= 9999; + +-- Check the backward scan before vacuum. + +-- A scan with the results extracted from the posting list (in the backward direction). 
+EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + +-- A scan with the results extracted from the posting tree (in the backward direction). +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + +-- Remove the elements from the posting tree and the posting list and make the +-- same checks. +VACUUM test_rum_vacuum_backward; + +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY distance; + +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + +-- Delete the last item from the posting tree and make the same check. +DELETE FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery; +VACUUM test_rum_vacuum_backward; + +EXPLAIN (costs off) +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; +SELECT *, id <=| 2 AS distance FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY distance; + +-- Check that after the reinsertion, the new data can be found in the posting +-- list and posting tree. 
+INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('ann') FROM generate_series(10001, 10005) i; +INSERT INTO test_rum_vacuum_backward SELECT i, to_tsvector('john') FROM generate_series(10006, 20000) i; + +EXPLAIN (costs off) +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY id <=| 2); +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'ann'::tsquery ORDER BY id <=| 2); + +EXPLAIN (costs off) +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY id <=| 2); +SELECT count(*) FROM (SELECT * FROM test_rum_vacuum_backward WHERE body @@ 'john'::tsquery ORDER BY id <=| 2); + +DROP TABLE test_rum_vacuum_backward; diff --git a/src/rumget.c b/src/rumget.c index a36229f59f..7621a7f9cd 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -684,9 +684,23 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) } LockBuffer(entry->buffer, RUM_UNLOCK); - entry->isFinished = setListPositionScanEntry(rumstate, entry); - if (!entry->isFinished) - entry->curItem = entry->list[entry->offset]; + + /* + * If the current page is empty (nlist == 0), we cannot assume the + * scan is complete, as subsequent pages may exist. Therefore, we + * set isFinished = false and leave entry->nlist = 0 and + * entry->offset = 0 to ensure that entryGetItem advances to the + * next page on the next call. Otherwise, initialize curItem to + * the first valid item. + */ + if (entry->nlist == 0) + entry->isFinished = false; + else + { + entry->isFinished = setListPositionScanEntry(rumstate, entry); + if (!entry->isFinished) + entry->curItem = entry->list[entry->offset]; + } } else if (RumGetNPosting(itup) > 0) { @@ -699,6 +713,16 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) if (!entry->isFinished) entry->curItem = entry->list[entry->offset]; } + /* + * Else, the posting list for this IndexTuple has been entirely vacuumed + * away. 
We cannot assume that the scan is finished, as subsequent + * IndexTuples or pages may still contain valid results. Therefore, we + * set isFinished = false and keep entry->nlist = 0 and entry->offset = 0 + * to ensure that entryGetItem advances to the next page or IndexTuple + * on the next call. + */ + else + entry->isFinished = false; if (entry->queryCategory == RUM_CAT_EMPTY_QUERY && entry->scanWithAddInfo) @@ -1011,6 +1035,13 @@ entryGetNextItem(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) LockBuffer(entry->buffer, RUM_UNLOCK); + /* + * No valid item if VACUUM removed all items concurrently. Go on to + * the next page. + */ + if (entry->nlist == 0) + break; + if (entry->offset < 0) { if (ScanDirectionIsForward(entry->scanDirection) && @@ -1044,6 +1075,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) RumItemSetMin(&entry->curItem); entry->offset = InvalidOffsetNumber; entry->list = NULL; + entry->nlist = 0; if (entry->gdi) { freeRumBtreeStack(entry->gdi->stack); @@ -1151,6 +1183,18 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) LockBuffer(entry->buffer, RUM_UNLOCK); entry->isFinished = false; + + /* + * Posting tree's first leaf page is empty due to concurrent VACUUM. + * Advance through empty pages until we find one with items or exhaust + * the tree. entryGetItem does not re-invoke entryGetNextItem after we + * return, so we must do it here to ensure curItem is valid on return. + */ + if (entry->nlist == 0) + { + entryGetNextItem(rumstate, entry, snapshot); + goto entry_done; + } } else if (RumGetNPosting(itup) > 0) { @@ -1161,12 +1205,21 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); entry->isFinished = setListPositionScanEntry(rumstate, entry); } + /* Posting list has been vacuumed. Go to the next entry. 
*/ + else + { + ItemPointerSetInvalid(&entry->curItem.iptr); + entry->isFinished = true; + goto entry_done; + } Assert(entry->nlist > 0 && entry->list); entry->curItem = entry->list[entry->offset]; entry->offset += entry->scanDirection; +entry_done: + SCAN_ENTRY_GET_KEY(entry, rumstate, itup); /* @@ -1340,8 +1393,24 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps } else if (entry->stack) { - entry->offset++; - if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList) + /* + * We are responsible for ensuring that we keep advancing through + * ItemLists until we find one that contains at least one valid + * item. This is necessary because concurrent VACUUM may have + * removed all items from a page, leaving an empty ItemList. In + * such cases, we must continue to the next ItemList. + */ + bool success; + + Assert(!entry->isFinished); + + do + { + entry->isFinished = false; + success = entryGetNextItemList(rumstate, entry, snapshot); + } while (success && entry->nlist == 0); + + if (success && nextEntryList) *nextEntryList = true; } else @@ -1361,8 +1430,22 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps dropItem(entry)); if (entry->stack && entry->isFinished) { - entry->isFinished = false; - if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList) + /* + * We are responsible for ensuring that we keep advancing through + * ItemLists until we find one that contains at least one valid + * item. This is necessary because concurrent VACUUM may have + * removed all items from a page, leaving an empty ItemList. In + * such cases, we must continue to the next ItemList. 
+ */ + bool success; + + do + { + entry->isFinished = false; + success = entryGetNextItemList(rumstate, entry, snapshot); + } while (success && entry->nlist == 0); + + if (success && nextEntryList) *nextEntryList = true; } } @@ -1809,6 +1892,15 @@ scanPage(RumState * rumstate, RumScanEntry entry, RumItem *item, bool equalOk) Page page = BufferGetPage(entry->buffer); RumItemPointerSetMin(&iter_item.iptr); + ptr = RumDataPageGetData(page); + maxoff = RumPageGetOpaque(page)->maxoff; + + /* + * If there are no items on the page (it was vacuumed), immediately return + * false. + */ + if (maxoff < FirstOffsetNumber) + return false; if (ScanDirectionIsForward(entry->scanDirection) && !RumPageRightMost(page)) { @@ -1818,9 +1910,6 @@ scanPage(RumState * rumstate, RumScanEntry entry, RumItem *item, bool equalOk) return false; } - ptr = RumDataPageGetData(page); - maxoff = RumPageGetOpaque(page)->maxoff; - for (j = 0; j < RumDataLeafIndexCount; j++) { RumDataLeafItemIndex *index = &RumPageGetIndexes(page)[j];