From 98b300be3a82fe47d33c8b8555b48e1519fc2c16 Mon Sep 17 00:00:00 2001 From: Tanuj Khurana Date: Wed, 21 Jan 2026 12:03:56 -0800 Subject: [PATCH 1/3] PHOENIX-7748 Empty column cell is not returned when scan has both EmptyColumnOnlyFilter and DistinctPrefixFilter --- .../phoenix/filter/EmptyColumnOnlyFilter.java | 15 +++++ .../apache/phoenix/end2end/EmptyColumnIT.java | 57 +++++++++++++++++++ .../end2end/index/GlobalIndexCheckerIT.java | 41 +++++++++++++ 3 files changed, 113 insertions(+) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java b/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java index 03e0168c3b9..924e0f80256 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java @@ -39,6 +39,7 @@ public class EmptyColumnOnlyFilter extends FilterBase implements Writable { private byte[] emptyCQ; private boolean found = false; private boolean first = true; + private Cell emptyColumnCell = null; public EmptyColumnOnlyFilter() { } @@ -54,6 +55,7 @@ public EmptyColumnOnlyFilter(byte[] emptyCF, byte[] emptyCQ) { public void reset() throws IOException { found = false; first = true; + emptyColumnCell = null; } // No @Override for HBase 3 compatibility @@ -68,6 +70,7 @@ public ReturnCode filterCell(final Cell cell) throws IOException { } if (ScanUtil.isEmptyColumn(cell, emptyCF, emptyCQ)) { found = true; + emptyColumnCell = cell; return ReturnCode.INCLUDE; } if (first) { @@ -80,7 +83,19 @@ public ReturnCode filterCell(final Cell cell) throws IOException { @Override public void filterRowCells(List kvs) throws IOException { if (kvs.size() > 1) { + // remove the first cell and only return the empty column cell kvs.remove(0); + } else if (kvs.size() == 1) { + // we only have 1 cell, check if it is the empty column cell or not + // since the empty column cell could have been excluded by another filter like the + // DistinctPrefixFilter. + Cell cell = kvs.get(0); + if (found && !ScanUtil.isEmptyColumn(cell, emptyCF, emptyCQ)) { + // we found the empty cell, but it was not included so replace the existing cell + // with the empty column cell + kvs.remove(0); + kvs.add(emptyColumnCell); + } } } diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/EmptyColumnIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/EmptyColumnIT.java index 328028ecb2c..5e0033bb4f9 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/EmptyColumnIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/EmptyColumnIT.java @@ -31,10 +31,14 @@ import static org.apache.phoenix.query.PhoenixTestBuilder.SchemaBuilder.TenantViewOptions; import static org.apache.phoenix.util.PhoenixRuntime.TENANT_ID_ATTRIB; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.sql.SQLException; import java.util.List; import java.util.Random; @@ -46,13 +50,17 @@ import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.phoenix.jdbc.PhoenixResultSet; import org.apache.phoenix.query.PhoenixTestBuilder.BasicDataWriter; import org.apache.phoenix.query.PhoenixTestBuilder.DataSupplier; import org.apache.phoenix.query.PhoenixTestBuilder.DataWriter; import org.apache.phoenix.query.PhoenixTestBuilder.SchemaBuilder; import org.apache.phoenix.query.QueryConstants; import org.apache.phoenix.schema.PTable; +import org.apache.phoenix.util.EnvironmentEdgeManager; import org.apache.phoenix.util.IndexUtil; +import org.apache.phoenix.util.ManualEnvironmentEdge; +import org.apache.phoenix.util.QueryUtil; import org.apache.phoenix.util.SchemaUtil; import org.apache.phoenix.util.TestUtil; import org.junit.Ignore; @@ -703,6 +711,55 @@ public List getValues(int rowIndex) { } } + /** + * Test that the empty column cell is returned by the scan when there is a DistinctPrefixFilter + * and an EmptyColumnOnlyFilter. If there is no empty column cell returned in the scan then TTL + * masking logic can break. + */ + @Test + public void testMaskingWithDistinctPrefixFilter() throws Exception { + try (Connection conn = DriverManager.getConnection(getUrl())) { + int ttl = 10; + String dataTableName = generateUniqueName(); + // not using column encoding so that we use EmptyColumnOnlyFilter + String ddl = "create table " + dataTableName + " (id1 varchar(10) not null , " + + "id2 varchar(10) not null , val1 varchar(10), val2 varchar(10) " + + "constraint PK primary key (id1, id2)) COLUMN_ENCODED_BYTES=0, TTL=" + ttl; + conn.createStatement().execute(ddl); + + String[] expectedValues = { "val1_1", "val1_2", "val1_3", "val1_4" }; + String dml = "UPSERT INTO " + dataTableName + " VALUES(?, ?, ?, ?)"; + PreparedStatement ps = conn.prepareStatement(dml); + for (int id1 = 0; id1 < 5; ++id1) { + ps.setString(1, "id1_" + id1); + for (int id2 = 0; id2 < 5; ++id2) { + ps.setString(2, "id2_" + id2); + ps.setString(3, "val1_" + id1 % 2); + ps.setString(4, "val2_" + id2 % 2); + ps.executeUpdate(); + } + } + conn.commit(); + try { + ManualEnvironmentEdge injectEdge = new ManualEnvironmentEdge(); + // expire the rows + injectEdge.setValue(EnvironmentEdgeManager.currentTimeMillis() + ttl * 1000 + 2); + EnvironmentEdgeManager.injectEdge(injectEdge); + String distinctQuery = "SELECT DISTINCT id1 FROM " + dataTableName; + try (ResultSet rs = conn.createStatement().executeQuery(distinctQuery)) { + PhoenixResultSet prs = rs.unwrap(PhoenixResultSet.class); + String explainPlan = QueryUtil.getExplainPlan(prs.getUnderlyingIterator()); + assertTrue(explainPlan.contains("SERVER FILTER BY EMPTY COLUMN ONLY")); + assertTrue(explainPlan.contains("SERVER DISTINCT PREFIX FILTER OVER")); + // all the rows should have been masked + assertFalse(rs.next()); + } + } finally { + EnvironmentEdgeManager.reset(); + } + } + } + private void upsertDataAndRunValidations(int numRowsToUpsert, ExpectedTestResults expectedTestResults, DataWriter dataWriter, SchemaBuilder schemaBuilder, List overriddenColumnsPositions) throws Exception { diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java index ee9acb4aefb..0abe583a18a 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/GlobalIndexCheckerIT.java @@ -42,6 +42,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; +import java.util.Arrays; import java.util.Calendar; import java.util.Collection; import java.util.List; @@ -64,6 +65,7 @@ import org.apache.phoenix.util.ReadOnlyProps; import org.apache.phoenix.util.TestUtil; import org.junit.After; +import org.junit.Assume; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -1661,6 +1663,45 @@ public void testOnDuplicateKeyWithIndex() throws Exception { } } + @Test + public void testWithDistinctPrefixFilter() throws Exception { + Assume.assumeTrue(async == false); + try (Connection conn = DriverManager.getConnection(getUrl())) { + String dataTableName = generateUniqueName(); + String indexTableName = generateUniqueName(); + String ddl = "create table " + dataTableName + " (id varchar(10) not null primary key, " + + "val1 varchar(10), val2 varchar(10), val3 varchar(10))" + tableDDLOptions; + conn.createStatement().execute(ddl); + ddl = "create index " + indexTableName + " on " + dataTableName + + " (val1) include (val2, val3)" + this.indexDDLOptions; + conn.createStatement().execute(ddl); + String[] expectedValues = { "val1_1", "val1_2", "val1_3", "val1_4" }; + int rowCount = 20; + String dml = "UPSERT INTO " + dataTableName + " VALUES(?, ?, ?, ?)"; + PreparedStatement ps = conn.prepareStatement(dml); + for (int id = 0; id < rowCount; ++id) { + ps.setString(1, "id_" + id); + ps.setString(2, expectedValues[id % expectedValues.length]); + ps.setString(3, "val2_" + id % 2); + ps.setString(4, "val3"); + ps.executeUpdate(); + } + conn.commit(); + String distinctQuery = "SELECT DISTINCT val1 FROM " + dataTableName; + try (ResultSet rs = conn.createStatement().executeQuery(distinctQuery)) { + PhoenixResultSet prs = rs.unwrap(PhoenixResultSet.class); + String explainPlan = QueryUtil.getExplainPlan(prs.getUnderlyingIterator()); + assertTrue(explainPlan.contains(indexTableName)); + assertTrue(explainPlan.contains("SERVER DISTINCT PREFIX FILTER OVER")); + List actualValues = Lists.newArrayList(); + while (rs.next()) { + actualValues.add(rs.getString(1)); + } + assertEquals(Arrays.asList(expectedValues), actualValues); + } + } + } + public static void commitWithException(Connection conn) { try { conn.commit(); From a4f30f0275dab0d88ccbc70b2cc8e9f94507b148 Mon Sep 17 00:00:00 2001 From: Tanuj Khurana Date: Mon, 26 Jan 2026 14:38:12 -0800 Subject: [PATCH 2/3] Address review comments --- .../java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java b/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java index 924e0f80256..93bd0aaad7d 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java @@ -82,7 +82,9 @@ public ReturnCode filterCell(final Cell cell) throws IOException { @Override public void filterRowCells(List kvs) throws IOException { - if (kvs.size() > 1) { + if (kvs.size() > 2) { + throw new IOException("EmptyColumnOnlyFilter got unexpected cells: " + kvs.size()); + } else if (kvs.size() == 2) { // remove the first cell and only return the empty column cell kvs.remove(0); } else if (kvs.size() == 1) { From a2cd2b566677c4a1d5fca33f8aa2a0f1c3b93a60 Mon Sep 17 00:00:00 2001 From: Tanuj Khurana Date: Tue, 27 Jan 2026 10:21:32 -0800 Subject: [PATCH 3/3] Fix spotless warnings --- .../java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java b/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java index 93bd0aaad7d..a5d78112c24 100644 --- a/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java +++ b/phoenix-core-client/src/main/java/org/apache/phoenix/filter/EmptyColumnOnlyFilter.java @@ -83,7 +83,7 @@ public ReturnCode filterCell(final Cell cell) throws IOException { @Override public void filterRowCells(List kvs) throws IOException { if (kvs.size() > 2) { - throw new IOException("EmptyColumnOnlyFilter got unexpected cells: " + kvs.size()); + throw new IOException("EmptyColumnOnlyFilter got unexpected cells: " + kvs.size()); } else if (kvs.size() == 2) { // remove the first cell and only return the empty column cell kvs.remove(0);