diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index e83b178e4dc7..903517c10774 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -587,16 +587,12 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs, // key as null, too. // for (int column : outerSmallTableKeyColumnMap) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; + batch.cols[column].clearValue(batchIndex); } // Small table values are set to null. for (int column : smallTableValueColumnMap) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; + batch.cols[column].clearValue(batchIndex); } } } @@ -746,15 +742,13 @@ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hi // for (int column : outerSmallTableKeyColumnMap) { ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[0] = true; + colVector.clearValue(0); colVector.isRepeating = true; } for (int column : smallTableValueColumnMap) { ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[0] = true; + colVector.clearValue(0); colVector.isRepeating = true; } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java new file mode 100644 index 000000000000..7a7b035a3c50 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VoidColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests that {@link VectorMapJoinOuterGenerateResultOperator} invokes + * {@code clearValue} on every small-table key and value column for each + * unmatched big-table row. The HIVE-29598 bug is that without that call, + * a stale {@code vector[i]} survives the null marking and leaks into + * downstream operators that read the slot without checking {@code isNull[i]}. + */ +class TestVectorMapJoinOuterGenerateResultOperator { + + /** + * A concrete subclass of the abstract operator with no-op stubs for the + * abstract methods, just enough to invoke {@code generateOuterNulls} and + * {@code generateOuterNullsRepeatedAll} directly from tests. + */ + private static class TestableOuterOp extends VectorMapJoinOuterGenerateResultOperator { + @Override + protected String getLoggingPrefix() { + return "test"; + } + + @Override + public void processBatch(VectorizedRowBatch batch) { + // No-op: tests invoke the generateOuterNulls* methods directly. + } + } + + /** + * LongColumnVector that records every {@code clearSlotValue} invocation. + * Used to assert that the operator dispatched through the new + * {@link org.apache.hadoop.hive.ql.exec.vector.ColumnVector#clearValue(int)} + * contract, rather than just observing the slot-clearing side effect + * (which could also be produced by another mechanism). + */ + private static class TrackingLongColumnVector extends LongColumnVector { + final List clearedIndices = new ArrayList<>(); + + TrackingLongColumnVector(int size) { + super(size); + } + + @Override + protected void clearSlotValue(int elementNum) { + super.clearSlotValue(elementNum); + clearedIndices.add(elementNum); + } + } + + @Test + void generateOuterNullsCallsClearValueOnEachMappedColumnForEachUnmatchedRow() throws HiveException, IOException { + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {0}; + op.smallTableValueColumnMap = new int[] {1, 2}; + + VectorizedRowBatch batch = new VectorizedRowBatch(3, 4); + TrackingLongColumnVector keyCol = new TrackingLongColumnVector(4); + TrackingLongColumnVector valCol1 = new TrackingLongColumnVector(4); + TrackingLongColumnVector valCol2 = new TrackingLongColumnVector(4); + keyCol.vector[1] = 99L; // stale values that should be cleared + valCol1.vector[1] = 88L; + valCol2.vector[3] = 77L; + batch.cols[0] = keyCol; + batch.cols[1] = valCol1; + batch.cols[2] = valCol2; + + int[] noMatchs = new int[] {1, 3}; + op.generateOuterNulls(batch, noMatchs, noMatchs.length); + + // Each tracked column had clearSlotValue invoked at indices 1 and 3. + assertEquals(Arrays.asList(1, 3), keyCol.clearedIndices); + assertEquals(Arrays.asList(1, 3), valCol1.clearedIndices); + assertEquals(Arrays.asList(1, 3), valCol2.clearedIndices); + + // Bookkeeping side effect of clearValue (final base-class method). + assertFalse(keyCol.noNulls); + assertTrue(keyCol.isNull[1]); + assertTrue(keyCol.isNull[3]); + assertFalse(keyCol.isNull[0]); + assertFalse(keyCol.isNull[2]); + + // Stale slot values cleared to 0L. + assertEquals(0L, keyCol.vector[1]); + assertEquals(0L, valCol1.vector[1]); + assertEquals(0L, valCol2.vector[3]); + } + + @Test + void generateOuterNullsRepeatedAllCallsClearValueAtIndexZeroForEachMappedColumn() throws HiveException, IOException { + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {0}; + op.smallTableValueColumnMap = new int[] {1}; + + VectorizedRowBatch batch = new VectorizedRowBatch(2, 4); + TrackingLongColumnVector keyCol = new TrackingLongColumnVector(4); + TrackingLongColumnVector valCol = new TrackingLongColumnVector(4); + keyCol.vector[0] = 42L; + valCol.vector[0] = 84L; + batch.cols[0] = keyCol; + batch.cols[1] = valCol; + + op.generateOuterNullsRepeatedAll(batch); + + // Each tracked column had clearSlotValue invoked exactly once at index 0. + assertEquals(Arrays.asList(0), keyCol.clearedIndices); + assertEquals(Arrays.asList(0), valCol.clearedIndices); + + // Bookkeeping plus isRepeating set by the operator after clearValue. + assertFalse(keyCol.noNulls); + assertTrue(keyCol.isNull[0]); + assertTrue(keyCol.isRepeating); + assertFalse(valCol.noNulls); + assertTrue(valCol.isNull[0]); + assertTrue(valCol.isRepeating); + + // Stale slot values cleared to 0L. + assertEquals(0L, keyCol.vector[0]); + assertEquals(0L, valCol.vector[0]); + } + + @Test + void generateOuterNullsSetsBookkeepingOnTypeWithNoClearSlotValueOverride() throws HiveException, IOException { + // VoidColumnVector inherits the base ColumnVector.clearSlotValue() no-op + // — no per-slot value to zero. This verifies the operator still drives + // the bookkeeping (isNull[i], noNulls) through the final clearValue() + // contract on a type that doesn't override the hook. + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {}; + op.smallTableValueColumnMap = new int[] {0}; + + VectorizedRowBatch batch = new VectorizedRowBatch(1, 4); + VoidColumnVector voidCol = new VoidColumnVector(4); + batch.cols[0] = voidCol; + + int[] noMatchs = new int[] {1, 3}; + op.generateOuterNulls(batch, noMatchs, noMatchs.length); + + assertFalse(voidCol.noNulls); + assertTrue(voidCol.isNull[1]); + assertTrue(voidCol.isNull[3]); + assertFalse(voidCol.isNull[0]); + assertFalse(voidCol.isNull[2]); + } + + /** + * Verifies that for every {@link ColumnVector} subclass whose + * {@code clearSlotValue} the PR overrides, the operator's call through + * {@code clearValue} ultimately reaches that override and zeroes the slot + * to the type's cleared state. Complements + * {@link #generateOuterNullsCallsClearValueOnEachMappedColumnForEachUnmatchedRow} + * which proves the dispatch chain on Long alone. + */ + @ParameterizedTest(name = "{0}") + @MethodSource("modifiedColumnVectorTypes") + void generateOuterNullsClearsSlotForEachModifiedType( + String typeName, + ColumnVector cv, + Runnable preLoad, + Runnable assertSlotCleared) throws HiveException, IOException { + + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {}; + op.smallTableValueColumnMap = new int[] {0}; + + VectorizedRowBatch batch = new VectorizedRowBatch(1, 4); + preLoad.run(); + batch.cols[0] = cv; + + int[] noMatchs = new int[] {2}; + op.generateOuterNulls(batch, noMatchs, noMatchs.length); + + assertTrue(cv.isNull[2]); + assertFalse(cv.noNulls); + assertSlotCleared.run(); + } + + static Stream modifiedColumnVectorTypes() { + final LongColumnVector longCv = new LongColumnVector(4); + final DoubleColumnVector doubleCv = new DoubleColumnVector(4); + final BytesColumnVector bytesCv = new BytesColumnVector(4); + final DecimalColumnVector decCv = new DecimalColumnVector(4, 18, 4); + final TimestampColumnVector tsCv = new TimestampColumnVector(4); + final IntervalDayTimeColumnVector ivCv = new IntervalDayTimeColumnVector(4); + + return Stream.of( + Arguments.of( + "LongColumnVector", + longCv, + (Runnable) () -> longCv.vector[2] = 999L, + (Runnable) () -> assertEquals(0L, longCv.vector[2])), + Arguments.of( + "DoubleColumnVector", + doubleCv, + (Runnable) () -> doubleCv.vector[2] = 3.14, + (Runnable) () -> assertEquals(0.0, doubleCv.vector[2])), + Arguments.of( + "BytesColumnVector", + bytesCv, + (Runnable) () -> { + bytesCv.vector[2] = "stale".getBytes(StandardCharsets.UTF_8); + bytesCv.start[2] = 1; + bytesCv.length[2] = 3; + }, + (Runnable) () -> { + assertNull(bytesCv.vector[2]); + assertEquals(0, bytesCv.start[2]); + assertEquals(0, bytesCv.length[2]); + }), + Arguments.of( + "DecimalColumnVector", + decCv, + (Runnable) () -> decCv.vector[2].setFromLong(999L), + (Runnable) () -> assertEquals(0L, decCv.vector[2].serialize64(decCv.scale))), + Arguments.of( + "TimestampColumnVector", + tsCv, + (Runnable) () -> { + tsCv.time[2] = 1234567890000L; + tsCv.nanos[2] = 999; + }, + (Runnable) () -> { + // setNullValue convention: time = 0, nanos = 1 + assertEquals(0L, tsCv.time[2]); + assertEquals(1, tsCv.nanos[2]); + }), + Arguments.of( + "IntervalDayTimeColumnVector", + ivCv, + (Runnable) () -> ivCv.set(2, new HiveIntervalDayTime(5, 0)), + (Runnable) () -> { + // setNullValue convention: totalSeconds = 0, nanos = 1 + assertEquals(0L, ivCv.getTotalSeconds(2)); + assertEquals(1, ivCv.getNanos(2)); + }) + ); + } +} diff --git a/ql/src/test/queries/clientpositive/vector_outer_join7.q b/ql/src/test/queries/clientpositive/vector_outer_join7.q new file mode 100644 index 000000000000..041601265441 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_outer_join7.q @@ -0,0 +1,61 @@ +set hive.explain.user=false; +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.fetch.task.conversion=none; + +-- SORT_QUERY_RESULTS + +-- Regression test for HIVE-29598: vectorized outer-join MapJoin can leave a +-- stale typed value in a scratch slot that the vectorizer has aliased to a +-- smallTableValueMapping target. For unmatched rows, generateOuterNulls() +-- flips isNull[i] = true but does not clear vector[i], so a downstream +-- ColOrCol -> IfExprLongScalarLongScalar chain that reads vector[i] without +-- consulting isNull[i] propagates the stale value into the result. +-- +-- Repro shape: a LEFT OUTER MapJoin whose ON predicate uses a CAST scratch +-- column that is then reused as the small-table boolean-value column; the +-- projection computes CAST((s.s_bool OR p.p_bool) AS INT) over the null-padded +-- rows. The MAX() aggregate barrier prevents Calcite from inlining and +-- simplifying the bug surface away. +-- +-- Expected: zero rows. Every probe row's (s_bool OR p_bool) is TRUE per SQL +-- three-valued logic (matched: FALSE OR TRUE; unmatched: NULL OR TRUE), so +-- CAST(... AS INT) is always 1 and WHERE observed_value = 0 matches nothing. +-- Without the fix: 'C 3 0 1' and 'D 3 0 1' leak through, since the stale int +-- from CastStringToLong ORed with 1 fails the strict == 1 check in +-- IfExprLongScalarLongScalar and stores 0. + +CREATE TABLE t (k STRING, v STRING) STORED AS ORC; + +INSERT INTO t VALUES + ('A','1'),('A','2'),('A','3'), + ('B','2'),('B','3'), + ('C','3'), + ('D','1'),('D','3'); + +WITH + probe AS ( + SELECT k, v, (CAST(v AS INT) > 0) AS p_bool + FROM t WHERE CAST(v AS INT) >= 3 + ), + small_side AS ( + SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool + FROM t + ), + classified AS ( + SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value + FROM probe p + LEFT JOIN small_side s + ON p.k = s.k + AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT) + ), + diagnosed AS ( + SELECT k, v, MAX(observed_value) AS observed_value + FROM classified + GROUP BY k, v + ) +SELECT k, v, + observed_value AS observed_value_returned_by_select, + 1 AS required_value_per_sql_semantics +FROM diagnosed +WHERE observed_value = 0; diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out new file mode 100644 index 000000000000..df755cfe4732 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out @@ -0,0 +1,84 @@ +PREHOOK: query: CREATE TABLE t (k STRING, v STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: CREATE TABLE t (k STRING, v STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: INSERT INTO t VALUES + ('A','1'),('A','2'),('A','3'), + ('B','2'),('B','3'), + ('C','3'), + ('D','1'),('D','3') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: INSERT INTO t VALUES + ('A','1'),('A','2'),('A','3'), + ('B','2'),('B','3'), + ('C','3'), + ('D','1'),('D','3') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.k SCRIPT [] +POSTHOOK: Lineage: t.v SCRIPT [] +PREHOOK: query: WITH + probe AS ( + SELECT k, v, (CAST(v AS INT) > 0) AS p_bool + FROM t WHERE CAST(v AS INT) >= 3 + ), + small_side AS ( + SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool + FROM t + ), + classified AS ( + SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value + FROM probe p + LEFT JOIN small_side s + ON p.k = s.k + AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT) + ), + diagnosed AS ( + SELECT k, v, MAX(observed_value) AS observed_value + FROM classified + GROUP BY k, v + ) +SELECT k, v, + observed_value AS observed_value_returned_by_select, + 1 AS required_value_per_sql_semantics +FROM diagnosed +WHERE observed_value = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: WITH + probe AS ( + SELECT k, v, (CAST(v AS INT) > 0) AS p_bool + FROM t WHERE CAST(v AS INT) >= 3 + ), + small_side AS ( + SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool + FROM t + ), + classified AS ( + SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value + FROM probe p + LEFT JOIN small_side s + ON p.k = s.k + AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT) + ), + diagnosed AS ( + SELECT k, v, MAX(observed_value) AS observed_value + FROM classified + GROUP BY k, v + ) +SELECT k, v, + observed_value AS observed_value_returned_by_select, + 1 AS required_value_per_sql_semantics +FROM diagnosed +WHERE observed_value = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index ec98d2ab5b8c..cce0cb9ad949 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -504,6 +504,13 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i } } + @Override + protected void clearSlotValue(int elementNum) { + vector[elementNum] = null; + start[elementNum] = 0; + length[elementNum] = 0; + } + @Override public void init() { initBuffer(0); diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index 9f611dfd313b..ee5e3f3885ee 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -231,6 +231,32 @@ public abstract void setElement(int outputElementNum, int inputElementNum, public abstract void copySelected( boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector); + /** + * Mark the slot null and put its underlying value into a defined cleared + * state. Sets {@code isNull[elementNum] = true} and {@code noNulls = false}, + * then dispatches to {@link #clearSlotValue(int)} for per-type clearing. + * + *

Defends against consumers that read {@code vector[i]} without first + * checking {@code isNull[i]}. Distinct from per-type {@code NULL_VALUE} + * sentinels (e.g. {@link LongColumnVector#NULL_VALUE}), which assume the + * isNull[]-first contract. Final by design — subclasses customize behavior + * by overriding {@link #clearSlotValue(int)}, never this method. + */ + public final void clearValue(int elementNum) { + noNulls = false; + isNull[elementNum] = true; + clearSlotValue(elementNum); + } + + /** + * Per-type slot-clearing hook invoked by {@link #clearValue(int)}. + * Subclasses override to zero out their value array at {@code elementNum}. + * Container and void types inherit the no-op default. + */ + protected void clearSlotValue(int elementNum) { + // Default no-op. + } + /** * Initialize the column vector. This method can be overridden by specific column vector types. * Use this method only if the individual type of the column vector is not known, otherwise its diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index 5defd27623b2..e0cdd76de156 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -144,6 +144,11 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i } } + @Override + protected void clearSlotValue(int elementNum) { + vector[elementNum].setFromLong(0L); + } + @Override public void stringifyValue(StringBuilder buffer, int row) { if (isRepeating) { diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index f833bde03f6e..fcf297585c63 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -220,6 +220,11 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i } } + @Override + protected void clearSlotValue(int elementNum) { + vector[elementNum] = 0.0; + } + @Override public void stringifyValue(StringBuilder buffer, int row) { if (isRepeating) { diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java index 9324bc0c610d..2b61b09e38cb 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java @@ -311,6 +311,11 @@ public void setNullValue(int elementNum) { nanos[elementNum] = 1; } + @Override + protected void clearSlotValue(int elementNum) { + setNullValue(elementNum); + } + // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. @Override diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index bf423674b2aa..dc727b462a74 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -294,6 +294,11 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i } } + @Override + protected void clearSlotValue(int elementNum) { + vector[elementNum] = 0L; + } + @Override public void stringifyValue(StringBuilder buffer, int row) { if (isRepeating) { diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java index f97156c40381..c49149cdb281 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java @@ -378,6 +378,11 @@ public void setNullValue(int elementNum) { nanos[elementNum] = 1; } + @Override + protected void clearSlotValue(int elementNum) { + setNullValue(elementNum); + } + // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. @Override diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java index be4ff70935a7..2de07b4223e6 100644 --- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java @@ -25,7 +25,9 @@ import org.junit.Test; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; @@ -220,4 +222,21 @@ private static byte[] writeToBytesColumnVector(int rowIdx, BytesColumnVector col col.setValPreallocated(rowIdx, writeSize); return bytes; } + + @Test + public void testClearValue() { + BytesColumnVector cv = new BytesColumnVector(4); + byte[] data = "hello".getBytes(StandardCharsets.UTF_8); + cv.vector[0] = data; + cv.start[0] = 1; + cv.length[0] = 3; + + cv.clearValue(0); + + assertTrue(cv.isNull[0]); + assertFalse(cv.noNulls); + assertNull(cv.vector[0]); + assertEquals(0, cv.start[0]); + assertEquals(0, cv.length[0]); + } } diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java new file mode 100644 index 000000000000..2644ff8f1bcb --- /dev/null +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestDecimalColumnVector { + + @Test + void clearValueZeroesSlotAndMarksNull() { + DecimalColumnVector cv = new DecimalColumnVector(4, 18, 4); + cv.vector[1].setFromLong(12345L); + cv.vector[2].setFromLong(67890L); + + cv.clearValue(1); + + assertTrue(cv.isNull[1]); + assertFalse(cv.noNulls); + assertEquals(0L, cv.vector[1].serialize64(cv.scale)); + // Neighbour slot untouched: still represents 67890. + assertEquals(67890L, cv.vector[2].serialize64((short) 0)); + assertFalse(cv.isNull[2]); + } +} diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java new file mode 100644 index 000000000000..a67ff94ef327 --- /dev/null +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestDoubleColumnVector { + + @Test + void clearValueZeroesSlotAndMarksNull() { + DoubleColumnVector cv = new DoubleColumnVector(4); + cv.vector[1] = 3.14; + cv.vector[0] = 1.5; + cv.vector[3] = -2.5; + + cv.clearValue(1); + + assertTrue(cv.isNull[1]); + assertFalse(cv.noNulls); + assertEquals(0.0, cv.vector[1]); + assertEquals(1.5, cv.vector[0]); + assertEquals(-2.5, cv.vector[3]); + assertFalse(cv.isNull[0]); + assertFalse(cv.isNull[3]); + } +} diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java new file mode 100644 index 000000000000..d715508e1484 --- /dev/null +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestIntervalDayTimeColumnVector { + + @Test + void clearValueZeroesSlotAndMarksNull() { + IntervalDayTimeColumnVector cv = new IntervalDayTimeColumnVector(4); + cv.set(3, new HiveIntervalDayTime(5, 0)); + + cv.clearValue(3); + + assertTrue(cv.isNull[3]); + assertFalse(cv.noNulls); + // setNullValue convention: totalSeconds = 0, nanos = 1 + assertEquals(0L, cv.getTotalSeconds(3)); + assertEquals(1, cv.getNanos(3)); + } +} diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java new file mode 100644 index 000000000000..c1c8acc25e97 --- /dev/null +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestLongColumnVector { + + @Test + void clearValueZeroesSlotAndMarksNull() { + LongColumnVector cv = new LongColumnVector(4); + cv.vector[2] = 2025L; + cv.vector[1] = 7L; + cv.vector[3] = 9L; + + cv.clearValue(2); + + assertTrue(cv.isNull[2]); + assertFalse(cv.noNulls); + assertEquals(0L, cv.vector[2]); + assertEquals(7L, cv.vector[1]); + assertEquals(9L, cv.vector[3]); + assertFalse(cv.isNull[1]); + assertFalse(cv.isNull[3]); + } +} diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java index 2d85b115d244..dda52797246a 100644 --- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.sql.Timestamp; @@ -246,4 +248,18 @@ private Thread startVectorManipulationThread(final int vectorLength, final long return thread; } + @Test + public void testClearValue() { + TimestampColumnVector cv = new TimestampColumnVector(4); + cv.time[2] = 1234567890000L; + cv.nanos[2] = 999; + + cv.clearValue(2); + + assertTrue(cv.isNull[2]); + assertFalse(cv.noNulls); + // setNullValue convention: time = 0, nanos = 1 + assertEquals(0L, cv.time[2]); + assertEquals(1, cv.nanos[2]); + } }