/**
 * Reads a 32-bit little-endian int from the wrapped byte array.
 * <p>
 * The key-relative position is first translated with {@code getAbsolutePos(pos)} and the decoding
 * itself is delegated to {@link LittleEndianBytes#toInt(byte[], int)}.
 * @param pos position of the first of the four bytes, relative to this key
 * @return the value produced by {@code LittleEndianBytes.toInt} for the translated position
 */
@Override
public int getIntLE(int pos) {
  return LittleEndianBytes.toInt(t, getAbsolutePos(pos));
}
+ * @param pos the starting offset of the 4-byte little-endian int + * @return the 32-bit value decoded in little-endian order + */ + public abstract int getIntLE(int pos); } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/JenkinsHash.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/JenkinsHash.java index ffe796985722..74cd053aef7c 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/JenkinsHash.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/JenkinsHash.java @@ -104,18 +104,9 @@ public int hash(HashKey hashKey, int initval) { a = b = c = 0xdeadbeef + length + initval; int offset = 0; for (; length > 12; offset += 12, length -= 12) { - a += (hashKey.get(offset) & BYTE_MASK); - a += ((hashKey.get(offset + 1) & BYTE_MASK) << 8); - a += ((hashKey.get(offset + 2) & BYTE_MASK) << 16); - a += ((hashKey.get(offset + 3) & BYTE_MASK) << 24); - b += (hashKey.get(offset + 4) & BYTE_MASK); - b += ((hashKey.get(offset + 5) & BYTE_MASK) << 8); - b += ((hashKey.get(offset + 6) & BYTE_MASK) << 16); - b += ((hashKey.get(offset + 7) & BYTE_MASK) << 24); - c += (hashKey.get(offset + 8) & BYTE_MASK); - c += ((hashKey.get(offset + 9) & BYTE_MASK) << 8); - c += ((hashKey.get(offset + 10) & BYTE_MASK) << 16); - c += ((hashKey.get(offset + 11) & BYTE_MASK) << 24); + a += hashKey.getIntLE(offset); + b += hashKey.getIntLE(offset + 4); + c += hashKey.getIntLE(offset + 8); /* * mix -- mix 3 32-bit values reversibly. This is reversible, so any information in (a,b,c) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/LittleEndianBytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/LittleEndianBytes.java new file mode 100644 index 000000000000..97b685d4a4a5 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/LittleEndianBytes.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.nio.ByteBuffer; +import org.apache.hadoop.hbase.ByteBufferExtendedCell; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent; +import org.apache.yetus.audience.InterfaceAudience; + +/** + * Utility methods for reading and writing little-endian integers from byte[] and ByteBuffer. Used + * by hashing components to perform fast, low-level LE conversions with optional Unsafe + * acceleration. 
+ */ +@InterfaceAudience.Private +public final class LittleEndianBytes { + final static boolean UNSAFE_UNALIGNED = HBasePlatformDependent.unaligned(); + + static abstract class Converter { + abstract int toInt(byte[] bytes, int offset); + + abstract int toInt(ByteBuffer buffer, int offset); + + abstract int putInt(byte[] bytes, int offset, int val); + } + + static class ConverterHolder { + static final String UNSAFE_CONVERTER_NAME = + ConverterHolder.class.getName() + "$UnsafeConverter"; + static final Converter BEST_CONVERTER = getBestConverter(); + + static Converter getBestConverter() { + try { + Class theClass = + Class.forName(UNSAFE_CONVERTER_NAME).asSubclass(Converter.class); + return theClass.getConstructor().newInstance(); + } catch (Throwable t) { + return PureJavaConverter.INSTANCE; + } + } + + static final class PureJavaConverter extends Converter { + static final PureJavaConverter INSTANCE = new PureJavaConverter(); + + private PureJavaConverter() { + } + + @Override + int toInt(byte[] bytes, int offset) { + int n = 0; + for (int i = offset + 3; i >= offset; i--) { + n <<= 8; + n ^= (bytes[i] & 0xFF); + } + return n; + } + + @Override + int toInt(ByteBuffer buffer, int offset) { + return Integer.reverseBytes(buffer.getInt(offset)); + } + + @Override + int putInt(byte[] bytes, int offset, int val) { + for (int i = offset; i < offset + 3; i++) { + bytes[i] = (byte) val; + val >>>= 8; + } + bytes[offset + 3] = (byte) val; + return offset + Bytes.SIZEOF_INT; + } + } + + static final class UnsafeConverter extends Converter { + static final UnsafeConverter INSTANCE = new UnsafeConverter(); + + public UnsafeConverter() { + } + + static { + if (!UNSAFE_UNALIGNED) { + throw new Error(); + } + } + + @Override + int toInt(byte[] bytes, int offset) { + return UnsafeAccess.toIntLE(bytes, offset); + } + + @Override + int toInt(ByteBuffer buffer, int offset) { + return UnsafeAccess.toIntLE(buffer, offset); + } + + @Override + int putInt(byte[] bytes, int offset, int 
val) { + return UnsafeAccess.putIntLE(bytes, offset, val); + } + } + } + + /* + * Writes an int in little-endian order. Caller must ensure bounds; no checks are performed. + */ + public static void putInt(byte[] bytes, int offset, int val) { + assert offset >= 0 && bytes.length - offset >= Bytes.SIZEOF_INT; + ConverterHolder.BEST_CONVERTER.putInt(bytes, offset, val); + } + + /* + * Reads an int in little-endian order. Caller must ensure bounds; no checks are performed. + */ + public static int toInt(byte[] bytes, int offset) { + assert offset >= 0 && bytes.length - offset >= Bytes.SIZEOF_INT; + return ConverterHolder.BEST_CONVERTER.toInt(bytes, offset); + } + + /* + * Reads an int in little-endian order from ByteBuffer. Caller must ensure bounds; no checks are + * performed. + */ + public static int toInt(ByteBuffer buffer, int offset) { + assert offset >= 0 && buffer.capacity() - offset >= Bytes.SIZEOF_INT; + return ConverterHolder.BEST_CONVERTER.toInt(buffer, offset); + } + + /* + * Reads an int in little-endian order from the row portion of the Cell, at the given offset. + */ + public static int getRowAsInt(Cell cell, int offset) { + if (cell instanceof ByteBufferExtendedCell) { + ByteBufferExtendedCell bbCell = (ByteBufferExtendedCell) cell; + return toInt(bbCell.getRowByteBuffer(), bbCell.getRowPosition() + offset); + } + return toInt(cell.getRowArray(), cell.getRowOffset() + offset); + } + + /* + * Reads an int in little-endian order from the qualifier portion of the Cell, at the given + * offset. 
+ */ + public static int getQualifierAsInt(Cell cell, int offset) { + if (cell instanceof ByteBufferExtendedCell) { + ByteBufferExtendedCell bbCell = (ByteBufferExtendedCell) cell; + return toInt(bbCell.getQualifierByteBuffer(), bbCell.getQualifierPosition() + offset); + } + return toInt(cell.getQualifierArray(), cell.getQualifierOffset() + offset); + } + + private LittleEndianBytes() { + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/MurmurHash.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/MurmurHash.java index 0df3276c61a2..c6b7de78f4bd 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/MurmurHash.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/MurmurHash.java @@ -47,15 +47,7 @@ public int hash(HashKey hashKey, int seed) { int len_4 = length >> 2; for (int i = 0; i < len_4; i++) { - int i_4 = (i << 2); - int k = hashKey.get(i_4 + 3); - k = k << 8; - k = k | (hashKey.get(i_4 + 2) & 0xff); - k = k << 8; - k = k | (hashKey.get(i_4 + 1) & 0xff); - k = k << 8; - // noinspection PointlessArithmeticExpression - k = k | (hashKey.get(i_4 + 0) & 0xff); + int k = hashKey.getIntLE(i << 2); k *= m; k ^= k >>> r; k *= m; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java index 115f8121fe84..20563edf143f 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/MurmurHash3.java @@ -48,9 +48,7 @@ public int hash(HashKey hashKey, int initval) { int roundedEnd = (length & 0xfffffffc); // round down to 4 byte block for (int i = 0; i < roundedEnd; i += 4) { - // little endian load order - int k1 = (hashKey.get(i) & 0xff) | ((hashKey.get(i + 1) & 0xff) << 8) - | ((hashKey.get(i + 2) & 0xff) << 16) | (hashKey.get(i + 3) << 24); + int k1 = hashKey.getIntLE(i); k1 *= c1; k1 = (k1 << 15) | (k1 >>> 17); // 
ROTL32(k1,15); k1 *= c2; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowBloomHashKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowBloomHashKey.java index 10b9dd8ac0ee..cbfab7f9c6e1 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowBloomHashKey.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowBloomHashKey.java @@ -37,4 +37,9 @@ public byte get(int offset) { public int length() { return this.t.getRowLength(); } + + @Override + public int getIntLE(int offset) { + return LittleEndianBytes.getRowAsInt(t, offset); + } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowColBloomHashKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowColBloomHashKey.java index 68162f89e19f..b034119d3049 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowColBloomHashKey.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowColBloomHashKey.java @@ -23,15 +23,15 @@ import org.apache.yetus.audience.InterfaceAudience; /** - * An hash key for ROWCOL bloom. This assumes the cells to be serialized in the Keyvalue + * A hash key for ROWCOL bloom. This assumes the cells to be serialized in the Keyvalue * serialization format with Empty column family. Note that the byte representing the family length * is considered to be 0 */ @InterfaceAudience.Private public class RowColBloomHashKey extends CellHashKey { - private final int rowLength; private final int qualLength; + private final int totalLength; public RowColBloomHashKey(Cell cell) { super(cell); @@ -39,50 +39,133 @@ public RowColBloomHashKey(Cell cell) { // We don't consider the family length for ROWCOL bloom. So subtract the famLen from the // length calculation. 
Timestamp and type are of no relevance here qualLength = cell.getQualifierLength(); + // ROWCOL Bloom byte layout: + // <2B RK length> <1B CF length> <8B TS> <1B TYPE> + totalLength = KeyValue.ROW_LENGTH_SIZE + rowLength + KeyValue.FAMILY_LENGTH_SIZE + qualLength + + KeyValue.TIMESTAMP_TYPE_SIZE; } @Override public byte get(int offset) { - // For ROW_COL blooms we use bytes - // (2 bytes) , , 0 (one byte CF length), , (8 btes), ( 1 byte) - if (offset < Bytes.SIZEOF_SHORT) { - // assign locally - int rowlen = rowLength; - byte b = (byte) rowlen; - if (offset == 0) { - rowlen >>= 8; - b = (byte) rowlen; - } - return b; - } - int refLen = Bytes.SIZEOF_SHORT + rowLength; - if (offset < refLen) { - return PrivateCellUtil.getRowByte(t, offset - Bytes.SIZEOF_SHORT); - } - if (offset == refLen) { - // The fam length should return 0 assuming there is no column family. - // Because for ROWCOL blooms family is not considered - return 0; + return (byte) assembleCrossingLE(offset, Bytes.SIZEOF_BYTE); + } + + @Override + public int length() { + return totalLength; + } + + @Override + public int getIntLE(int offset) { + // Handle fast path that can return the row key as int directly + // Compute rowkey section range. + final int rowEnd = KeyValue.ROW_LENGTH_SIZE + rowLength; + if (offset >= KeyValue.ROW_LENGTH_SIZE && offset + Bytes.SIZEOF_INT <= rowEnd) { + return LittleEndianBytes.getRowAsInt(t, offset - KeyValue.ROW_LENGTH_SIZE); } - refLen += qualLength + Bytes.SIZEOF_BYTE; - // skip the family len because actual cells may have family also - if (offset < refLen) { - return PrivateCellUtil.getQualifierByte(t, - offset - (Bytes.SIZEOF_SHORT + rowLength + Bytes.SIZEOF_BYTE)); + + // Compute qualifier section range. 
+ final int qualStart = rowEnd + KeyValue.FAMILY_LENGTH_SIZE; + final int qualEnd = qualStart + qualLength; + if (offset >= qualStart && offset + Bytes.SIZEOF_INT <= qualEnd) { + return LittleEndianBytes.getQualifierAsInt(t, offset - qualStart); } - // TODO : check if ts and type can be removed - refLen += KeyValue.TIMESTAMP_SIZE; - if (offset < refLen) { - return LATEST_TS[offset - (Bytes.SIZEOF_SHORT + rowLength + qualLength + Bytes.SIZEOF_BYTE)]; + + // Compute timestamp section range. + final int tsEnd = qualEnd + KeyValue.TIMESTAMP_SIZE; + if (offset >= qualEnd && offset + Bytes.SIZEOF_INT <= tsEnd) { + return LittleEndianBytes.toInt(LATEST_TS, offset - qualEnd); } - return MAX_TYPE; + + return (int) assembleCrossingLE(offset, Bytes.SIZEOF_INT); } - @Override - public int length() { - // For ROW_COL blooms we use bytes - // (2 bytes) , , 0 (one byte CF length), , (8 btes), ( 1 byte) - return KeyValue.ROW_LENGTH_SIZE + this.t.getRowLength() + KeyValue.FAMILY_LENGTH_SIZE - + this.t.getQualifierLength() + KeyValue.TIMESTAMP_TYPE_SIZE; + private long assembleCrossingLE(int offset, int wordBytes) { + final int rowEnd = KeyValue.ROW_LENGTH_SIZE + rowLength; + final int qualStart = rowEnd + KeyValue.FAMILY_LENGTH_SIZE; + final int qualEnd = qualStart + qualLength; + final int tsEnd = qualEnd + KeyValue.TIMESTAMP_SIZE; + + long result = 0L; + int pos = offset; + int remaining = wordBytes; + + while (remaining > 0) { + // 1) row length field [0,2) + if (pos < KeyValue.ROW_LENGTH_SIZE) { + if (pos == 0 && remaining >= KeyValue.ROW_LENGTH_SIZE) { + result |= (rowLength >>> 8) & 0xFF; + result |= (rowLength & 0xFF) << 8; + pos += 2; + remaining -= 2; + } else if (pos == 0) { + result |= (rowLength >>> 8) & 0xFF; + pos += 1; + remaining -= 1; + } else { + result |= rowLength & 0xFF; + pos += 1; + remaining -= 1; + } + continue; + } + + // 2) row bytes [2, rowEnd) + if (pos < rowEnd) { + final int take = Math.min(rowEnd - pos, remaining); + final int rOffset = pos - 
KeyValue.ROW_LENGTH_SIZE; + for (int i = 0; i < take; i++) { + final int shift = (wordBytes - remaining) * 8; + final byte b = PrivateCellUtil.getRowByte(t, rOffset + i); + result |= ((long) b & 0xFF) << shift; + remaining -= 1; + } + pos += take; + continue; + } + + // 3) family length byte (always 0) + if (pos == rowEnd) { + pos += 1; + remaining -= 1; + continue; + } + + // 4) qualifier bytes [qualStart, qualEnd) + if (pos < qualEnd) { + final int take = Math.min(qualEnd - pos, remaining); + final int qOffset = pos - qualStart; + for (int i = 0; i < take; i++) { + final int shift = (wordBytes - remaining) * 8; + final int b = PrivateCellUtil.getQualifierByte(t, qOffset + i) & 0xFF; + result |= ((long) b) << shift; + remaining -= 1; + } + pos += take; + continue; + } + + // 5) timestamp bytes [qualEnd, tsEnd) -> LATEST_TS + if (pos < tsEnd) { + final int take = Math.min(tsEnd - pos, remaining); + final int tsOff = pos - qualEnd; + for (int i = 0; i < take; i++) { + final int shift = (wordBytes - remaining) * 8; + final int b = LATEST_TS[tsOff + i] & 0xFF; + result |= ((long) b) << shift; + remaining -= 1; + } + pos += take; + continue; + } + + // 6) type byte at typePos -> MAX_TYPE + final int shift = (wordBytes - remaining) * 8; + result |= ((long) MAX_TYPE & 0xFF) << shift; + pos += 1; + remaining -= 1; + } + + return result; } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java index 3aa8a6ec123f..83fcc3fc1c6e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java @@ -81,6 +81,21 @@ public static int toInt(byte[] bytes, int offset) { } } + /** + * Converts a byte array to an int value considering it was written in little-endian format. 
+ * @param bytes byte array + * @param offset offset into array + * @return the int value + */ + public static int toIntLE(byte[] bytes, int offset) { + if (LITTLE_ENDIAN) { + return HBasePlatformDependent.getInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET); + } else { + return Integer + .reverseBytes(HBasePlatformDependent.getInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET)); + } + } + /** * Converts a byte array to a long value considering it was written in big-endian format. * @param bytes byte array @@ -127,6 +142,21 @@ public static int putInt(byte[] bytes, int offset, int val) { return offset + Bytes.SIZEOF_INT; } + /** + * Put an int value out to the specified byte array position in little-endian format. + * @param bytes the byte array + * @param offset position in the array + * @param val int to write out + * @return incremented offset + */ + public static int putIntLE(byte[] bytes, int offset, int val) { + if (!LITTLE_ENDIAN) { + val = Integer.reverseBytes(val); + } + HBasePlatformDependent.putInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET, val); + return offset + Bytes.SIZEOF_INT; + } + /** * Put a long value out to the specified byte array position in big-endian format. * @param bytes the byte array @@ -191,6 +221,18 @@ public static int toInt(ByteBuffer buf, int offset) { return getAsInt(buf, offset); } + /** + * Reads an int value at the given buffer's offset considering it was written in little-endian + * format. + * @return int value at offset + */ + public static int toIntLE(ByteBuffer buf, int offset) { + if (LITTLE_ENDIAN) { + return getAsInt(buf, offset); + } + return Integer.reverseBytes(getAsInt(buf, offset)); + } + /** * Reads a int value at the given Object's offset considering it was written in big-endian format. 
* @return int value at offset diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestLittleEndianBytes.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestLittleEndianBytes.java new file mode 100644 index 000000000000..e1c9c4f5552b --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestLittleEndianBytes.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Runs the test methods inherited from {@link TestLittleEndianBytesBase} unchanged; this subclass
 * declares no setup or test methods of its own.
 */
@Tag(MiscTests.TAG)
@Tag(SmallTests.TAG)
public class TestLittleEndianBytes extends TestLittleEndianBytesBase {

}
+ */ +package org.apache.hadoop.hbase.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.nio.ByteBuffer; +import org.apache.hadoop.hbase.ByteBufferExtendedCell; +import org.apache.hadoop.hbase.ByteBufferKeyValue; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.junit.jupiter.api.Test; + +public abstract class TestLittleEndianBytesBase { + + @Test + public void testToInt() { + byte[] b = generateByteArray(32); + + for (int i = 0; i <= b.length - Integer.BYTES; i++) { + int expected = readIntLE(b, i); + assertEquals(expected, LittleEndianBytes.toInt(b, i)); + } + } + + @Test + public void testByteBufferToInt() { + byte[] b = generateByteArray(32); + ByteBuffer buf = ByteBuffer.wrap(b); + + for (int i = 0; i <= b.length - Integer.BYTES; i++) { + int expected = readIntLE(b, i); + assertEquals(expected, LittleEndianBytes.toInt(buf, i)); + } + } + + @Test + public void testPutInt() { + byte[] b = new byte[16]; + + int offset = 5; + int value = 0x12345678; + LittleEndianBytes.putInt(b, offset, value); + int expected = readIntLE(b, offset); + assertEquals(value, expected); + + offset += Integer.BYTES; + value = 0x9ABCDEF0; + LittleEndianBytes.putInt(b, offset, value); + expected = readIntLE(b, offset); + assertEquals(value, expected); + } + + @Test + public void testGetRowAsIntFromByteBufferExtendedCell() { + Cell bbCell = createByteBufferExtendedCell(); + byte[] row = bbCell.getRowArray(); + + for (int i = bbCell.getRowOffset(); i <= bbCell.getRowLength() - Integer.BYTES; i++) { + int expected = readIntLE(row, i); + assertEquals(expected, LittleEndianBytes.getRowAsInt(bbCell, i)); + } + } + + @Test + public void testGetRowAsIntFromCell() { + KeyValue cell = createCell(); + byte[] row = cell.getRowArray(); + + for (int i = cell.getRowOffset(); i <= cell.getRowLength() - Integer.BYTES; i++) { + int expected = readIntLE(row, cell.getRowOffset() + i); + assertEquals(expected, 
LittleEndianBytes.getRowAsInt(cell, i)); + } + } + + @Test + public void testGetQualifierAsIntFromByteBufferExtendedCell() { + Cell bbCell = createByteBufferExtendedCell(); + byte[] qual = bbCell.getQualifierArray(); + + for (int i = bbCell.getQualifierOffset(); i + <= bbCell.getQualifierLength() - Integer.BYTES; i++) { + int expected = readIntLE(qual, i); + assertEquals(expected, LittleEndianBytes.getQualifierAsInt(bbCell, i)); + } + } + + @Test + public void testGetQualifierAsIntFromCell() { + KeyValue cell = createCell(); + byte[] qual = cell.getQualifierArray(); + + for (int i = cell.getQualifierOffset(); i <= cell.getQualifierLength() - Integer.BYTES; i++) { + int expected = readIntLE(qual, cell.getQualifierOffset() + i); + assertEquals(expected, LittleEndianBytes.getQualifierAsInt(cell, i)); + } + } + + private static KeyValue createCell() { + byte[] row = Bytes.toBytes("row_key_for_test_12345"); + byte[] family = Bytes.toBytes("f"); + byte[] qualifier = Bytes.toBytes("qualifier_12345"); + byte[] value = Bytes.toBytes(123456789); + return new KeyValue(row, family, qualifier, value); + } + + private static ByteBufferExtendedCell createByteBufferExtendedCell() { + KeyValue kv = createCell(); + ByteBuffer buffer = ByteBuffer.wrap(kv.getBuffer()); + return new ByteBufferKeyValue(buffer, 0, buffer.remaining()); + } + + private static byte[] generateByteArray(int size) { + byte[] b = new byte[size]; + for (int i = 0; i < b.length; i++) { + b[i] = (byte) (i * 3 + 7); + } + return b; + } + + private static int readIntLE(byte[] b, int off) { + return (b[off] & 0xFF) | ((b[off + 1] & 0xFF) << 8) | ((b[off + 2] & 0xFF) << 16) + | ((b[off + 3] & 0xFF) << 24); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestLittleEndianBytesWoUnsafe.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestLittleEndianBytesWoUnsafe.java new file mode 100644 index 000000000000..dc82e4a40871 --- /dev/null +++ 
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestLittleEndianBytesWoUnsafe.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.mockito.Mockito.mockStatic; + +import org.apache.hadoop.hbase.testclassification.MiscTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.mockito.MockedStatic; + +@Tag(MiscTests.TAG) +@Tag(SmallTests.TAG) +public class TestLittleEndianBytesWoUnsafe extends TestLittleEndianBytesBase { + @BeforeAll + public static void disableUnsafe() { + try (MockedStatic mocked = mockStatic(HBasePlatformDependent.class)) { + mocked.when(HBasePlatformDependent::unaligned).thenReturn(false); + assertFalse(LittleEndianBytes.UNSAFE_UNALIGNED); + } + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestRowColBloomHashKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestRowColBloomHashKey.java new file mode 100644 index 000000000000..17e038932463 --- 
/dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestRowColBloomHashKey.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.testclassification.MiscTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.ClassRule; +import org.junit.experimental.categories.Category; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +@Category({ MiscTests.class, SmallTests.class }) +public class TestRowColBloomHashKey { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowColBloomHashKey.class); + + private KeyValue kv; + private RowColBloomHashKey hashKey; + + @BeforeEach + public void setup() { + byte[] row = Bytes.toBytes("row_key_test"); + byte[] family = Bytes.toBytes("family"); + byte[] qualifier = Bytes.toBytes("qualifier"); + byte[] value = 
Bytes.toBytes("1234567890"); + kv = new KeyValue(row, family, qualifier, value); + hashKey = new RowColBloomHashKey(kv); + } + + @Test + public void testGet() { + final int rowLen = kv.getRowLength(); + final int qualLen = kv.getQualifierLength(); + + // Expected virtual layout: + // [rowLen(2 bytes)][row bytes][famLen(1 byte, always 0)][qualifier bytes] + // [timestamp(8 bytes, HConstants.LATEST_TIMESTAMP)][type(1 byte, KeyValue.Type.Maximum)] + final int expectedLength = KeyValue.ROW_LENGTH_SIZE + rowLen + KeyValue.FAMILY_LENGTH_SIZE + + qualLen + KeyValue.TIMESTAMP_TYPE_SIZE; + assertEquals(expectedLength, hashKey.length()); + + int offset = 0; + + // 1) Row length field: MSB then LSB + int msb = hashKey.get(offset++) & 0xFF; + int lsb = hashKey.get(offset++) & 0xFF; + int decodedRowLen = (msb << 8) | lsb; + assertEquals(rowLen, decodedRowLen); + + // 2) Row bytes + for (int i = 0; i < rowLen; i++) { + int expected = PrivateCellUtil.getRowByte(kv, i) & 0xFF; + int actual = hashKey.get(offset++) & 0xFF; + assertEquals(expected, actual, "row byte mismatch at i=" + i); + } + + // 3) Family length byte + assertEquals(0, hashKey.get(offset++) & 0xFF); + + // 4) Qualifier bytes + for (int i = 0; i < qualLen; i++) { + int expected = PrivateCellUtil.getQualifierByte(kv, i) & 0xFF; + int actual = hashKey.get(offset++) & 0xFF; + assertEquals(expected, actual, "qualifier byte mismatch at i=" + i); + } + + // 5) Timestamp bytes: should match HConstants.LATEST_TIMESTAMP in big-endian + // RowColBloomHashKey uses LATEST_TS byte[] from CellHashKey which corresponds to latest + // timestamp. 
+ long ts = HConstants.LATEST_TIMESTAMP; + for (int i = 0; i < KeyValue.TIMESTAMP_SIZE; i++) { + // KeyValue timestamp serialization is big-endian + int expected = (int) ((ts >>> (8 * (KeyValue.TIMESTAMP_SIZE - 1 - i))) & 0xFF); + int actual = hashKey.get(offset++) & 0xFF; + assertEquals(expected, actual, "timestamp byte mismatch at i=" + i); + } + + // 6) Type byte: should be Maximum + assertEquals(KeyValue.Type.Maximum.getCode(), hashKey.get(offset++)); + + // consumed exactly all bytes + assertEquals(hashKey.length(), offset); + } + + @Test + public void testGetIntLE() { + for (int i = 0; i <= hashKey.length() - Bytes.SIZEOF_INT; i++) { + int expected = expectedIntLEFromGet(i); + int actual = hashKey.getIntLE(i); + assertEquals(expected, actual, "sequential mismatch at offset=" + i); + } + } + + private int expectedIntLEFromGet(int offset) { + int b0 = hashKey.get(offset) & 0xFF; + int b1 = hashKey.get(offset + 1) & 0xFF; + int b2 = hashKey.get(offset + 2) & 0xFF; + int b3 = hashKey.get(offset + 3) & 0xFF; + return (b0) | (b1 << 8) | (b2 << 16) | (b3 << 24); + } +}