diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java index 542c6b381493..f26c19103876 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java @@ -419,7 +419,7 @@ protected static final class ComplexChecker implements Checker { FastUTF8Decoder decoder; public ComplexChecker(String pattern) { - compiledPattern = Pattern.compile(pattern); + compiledPattern = Pattern.compile(pattern, Pattern.DOTALL); matcher = compiledPattern.matcher(""); decoder = new FastUTF8Decoder(); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index 22ac562c43ab..001996182c40 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -4407,6 +4407,33 @@ public void testStringLikePatternType() throws HiveException { expr.checker.getClass()); } + // Regression: the vectorized ComplexChecker must apply Pattern.DOTALL (mirroring HIVE-22008). 
+ @Test + public void testStringLikeComplexCheckerMultiLine() throws HiveException { + VectorizedRowBatch batch = new VectorizedRowBatch(1); + BytesColumnVector col = new BytesColumnVector(); + batch.cols[0] = col; + byte[] rowA = "first\nsecond".getBytes(StandardCharsets.UTF_8); + byte[] rowB = "first_second\nthird".getBytes(StandardCharsets.UTF_8); + col.setRef(0, rowA, 0, rowA.length); + col.setRef(1, rowB, 0, rowB.length); + col.isNull[0] = false; + col.isNull[1] = false; + col.noNulls = true; + batch.size = 2; + + FilterStringColLikeStringScalar expr = + new FilterStringColLikeStringScalar(0, "%first_second%".getBytes(StandardCharsets.UTF_8)); + expr.transientInit(hiveConf); + Assert.assertEquals(FilterStringColLikeStringScalar.ComplexChecker.class, + expr.checker.getClass()); + + expr.evaluate(batch); + + // Each row needs '_' or '%' to match a newline, so the filter must keep both rows. + Assert.assertEquals(2, batch.size); + } + @Test public void testStringLikeMultiByte() throws HiveException { FilterStringColLikeStringScalar expr; diff --git a/ql/src/test/queries/clientpositive/udf_like.q b/ql/src/test/queries/clientpositive/udf_like.q index a4a7251bf28b..89364aee78fc 100644 --- a/ql/src/test/queries/clientpositive/udf_like.q +++ b/ql/src/test/queries/clientpositive/udf_like.q @@ -28,3 +28,36 @@ FROM src tablesample (1 rows); CREATE TEMPORARY TABLE SplitLines(`id` string) STORED AS ORC; INSERT INTO SplitLines SELECT 'withdraw\ncash'; SELECT `id` LIKE '%withdraw%cash' FROM SplitLines ; + +CREATE TABLE SplitLinesUnderscore (q STRING) STORED AS ORC; +INSERT INTO SplitLinesUnderscore + SELECT 'first\nsecond' UNION ALL SELECT 'first_second\nthird'; +SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%'; + +-- Repeat with vectorization off to ensure consistency either way +set hive.vectorized.execution.enabled=false; + +DESCRIBE FUNCTION like; +DESCRIBE FUNCTION EXTENDED like; + +EXPLAIN +SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE 
'%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE '' +FROM src WHERE src.key = 86; + +SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE '' +FROM src WHERE src.key = 86; + + +SELECT '1+2' LIKE '_+_', + '1+2' LIKE '1+_', + '112' LIKE '1+_', + '|||' LIKE '|_|', + '+++' LIKE '1+_' +FROM src tablesample (1 rows); + +SELECT `id` LIKE '%withdraw%cash' FROM SplitLines; +SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%'; diff --git a/ql/src/test/results/clientpositive/llap/udf_like.q.out b/ql/src/test/results/clientpositive/llap/udf_like.q.out index 3c86fa7feecf..f8a8ac6e1ce2 100644 --- a/ql/src/test/results/clientpositive/llap/udf_like.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_like.q.out @@ -107,3 +107,132 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@splitlines #### A masked pattern was here #### true +PREHOOK: query: CREATE TABLE SplitLinesUnderscore (q STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@SplitLinesUnderscore +POSTHOOK: query: CREATE TABLE SplitLinesUnderscore (q STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@SplitLinesUnderscore +PREHOOK: query: INSERT INTO SplitLinesUnderscore + SELECT 'first\nsecond' UNION ALL SELECT 'first_second\nthird' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@splitlinesunderscore +POSTHOOK: query: INSERT INTO SplitLinesUnderscore + SELECT 'first\nsecond' UNION ALL SELECT 'first_second\nthird' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table 
+POSTHOOK: Output: default@splitlinesunderscore +POSTHOOK: Lineage: splitlinesunderscore.q SCRIPT [] +PREHOOK: query: SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%' +PREHOOK: type: QUERY +PREHOOK: Input: default@splitlinesunderscore +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@splitlinesunderscore +#### A masked pattern was here #### +2 +PREHOOK: query: DESCRIBE FUNCTION like +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION like +POSTHOOK: type: DESCFUNCTION +like(str, pattern) - Checks if str matches pattern +PREHOOK: query: DESCRIBE FUNCTION EXTENDED like +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED like +POSTHOOK: type: DESCFUNCTION +like(str, pattern) - Checks if str matches pattern +Example: + > SELECT a.* FROM srcpart a WHERE a.hr like '%2' LIMIT 1; + 27 val_27 2008-04-08 12 +Function class:org.apache.hadoop.hive.ql.udf.UDFLike +Function type:BUILTIN +PREHOOK: query: EXPLAIN +SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE '' +FROM src WHERE src.key = 86 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE '' +FROM src WHERE src.key = 86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: src + filterExpr: (UDFToDouble(key) = 86.0D) (type: boolean) + Filter Operator + predicate: (UDFToDouble(key) = 86.0D) (type: boolean) + Select Operator + expressions: true (type: boolean), false (type: boolean), true (type: boolean), true (type: boolean), false (type: boolean), false (type: boolean), false (type: boolean), false (type: boolean), true (type: boolean), false (type: boolean), false (type: boolean), false (type: boolean), false (type: boolean), true (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + ListSink + +PREHOOK: query: SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE '' +FROM src WHERE src.key = 86 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%', + '_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%', + '%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE '' +FROM src WHERE src.key = 86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true false true true false false false false true false false false false true +PREHOOK: query: SELECT '1+2' LIKE '_+_', + '1+2' LIKE '1+_', + '112' LIKE '1+_', + '|||' LIKE '|_|', + '+++' LIKE '1+_' +FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT '1+2' LIKE '_+_', + '1+2' LIKE '1+_', + '112' LIKE '1+_', + '|||' LIKE '|_|', + '+++' LIKE '1+_' +FROM src tablesample (1 rows) +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true false true false +PREHOOK: query: SELECT `id` LIKE '%withdraw%cash' FROM SplitLines +PREHOOK: type: QUERY +PREHOOK: Input: default@splitlines +#### A masked pattern was here #### +POSTHOOK: query: SELECT `id` LIKE '%withdraw%cash' FROM SplitLines +POSTHOOK: type: QUERY +POSTHOOK: Input: default@splitlines +#### A masked pattern was here #### +true +PREHOOK: query: SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%' +PREHOOK: type: QUERY +PREHOOK: Input: default@splitlinesunderscore +#### A masked pattern was here #### +POSTHOOK: query: SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@splitlinesunderscore +#### A masked pattern was here #### +2