Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ protected static final class ComplexChecker implements Checker {
FastUTF8Decoder decoder;

/**
 * Compiles {@code pattern} and prepares the reusable matcher and decoder held by this checker.
 *
 * @param pattern regular expression to compile
 */
public ComplexChecker(String pattern) {
  // Compile exactly once, with DOTALL: in that mode '.' also matches line terminators,
  // so values containing '\n' are handled (regression noted as mirroring HIVE-22008).
  compiledPattern = Pattern.compile(pattern, Pattern.DOTALL);
  // Matcher is created against an empty input here; NOTE(review): presumably it is
  // reset with the actual value on each check -- that code is not visible in this hunk.
  matcher = compiledPattern.matcher("");
  decoder = new FastUTF8Decoder();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4407,6 +4407,33 @@ public void testStringLikePatternType() throws HiveException {
expr.checker.getClass());
}

// Regression: vec ComplexChecker must apply Pattern.DOTALL (mirroring HIVE-22008).
// Row 0 has a line break exactly where the LIKE pattern has '_'; row 1 has a line
// break in the text that the trailing '%' has to cover. Both rows are expected to match.
@Test
public void testStringLikeComplexCheckerMultiLine() throws HiveException {
  final byte[] breakAtUnderscore = "first\nsecond".getBytes(StandardCharsets.UTF_8);
  final byte[] breakAfterMatch = "first_second\nthird".getBytes(StandardCharsets.UTF_8);
  final byte[] likePattern = "%first_second%".getBytes(StandardCharsets.UTF_8);

  // Input column: two non-null string values.
  BytesColumnVector input = new BytesColumnVector();
  input.setRef(0, breakAtUnderscore, 0, breakAtUnderscore.length);
  input.setRef(1, breakAfterMatch, 0, breakAfterMatch.length);
  input.isNull[0] = false;
  input.isNull[1] = false;
  input.noNulls = true;

  // Single-column batch holding both rows.
  VectorizedRowBatch batch = new VectorizedRowBatch(1);
  batch.cols[0] = input;
  batch.size = 2;

  FilterStringColLikeStringScalar expr = new FilterStringColLikeStringScalar(0, likePattern);
  expr.transientInit(hiveConf);

  // Guard: the pattern must be routed to the regex-based checker under test.
  Assert.assertEquals(
      FilterStringColLikeStringScalar.ComplexChecker.class,
      expr.checker.getClass());

  expr.evaluate(batch);

  // Both rows must survive the LIKE filter.
  Assert.assertEquals(2, batch.size);
}

@Test
public void testStringLikeMultiByte() throws HiveException {
FilterStringColLikeStringScalar expr;
Expand Down
33 changes: 33 additions & 0 deletions ql/src/test/queries/clientpositive/udf_like.q
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,36 @@ FROM src tablesample (1 rows);
-- A stored value containing a line break must still match a LIKE pattern whose '%'
-- has to span that break (expected result: true).
CREATE TEMPORARY TABLE SplitLines(`id` string) STORED AS ORC;
INSERT INTO SplitLines SELECT 'withdraw\ncash';
SELECT `id` LIKE '%withdraw%cash' FROM SplitLines ;

-- Two multi-line rows: in the first the line break sits where the pattern has '_',
-- in the second it falls in the text covered by the trailing '%'.
-- Both rows are expected to match, so the count is 2.
CREATE TABLE SplitLinesUnderscore (q STRING) STORED AS ORC;
INSERT INTO SplitLinesUnderscore
SELECT 'first\nsecond' UNION ALL SELECT 'first_second\nthird';
SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%';

-- Repeat with vectorization off to ensure consistency either way
-- NOTE(review): the statements below appear to duplicate queries from earlier in this
-- file (not visible in this hunk) -- confirm they stay in sync with the originals.
set hive.vectorized.execution.enabled=false;

DESCRIBE FUNCTION like;
DESCRIBE FUNCTION EXTENDED like;

-- Escaped wildcards ('\_' and '\%') and empty-string operands.
EXPLAIN
SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%',
'_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%',
'%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE ''
FROM src WHERE src.key = 86;

SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%',
'_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%',
'%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE ''
FROM src WHERE src.key = 86;


-- Patterns containing the characters '+' and '|' alongside the '_' wildcard.
SELECT '1+2' LIKE '_+_',
'1+2' LIKE '1+_',
'112' LIKE '1+_',
'|||' LIKE '|_|',
'+++' LIKE '1+_'
FROM src tablesample (1 rows);

-- Multi-line checks again, now with vectorization disabled; the expected results
-- are the same as for the first run above (true, and a count of 2).
SELECT `id` LIKE '%withdraw%cash' FROM SplitLines;
SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%';
129 changes: 129 additions & 0 deletions ql/src/test/results/clientpositive/llap/udf_like.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,132 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@splitlines
#### A masked pattern was here ####
true
PREHOOK: query: CREATE TABLE SplitLinesUnderscore (q STRING) STORED AS ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@SplitLinesUnderscore
POSTHOOK: query: CREATE TABLE SplitLinesUnderscore (q STRING) STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@SplitLinesUnderscore
PREHOOK: query: INSERT INTO SplitLinesUnderscore
SELECT 'first\nsecond' UNION ALL SELECT 'first_second\nthird'
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@splitlinesunderscore
POSTHOOK: query: INSERT INTO SplitLinesUnderscore
SELECT 'first\nsecond' UNION ALL SELECT 'first_second\nthird'
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@splitlinesunderscore
POSTHOOK: Lineage: splitlinesunderscore.q SCRIPT []
PREHOOK: query: SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%'
PREHOOK: type: QUERY
PREHOOK: Input: default@splitlinesunderscore
#### A masked pattern was here ####
POSTHOOK: query: SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@splitlinesunderscore
#### A masked pattern was here ####
2
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This count was 0 before the code changes in this PR.

PREHOOK: query: DESCRIBE FUNCTION like
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION like
POSTHOOK: type: DESCFUNCTION
like(str, pattern) - Checks if str matches pattern
PREHOOK: query: DESCRIBE FUNCTION EXTENDED like
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION EXTENDED like
POSTHOOK: type: DESCFUNCTION
like(str, pattern) - Checks if str matches pattern
Example:
> SELECT a.* FROM srcpart a WHERE a.hr like '%2' LIMIT 1;
27 val_27 2008-04-08 12
Function class:org.apache.hadoop.hive.ql.udf.UDFLike
Function type:BUILTIN
PREHOOK: query: EXPLAIN
SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%',
'_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%',
'%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE ''
FROM src WHERE src.key = 86
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN
SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%',
'_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%',
'%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE ''
FROM src WHERE src.key = 86
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-0 is a root stage

STAGE PLANS:
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
TableScan
alias: src
filterExpr: (UDFToDouble(key) = 86.0D) (type: boolean)
Filter Operator
predicate: (UDFToDouble(key) = 86.0D) (type: boolean)
Select Operator
expressions: true (type: boolean), false (type: boolean), true (type: boolean), true (type: boolean), false (type: boolean), false (type: boolean), false (type: boolean), false (type: boolean), true (type: boolean), false (type: boolean), false (type: boolean), false (type: boolean), false (type: boolean), true (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
ListSink

PREHOOK: query: SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%',
'_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%',
'%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE ''
FROM src WHERE src.key = 86
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT '_%_' LIKE '%\_\%\_%', '__' LIKE '%\_\%\_%', '%%_%_' LIKE '%\_\%\_%', '%_%_%' LIKE '%\%\_\%',
'_%_' LIKE '\%\_%', '%__' LIKE '__\%%', '_%' LIKE '\_\%\_\%%', '_%' LIKE '\_\%_%',
'%_' LIKE '\%\_', 'ab' LIKE '\%\_', 'ab' LIKE '_a%', 'ab' LIKE 'a','ab' LIKE '','' LIKE ''
FROM src WHERE src.key = 86
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
true false true true false false false false true false false false false true
PREHOOK: query: SELECT '1+2' LIKE '_+_',
'1+2' LIKE '1+_',
'112' LIKE '1+_',
'|||' LIKE '|_|',
'+++' LIKE '1+_'
FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT '1+2' LIKE '_+_',
'1+2' LIKE '1+_',
'112' LIKE '1+_',
'|||' LIKE '|_|',
'+++' LIKE '1+_'
FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
true true false true false
PREHOOK: query: SELECT `id` LIKE '%withdraw%cash' FROM SplitLines
PREHOOK: type: QUERY
PREHOOK: Input: default@splitlines
#### A masked pattern was here ####
POSTHOOK: query: SELECT `id` LIKE '%withdraw%cash' FROM SplitLines
POSTHOOK: type: QUERY
POSTHOOK: Input: default@splitlines
#### A masked pattern was here ####
true
PREHOOK: query: SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%'
PREHOOK: type: QUERY
PREHOOK: Input: default@splitlinesunderscore
#### A masked pattern was here ####
POSTHOOK: query: SELECT count(*) FROM SplitLinesUnderscore WHERE q LIKE '%first_second%'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@splitlinesunderscore
#### A masked pattern was here ####
2
Loading