diff --git data/files/TINT data/files/TINT new file mode 100644 index 0000000..1aeb377 --- /dev/null +++ data/files/TINT @@ -0,0 +1,5 @@ +0|\N +1|-1 +2|0 +3|1 +4|10 diff --git data/files/TSINT data/files/TSINT new file mode 100644 index 0000000..1aeb377 --- /dev/null +++ data/files/TSINT @@ -0,0 +1,5 @@ +0|\N +1|-1 +2|0 +3|1 +4|10 diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 1031655..f7b1498 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -228,6 +228,7 @@ minitez.query.files.shared=acid_globallimit.q,\ vector_aggregate_without_gby.q,\ vector_auto_smb_mapjoin_14.q,\ vector_between_in.q,\ + vector_between_columns.q,\ vector_binary_join_groupby.q,\ vector_bround.q,\ vector_bucket.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 95a4b9d..7e95244 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1913,6 +1913,12 @@ private VectorExpression getBetweenFilterExpression(List childExpr return null; } + // We don't currently support the BETWEEN ends being columns. They must be scalars. + if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) || + !(childExpr.get(3) instanceof ExprNodeConstantDesc)) { + return null; + } + boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue(); ExprNodeDesc colExpr = childExpr.get(1); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index d75d185..4d2430f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -685,7 +685,7 @@ public static void debugDisplayOneRow(VectorizedRowBatch batch, int index, Strin LOG.info(sb.toString()); } - public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) throws HiveException { + public static void debugDisplayBatch(VectorizedRowBatch batch, String prefix) { for (int i = 0; i < batch.size; i++) { int index = (batch.selectedInUse ? batch.selected[i] : i); debugDisplayOneRow(batch, index, prefix); diff --git ql/src/test/queries/clientpositive/vector_between_columns.q ql/src/test/queries/clientpositive/vector_between_columns.q new file mode 100644 index 0000000..8add1ed --- /dev/null +++ ql/src/test/queries/clientpositive/vector_between_columns.q @@ -0,0 +1,28 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; + +-- SORT_QUERY_RESULTS + +create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +load data local inpath '../../data/files/TSINT' into table TSINT_txt; + +load data local inpath '../../data/files/TINT' into table TINT_txt; + +create table TSINT stored as orc AS SELECT * FROM TSINT_txt; + +create table TINT stored as orc AS SELECT * FROM TINT_txt; + +-- We DO NOT expect the following to vectorized because the BETWEEN range expressions +-- are not constants. We currently do not support the range expressions being columns. +explain +select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint; + +select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint; diff --git ql/src/test/results/clientpositive/tez/vector_between_columns.q.out ql/src/test/results/clientpositive/tez/vector_between_columns.q.out new file mode 100644 index 0000000..972d694 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_between_columns.q.out @@ -0,0 +1,155 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TSINT_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TSINT_txt +PREHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TINT_txt +POSTHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TINT_txt +PREHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tsint_txt +POSTHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tsint_txt +PREHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tint_txt +POSTHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tint_txt +PREHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tsint_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TSINT +POSTHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tsint_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TSINT +tsint_txt.rnum tsint_txt.csint +PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tint_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TINT +POSTHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tint_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TINT +tint_txt.rnum tint_txt.cint +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions +-- are not constants. We currently do not support the range expressions being columns. +explain +select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: type: QUERY +POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions +-- are not constants. We currently do not support the range expressions being columns. +explain +select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tint + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 2 + Map Operator Tree: + TableScan + alias: tsint + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), csint (type: smallint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: smallint) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: type: QUERY +PREHOOK: Input: default@tint +PREHOOK: Input: default@tsint +#### A masked pattern was here #### +POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tint +POSTHOOK: Input: default@tsint +#### A masked pattern was here #### +tint.rnum tsint.rnum +1 1 +2 2 +3 3 +4 4 diff --git ql/src/test/results/clientpositive/tez/vector_select_int.q.out ql/src/test/results/clientpositive/tez/vector_select_int.q.out new file mode 100644 index 0000000..08e3dc3 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_select_int.q.out @@ -0,0 +1,78 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TSINT_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TSINT_txt +PREHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TINT_txt +POSTHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TINT_txt +PREHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tsint_txt +POSTHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tsint_txt +PREHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tint_txt +POSTHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tint_txt +PREHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tsint_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TSINT +POSTHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tsint_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TSINT +tsint_txt.rnum tsint_txt.csint +PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tint_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TINT +POSTHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tint_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TINT +tint_txt.rnum tint_txt.cint +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Map 1' is a cross product +PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: type: QUERY +PREHOOK: Input: default@tint +PREHOOK: Input: default@tsint +#### A masked pattern was here #### +POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tint +POSTHOOK: Input: default@tsint +#### A masked pattern was here #### +tint.rnum tsint.rnum +1 1 +2 2 +3 3 +4 4 diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out new file mode 100644 index 0000000..4837aba --- /dev/null +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TSINT_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table if not exists TSINT_txt ( RNUM int , CSINT smallint ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TSINT_txt +PREHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TINT_txt +POSTHOOK: query: create table if not exists TINT_txt ( RNUM int , CINT int ) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TINT_txt +PREHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tsint_txt +POSTHOOK: query: load data local inpath '../../data/files/TSINT' into table TSINT_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tsint_txt +PREHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tint_txt +POSTHOOK: query: load data local inpath '../../data/files/TINT' into table TINT_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tint_txt +PREHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tsint_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TSINT +POSTHOOK: query: create table TSINT stored as orc AS SELECT * FROM TSINT_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tsint_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TSINT +tsint_txt.rnum tsint_txt.csint +PREHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tint_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TINT +POSTHOOK: query: create table TINT stored as orc AS SELECT * FROM TINT_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tint_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TINT +tint_txt.rnum tint_txt.cint +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions +-- are not constants. We currently do not support the range expressions being columns. +explain +select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: type: QUERY +POSTHOOK: query: -- We DO NOT expect the following to vectorized because the BETWEEN range expressions +-- are not constants. We currently do not support the range expressions being columns. +explain +select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: type: QUERY +Explain +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:tint + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:tint + TableScan + alias: tint + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: tsint + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), csint (type: smallint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 BETWEEN _col3 AND _col3 (type: boolean) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +PREHOOK: type: QUERY +PREHOOK: Input: default@tint +PREHOOK: Input: default@tsint +#### A masked pattern was here #### +POSTHOOK: query: select tint.rnum, tsint.rnum from tint , tsint where tint.cint between tsint.csint and tsint.csint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tint +POSTHOOK: Input: default@tsint +#### A masked pattern was here #### +tint.rnum tsint.rnum +1 1 +2 2 +3 3 +4 4