diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 9e587c6..536e418 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -156,6 +156,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_data_types.q,\ vector_decimal_aggregate.q,\ vector_left_outer_join.q,\ + vector_mapjoin_reduce.q,\ vector_string_concat.q,\ vectorization_12.q,\ vectorization_13.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 96243b6..973923c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -390,6 +390,22 @@ private static BaseWork getBaseWork(Configuration conf, String name) { } } + public static Map> getScratchColumnVectorTypes(Configuration hiveConf) { + BaseWork baseWork = getMapWork(hiveConf); + if (baseWork == null) { + baseWork = getReduceWork(hiveConf); + } + return baseWork.getScratchColumnVectorTypes(); + } + + public static Map> getScratchColumnMap(Configuration hiveConf) { + BaseWork baseWork = getMapWork(hiveConf); + if (baseWork == null) { + baseWork = getReduceWork(hiveConf); + } + return baseWork.getScratchColumnMap(); + } + public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) { try { Graph stageGraph = plan.getQueryPlan().getStageGraph(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index 607f641..30e36ac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -207,8 +207,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive Object[] values = (Object[]) row; VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI); if (null == vcas) { - Map> allColumnMaps = Utilities. - getMapRedWork(hconf).getMapWork().getScratchColumnMap(); + Map> allColumnMaps = Utilities.getScratchColumnMap(hconf); Map columnMap = allColumnMaps.get(fileKey); vcas = VectorColumnAssignFactory.buildAssigners( outputBatch, outputOI, columnMap, conf.getOutputColumnNames()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index aa0afe7..0591531 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -285,8 +285,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive Object[] values = (Object[]) row; VectorColumnAssign[] vcas = outputVectorAssigners.get(outputOI); if (null == vcas) { - Map> allColumnMaps = Utilities. - getMapRedWork(hconf).getMapWork().getScratchColumnMap(); + Map> allColumnMaps = Utilities.getScratchColumnMap(hconf); Map columnMap = allColumnMaps.get(fileKey); vcas = VectorColumnAssignFactory.buildAssigners( outputBatch, outputOI, columnMap, conf.getOutputColumnNames()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 38d9e2c..c77d002 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -131,13 +131,8 @@ public VectorizedRowBatchCtx() { */ public void init(Configuration hiveConf, String fileKey, StructObjectInspector rowOI) { - MapredWork mapredWork = Utilities.getMapRedWork(hiveConf); - Map> scratchColumnVectorTypes; - if (mapredWork.getMapWork() != null) { - scratchColumnVectorTypes = mapredWork.getMapWork().getScratchColumnVectorTypes(); - } else { - scratchColumnVectorTypes = mapredWork.getReduceWork().getScratchColumnVectorTypes(); - } + Map> scratchColumnVectorTypes = + Utilities.getScratchColumnVectorTypes(hiveConf); columnTypeMap = scratchColumnVectorTypes.get(fileKey); this.rowOI= rowOI; this.rawRowOI = rowOI; @@ -174,7 +169,7 @@ public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundEx String partitionPath = split.getPath().getParent().toString(); columnTypeMap = Utilities - .getMapRedWork(hiveConf).getMapWork().getScratchColumnVectorTypes() + .getScratchColumnVectorTypes(hiveConf) .get(partitionPath); Properties partProps = diff --git ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q new file mode 100644 index 0000000..63da042 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_mapjoin_reduce.q @@ -0,0 +1,72 @@ +SET hive.vectorized.execution.enabled=true; +SET hive.auto.convert.join=true; + +DROP TABLE part; + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part; + +DROP TABLE lineitem; +CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem; + +-- Verify HIVE-8097 with a query that has a Vectorized MapJoin in the Reducer. +-- Query copied from subquery_in.q + +-- non agg, non corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +; + +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +; + +-- non agg, corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +; + +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +; diff --git ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out new file mode 100644 index 0000000..27423b2 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_mapjoin_reduce.q.out @@ -0,0 +1,412 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE lineitem +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE lineitem +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem +POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem +PREHOOK: query: -- Verify HIVE-8097 with a query that has a Vectorized MapJoin in the Reducer. +-- Query copied from subquery_in.q + +-- non agg, non corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +PREHOOK: type: QUERY +POSTHOOK: query: -- Verify HIVE-8097 with a query that has a Vectorized MapJoin in the Reducer. +-- Query copied from subquery_in.q + +-- non agg, non corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 756 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) + Statistics: Num rows: 94 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: l_partkey (type: int) + sort order: + + Map-reduce partition columns: l_partkey (type: int) + Statistics: Num rows: 94 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + value expressions: l_orderkey (type: int), l_suppkey (type: int) + Map 2 + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int) + outputColumnNames: l_partkey + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 1728 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 432 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 432 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 432 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 432 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 831 Data size: 3326 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +2320 9821 +4297 1798 +40216 217 +61336 8855 +64128 9141 +82704 7721 +108570 8571 +115118 7630 +115209 7721 +155190 7706 +PREHOOK: query: -- non agg, corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 756 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((l_partkey is not null and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) + Statistics: Num rows: 47 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: l_partkey (type: int) + sort order: + + Map-reduce partition columns: l_partkey (type: int) + Statistics: Num rows: 47 Data size: 752 Basic stats: COMPLETE Column stats: NONE + value expressions: l_orderkey (type: int), l_suppkey (type: int) + Map 2 + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int) + outputColumnNames: l_partkey + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 1099 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 138 Data size: 1519 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_linenumber (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 138 Data size: 1519 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 138 Data size: 1519 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 138 Data size: 1519 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 831 Data size: 3326 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int), 1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +4297 1798 +108570 8571 diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out new file mode 100644 index 0000000..548bca1 --- /dev/null +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -0,0 +1,452 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE lineitem +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE lineitem +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem +POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem +PREHOOK: query: -- Verify HIVE-8097 with a query that has a Vectorized MapJoin in the Reducer. +-- Query copied from subquery_in.q + +-- non agg, non corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +PREHOOK: type: QUERY +POSTHOOK: query: -- Verify HIVE-8097 with a query that has a Vectorized MapJoin in the Reducer. +-- Query copied from subquery_in.q + +-- non agg, non corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-7 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int) + outputColumnNames: l_partkey + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + li + Fetch Operator + limit: -1 + sq_1:lineitem + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + li + TableScan + alias: li + Statistics: Num rows: 756 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) + Statistics: Num rows: 94 Data size: 1504 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + sq_1:lineitem + TableScan + alias: lineitem + Statistics: Num rows: 1728 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 432 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 432 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 432 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 831 Data size: 3326 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +2320 9821 +4297 1798 +40216 217 +61336 8855 +64128 9141 +82704 7721 +108570 8571 +115118 7630 +115209 7721 +155190 7706 +PREHOOK: query: -- non agg, corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-7 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 3024 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int) + outputColumnNames: l_partkey + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1512 Data size: 6049 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 756 Data size: 3024 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + li + Fetch Operator + limit: -1 + sq_1:lineitem + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + li + TableScan + alias: li + Statistics: Num rows: 756 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((l_partkey is not null and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) + Statistics: Num rows: 47 Data size: 752 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + sq_1:lineitem + TableScan + alias: lineitem + Statistics: Num rows: 1099 Data size: 12099 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 138 Data size: 1519 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_linenumber (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 138 Data size: 1519 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 138 Data size: 1519 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int), 1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {l_orderkey} {l_suppkey} + keys: + 0 _col0 (type: int) + 1 l_partkey (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 831 Data size: 3326 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {_col0} {_col3} + 1 + keys: + 0 _col1 (type: int), 1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 914 Data size: 3658 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +4297 1798 +108570 8571