From 8b3a877da4b0c4bf9acd7404b97ae298b8330e33 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Tue, 12 Dec 2017 15:15:00 -0800 Subject: [PATCH] HIVE-18201 : Disable XPROD_EDGE for sq_count_check() created for scalar subqueries --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 + .../hive/ql/optimizer/ConvertJoinMapJoin.java | 25 ++- ql/src/test/queries/clientpositive/perf/query6.q | 2 + .../clientpositive/llap/auto_join_filters.q.out | 4 +- .../clientpositive/llap/auto_join_nulls.q.out | 2 +- .../results/clientpositive/llap/mapjoin2.q.out | 2 +- .../results/clientpositive/llap/mapjoin_hint.q.out | 62 ++++--- .../clientpositive/llap/vector_complex_all.q.out | 94 +++++----- .../llap/vector_groupby_mapjoin.q.out | 113 ++++++------ .../clientpositive/llap/vector_join_filters.q.out | 2 +- .../llap/vectorized_multi_output_select.q.out | 58 +++---- .../results/clientpositive/perf/tez/query6.q.out | 189 ++++++++++----------- 12 files changed, 285 insertions(+), 270 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index cce908f48e..093b4a73f3 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2027,6 +2027,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "However, if it is on, and the predicted number of entries in hashtable for a given join \n" + "input is larger than this number, the join will not be converted to a mapjoin. \n" + "The value \"-1\" means no limit."), + XPRODSMALLTABLEROWSTHRESHOLD("hive.xprod.mapjoin.small.table.rows", 1,"Maximum number of rows on build side" + + " of map join before it switches over to cross product edge"), HIVECONVERTJOINMAXSHUFFLESIZE("hive.auto.convert.join.shuffle.max.size", 10000000000L, "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" + "However, if it is on, and the predicted size of the larger input for a given join is greater \n" + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 011dadf495..4145baf25b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -113,14 +113,6 @@ MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, context.conf, llapInfo); joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo); - // not use map join in case of cross product - boolean cartesianProductEdgeEnabled = - HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); - if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) { - fallbackToMergeJoin(joinOp, context); - return null; - } - TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf); boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & !context.parseContext.getDisableMapJoin(); @@ -988,6 +980,23 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c return -1; } + // only allow cross product in map joins if build side is 'small' + boolean cartesianProductEdgeEnabled = + HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); + if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) { + for (int i = 0 ; i < joinOp.getParentOperators().size(); i ++) { + if (i != bigTablePosition) { + Statistics parentStats = joinOp.getParentOperators().get(i).getStatistics(); + if (parentStats.getNumRows() > + HiveConf.getIntVar(context.conf, HiveConf.ConfVars.XPRODSMALLTABLEROWSTHRESHOLD)) { + // if any of smaller side is estimated to generate more than + // threshold rows we would disable mapjoin + return -1; + } + } + } + } + // We store the total memory that this MapJoin is going to use, // which is calculated as totalSize/buckets, with totalSize // equal to sum of small tables size. diff --git a/ql/src/test/queries/clientpositive/perf/query6.q b/ql/src/test/queries/clientpositive/perf/query6.q index d45045d135..aabce5202e 100644 --- a/ql/src/test/queries/clientpositive/perf/query6.q +++ b/ql/src/test/queries/clientpositive/perf/query6.q @@ -1,3 +1,5 @@ +set hive.auto.convert.join=true; +set hive.tez.cartesian-product.enabled=true; set hive.mapred.mode=nonstrict; -- start query 1 in stream 0 using template query6.tpl and seed 1819994127 explain diff --git a/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out b/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out index 7a271fce92..a63979280e 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join_filters.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1_n5 -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 @@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in/000001_0' into tabl POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_input2_n0 -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n5 diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index c7bb1274cc..194fc5def3 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1_n2 -Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a JOIN myinput1_n2 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n2 diff --git a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out index 4638fcedd4..872f918efd 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin2.q.out @@ -57,7 +57,7 @@ POSTHOOK: Input: default@tbl_n1 #### A masked pattern was here #### false false true true true true false false -Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out index 5cccce9ff2..3c6270a052 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out @@ -527,7 +527,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_name = (select p_name from part_null_n1 where p_name is null) @@ -541,8 +541,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -558,13 +558,33 @@ STAGE PLANS: expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + input vertices: + 1 Reducer 3 + 2 Map 2 + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: part_null_n1 @@ -589,31 +609,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 - 1 - 2 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index d5ea64f996..4e1698da4d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -642,7 +642,7 @@ b str two line1 four line2 six line3 -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL INSERT INTO TABLE orc_create_complex_n0 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2 @@ -667,7 +667,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE), Map 4 (XPROD_EDGE), Map 5 (XPROD_EDGE) + Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -712,7 +712,7 @@ STAGE PLANS: dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Map 2 Map Operator Tree: TableScan alias: spam2 @@ -751,7 +751,7 @@ STAGE PLANS: dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: spam1 @@ -790,7 +790,7 @@ STAGE PLANS: dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: src1 @@ -806,16 +806,47 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 + 1 + 2 + 3 + Map Join Vectorization: + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col6 + input vertices: + 0 Map 1 + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4] + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex_n0 Execution mode: vectorized, llap Map Vectorization: enabled: true @@ -823,7 +854,7 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -831,34 +862,7 @@ STAGE PLANS: includeColumns: [0] dataColumns: key:string, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - keys: - 0 - 1 - 2 - 3 - outputColumnNames: _col0, _col1, _col2, _col3, _col6 - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 1768000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orc_create_complex_n0 + scratchColumnTypeNames: [string, map, array, struct] Stage: Stage-2 Dependency Collection @@ -877,7 +881,7 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: INSERT INTO TABLE orc_create_complex_n0 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index 10abe77323..6443678f89 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain vectorization expression select * from src @@ -26,10 +26,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Reducer 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -47,14 +47,58 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1] Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Map Join Vectorization: + className: VectorMapJoinInnerMultiKeyOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2, _col3, _col5 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 3:bigint, val 0), FilterExprAndExpr(children: SelectColumnIsNull(col 5:boolean), SelectColumnIsNotNull(col 0:string), FilterLongColGreaterEqualLongColumn(col 4:bigint, col 3:bigint))) + predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) + Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -66,7 +110,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -134,39 +178,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 - input vertices: - 1 Reducer 6 - Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col2 = 0L) or (_col5 is null and _col0 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 500 Data size: 98584 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -193,7 +204,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -222,7 +233,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -269,7 +280,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select * from src where not key in @@ -298,7 +309,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select * from orcsrc where not key in @@ -315,7 +326,7 @@ order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@orcsrc #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select * from orcsrc where not key in diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index a111cd51e8..7c1780bf84 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -26,7 +26,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@myinput1_n1 POSTHOOK: Lineage: myinput1_n1.key SIMPLE [(myinput1_txt_n0)myinput1_txt_n0.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: myinput1_n1.value SIMPLE [(myinput1_txt_n0)myinput1_txt_n0.FieldSchema(name:value, type:int, comment:null), ] -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1_n1 diff --git a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out index d6bad24990..43661fa471 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Map Join MAPJOIN[63][bigTable=?] in task 'Reducer 2' is a cross product PREHOOK: query: explain select * from ( select count(*) as h8_30_to_9 @@ -32,10 +32,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -58,7 +57,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) input vertices: - 1 Map 5 + 1 Map 4 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -76,7 +75,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) input vertices: - 1 Map 6 + 1 Map 5 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -89,7 +88,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src1 @@ -109,7 +108,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: src1 @@ -137,29 +136,24 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 3 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -178,7 +172,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Map Join MAPJOIN[63][bigTable=?] in task 'Reducer 2' is a cross product PREHOOK: query: select * from ( select count(*) as h8_30_to_9 from src diff --git a/ql/src/test/results/clientpositive/perf/tez/query6.q.out b/ql/src/test/results/clientpositive/perf/tez/query6.q.out index caa0e0e078..a57f72c11d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query6.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[171][tables = [$hdt$_5, $hdt$_6]] in Stage 'Reducer 13' is a cross product +Warning: Map Join MAPJOIN[171][bigTable=?] in task 'Reducer 19' is a cross product PREHOOK: query: explain select a.ca_state state, count(*) cnt from customer_address a @@ -52,20 +52,19 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 16 <- Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 13 <- Reducer 16 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) Reducer 10 <- Map 9 (SIMPLE_EDGE) Reducer 11 <- Map 9 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 22 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 12 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (ONE_TO_ONE_EDGE) -Reducer 22 <- Map 21 (SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 20 <- Map 22 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) @@ -75,16 +74,16 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_225] - Limit [LIM_224] (rows=100 width=88) + File Output Operator [FS_227] + Limit [LIM_226] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_223] (rows=127775039 width=88) + Select Operator [SEL_225] (rows=127775039 width=88) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] - Filter Operator [FIL_221] (rows=127775039 width=88) + SHUFFLE [RS_224] + Filter Operator [FIL_223] (rows=127775039 width=88) predicate:(_col1 >= 10L) - Group By Operator [GBY_220] (rows=383325119 width=88) + Group By Operator [GBY_222] (rows=383325119 width=88) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_69] @@ -93,7 +92,7 @@ Stage-0 Output:["_col0","_col1"],aggregations:["count()"],keys:_col9 Merge Join Operator [MERGEJOIN_174] (rows=766650239 width=88) Conds:RS_64._col4=RS_65._col0(Inner),Output:["_col9"] - <-Reducer 14 [SIMPLE_EDGE] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col0 Select Operator [SEL_54] (rows=169400 width=1436) @@ -101,86 +100,84 @@ Stage-0 Filter Operator [FIL_53] (rows=169400 width=1436) predicate:(_col4 > (1.2 * CAST( _col0 AS decimal(16,6)))) Merge Join Operator [MERGEJOIN_172] (rows=508200 width=1436) - Conds:RS_50._col1=RS_214._col2(Inner),Output:["_col0","_col3","_col4"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_214] + Conds:RS_213._col1=RS_216._col2(Inner),Output:["_col0","_col3","_col4"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_216] PartitionCols:_col2 - Select Operator [SEL_213] (rows=462000 width=1436) + Select Operator [SEL_215] (rows=462000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_212] (rows=462000 width=1436) + Filter Operator [FIL_214] (rows=462000 width=1436) predicate:(i_category is not null and i_item_sk is not null) TableScan [TS_44] (rows=462000 width=1436) default@item,i,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_category"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_50] + <-Reducer 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_213] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_171] (rows=231000 width=1445) + Map Join Operator [MAPJOIN_212] (rows=231000 width=1445) Conds:(Inner),Output:["_col0","_col1"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_205] - Select Operator [SEL_204] (rows=1 width=8) - Filter Operator [FIL_203] (rows=1 width=8) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_209] + Select Operator [SEL_208] (rows=1 width=8) + Filter Operator [FIL_207] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_202] (rows=1 width=8) + Group By Operator [GBY_206] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_201] - Group By Operator [GBY_200] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_205] + Group By Operator [GBY_204] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_199] (rows=9131 width=1119) - Group By Operator [GBY_198] (rows=9131 width=1119) + Select Operator [SEL_203] (rows=9131 width=1119) + Group By Operator [GBY_202] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] + SHUFFLE [RS_186] PartitionCols:_col0 - Group By Operator [GBY_183] (rows=18262 width=1119) + Group By Operator [GBY_184] (rows=18262 width=1119) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_181] (rows=18262 width=1119) + Select Operator [SEL_182] (rows=18262 width=1119) Output:["d_month_seq"] - Filter Operator [FIL_179] (rows=18262 width=1119) + Filter Operator [FIL_180] (rows=18262 width=1119) predicate:((d_moy = 2) and (d_year = 2000)) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_211] - Select Operator [SEL_210] (rows=231000 width=1436) - Output:["_col0","_col1"] - Group By Operator [GBY_209] (rows=231000 width=1436) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_208] - PartitionCols:_col0 - Group By Operator [GBY_207] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category - Filter Operator [FIL_206] (rows=462000 width=1436) - predicate:i_category is not null - TableScan [TS_23] (rows=462000 width=1436) - default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"] + <-Select Operator [SEL_211] (rows=231000 width=1436) + Output:["_col0","_col1"] + Group By Operator [GBY_210] (rows=231000 width=1436) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_201] + PartitionCols:_col0 + Group By Operator [GBY_200] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category + Filter Operator [FIL_199] (rows=462000 width=1436) + predicate:i_category is not null + TableScan [TS_23] (rows=462000 width=1436) + default@item,j,Tbl:COMPLETE,Col:NONE,Output:["i_current_price","i_category"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col4 Merge Join Operator [MERGEJOIN_173] (rows=696954748 width=88) Conds:RS_61._col5=RS_62._col0(Inner),Output:["_col4","_col9"] - <-Reducer 18 [SIMPLE_EDGE] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_62] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_170] (rows=88000001 width=860) - Conds:RS_192._col1=RS_195._col0(Inner),Output:["_col0","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_192] + Conds:RS_193._col1=RS_196._col0(Inner),Output:["_col0","_col3"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] PartitionCols:_col1 - Select Operator [SEL_191] (rows=80000000 width=860) + Select Operator [SEL_192] (rows=80000000 width=860) Output:["_col0","_col1"] - Filter Operator [FIL_190] (rows=80000000 width=860) + Filter Operator [FIL_191] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_13] (rows=80000000 width=860) default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_196] PartitionCols:_col0 - Select Operator [SEL_194] (rows=40000000 width=1014) + Select Operator [SEL_195] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_193] (rows=40000000 width=1014) + Filter Operator [FIL_194] (rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_16] (rows=40000000 width=1014) default@customer_address,a,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] @@ -188,70 +185,70 @@ Stage-0 SHUFFLE [RS_61] PartitionCols:_col5 Merge Join Operator [MERGEJOIN_169] (rows=633595212 width=88) - Conds:RS_58._col0=RS_219._col0(Inner),Output:["_col4","_col5"] + Conds:RS_58._col0=RS_221._col0(Inner),Output:["_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_58] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_168] (rows=80353 width=1119) - Conds:RS_177._col1=RS_187._col0(Inner),Output:["_col0"] + Conds:RS_178._col1=RS_188._col0(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_177] + SHUFFLE [RS_178] PartitionCols:_col1 - Select Operator [SEL_176] (rows=73049 width=1119) + Select Operator [SEL_177] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_175] (rows=73049 width=1119) + Filter Operator [FIL_176] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq is not null) TableScan [TS_0] (rows=73049 width=1119) default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] <-Reducer 10 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_187] + FORWARD [RS_188] PartitionCols:_col0 - Group By Operator [GBY_186] (rows=9131 width=1119) + Group By Operator [GBY_187] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + SHUFFLE [RS_185] PartitionCols:_col0 - Group By Operator [GBY_182] (rows=18262 width=1119) + Group By Operator [GBY_183] (rows=18262 width=1119) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_180] (rows=18262 width=1119) + Select Operator [SEL_181] (rows=18262 width=1119) Output:["d_month_seq"] - Filter Operator [FIL_178] (rows=18262 width=1119) + Filter Operator [FIL_179] (rows=18262 width=1119) predicate:((d_moy = 2) and (d_year = 2000) and d_month_seq is not null) Please refer to the previous TableScan [TS_3] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_218] (rows=575995635 width=88) + Select Operator [SEL_220] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_217] (rows=575995635 width=88) + Filter Operator [FIL_219] (rows=575995635 width=88) predicate:((ss_customer_sk BETWEEN DynamicValue(RS_62_c_c_customer_sk_min) AND DynamicValue(RS_62_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_62_c_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_65_i_i_item_sk_min) AND DynamicValue(RS_65_i_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_65_i_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_58_d_d_date_sk_min) AND DynamicValue(RS_58_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_58_d_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null) TableScan [TS_10] (rows=575995635 width=88) default@store_sales,s,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_216] - Group By Operator [GBY_215] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_133] (rows=169400 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_54] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_197] - Group By Operator [GBY_196] (rows=1 width=12) + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_198] + Group By Operator [GBY_197] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=88000000)"] - <-Reducer 18 [CUSTOM_SIMPLE_EDGE] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] SHUFFLE [RS_130] Group By Operator [GBY_129] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=88000000)"] Select Operator [SEL_128] (rows=88000001 width=860) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_170] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_218] + Group By Operator [GBY_217] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 20 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_133] (rows=169400 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_54] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_189] - Group By Operator [GBY_188] (rows=1 width=12) + BROADCAST [RS_190] + Group By Operator [GBY_189] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_125] -- 2.14.3 (Apple Git-98)