diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index 73e596e..0550186 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.lib.TypeRule; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx; import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization; import org.apache.hadoop.hive.ql.optimizer.SparkRemoveDynamicPruning; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; @@ -287,8 +288,8 @@ private void runDynamicPartitionPruning(OptimizeSparkProcContext procCtx) // need a new run of the constant folding because we might have created lots // of "and true and true" conditions. - if(procCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { - new ConstantPropagate().transform(parseContext); + if (procCtx.getConf().getBoolVar(HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { + new ConstantPropagate(ConstantPropagateProcCtx.ConstantPropagateOption.SHORTCUT).transform(parseContext); } } diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out index 63a9548..f697d63 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out @@ -2560,9 +2560,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '2008-04-08' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '2008-04-08' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -2599,7 +2599,7 @@ STAGE PLANS: Reducer 5 Reduce Operator Tree: Group By Operator - keys: '2008-04-08' (type: string) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -5395,16 +5395,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '2008-04-08' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '2008-04-08' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - keys: '2008-04-08' (type: string) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 699fcc6..c253b23 100644 --- a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -14,8 +14,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -2008-04-08 2008-04-09 +2008-04-08 PREHOOK: query: select distinct hr from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -48,7 +48,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -70,6 +70,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -191,11 +192,9 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@srcpart_double_hour POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ] -PREHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -230,16 +229,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -258,6 +257,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -348,7 +348,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -367,6 +367,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -454,12 +455,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 1000 -PREHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -495,9 +494,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Map 8 @@ -523,17 +522,17 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: hr Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -552,6 +551,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -682,8 +682,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -703,6 +703,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -828,11 +829,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 500 -PREHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -867,24 +866,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 - Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: srcpart_date_hour - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string), hr (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 @@ -895,16 +880,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: hr Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -922,6 +907,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -1012,7 +998,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1031,6 +1017,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -1118,11 +1105,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 500 -PREHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY -POSTHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -1157,16 +1142,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1185,6 +1170,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -1275,7 +1261,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1294,6 +1280,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -1381,11 +1368,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 0 -PREHOOK: query: -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -1420,16 +1405,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: UDFToDouble(hr) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1448,6 +1433,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -1563,16 +1549,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: (UDFToDouble(hr) * 2.0) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1591,6 +1577,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -1681,7 +1668,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1700,6 +1687,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -1790,7 +1778,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1809,6 +1797,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -1933,16 +1922,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: UDFToString((UDFToDouble(hr) * 2.0)) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -1961,6 +1950,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2049,11 +2039,9 @@ POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 1000 Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -PREHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2065,7 +2053,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 2) + Reducer 5 <- Map 4 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -2079,6 +2067,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2093,10 +2082,11 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '2008-04-08' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '2008-04-08' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -2134,7 +2124,7 @@ STAGE PLANS: Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: '2008-04-08' (type: string) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2174,11 +2164,9 @@ POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 1000 Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -PREHOOK: query: -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +PREHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY -POSTHOOK: query: -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +POSTHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2205,6 +2193,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2289,11 +2278,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 1500 -PREHOOK: query: -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +PREHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr PREHOOK: type: QUERY -POSTHOOK: query: -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +POSTHOOK: query: EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2328,24 +2315,10 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 - Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: srcpart_date_hour - filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string), hr (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 @@ -2356,16 +2329,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: hr Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2383,6 +2356,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2461,11 +2435,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 -PREHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2500,16 +2472,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2528,6 +2500,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2624,16 +2597,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 4 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2670,11 +2643,12 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join0 to 1 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -2710,11 +2684,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2749,16 +2721,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2776,6 +2748,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -2799,7 +2772,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Right Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -2835,12 +2808,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY -POSTHOOK: query: -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -2876,17 +2847,45 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds + target work: Map 1 + Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: srcpart_hour + filterExpr: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean) + Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean) + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hr (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: hr (string) + partition key expr: hr + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE target work: Map 1 Execution mode: vectorized Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -2905,6 +2904,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -3031,8 +3031,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4) Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -3164,11 +3164,9 @@ POSTHOOK: Input: default@srcpart_date POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 0 -PREHOOK: query: -- union + subquery -EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: query: EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY -POSTHOOK: query: -- union + subquery -EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: query: EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -3179,12 +3177,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 1) - Reducer 11 <- Reducer 10 (GROUP, 2), Reducer 13 (GROUP, 2) - Reducer 13 <- Map 12 (GROUP, 1) + Reducer 11 <- Map 10 (GROUP, 1) + Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 12 + Map 10 Map Operator Tree: TableScan alias: srcpart @@ -3202,7 +3199,8 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 9 + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: srcpart @@ -3220,79 +3218,82 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 10 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Reducer 13 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 5 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4) Reducer 3 <- Reducer 2 (GROUP, 1) Reducer 5 <- Map 4 (GROUP, 1) - Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 8 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -3303,6 +3304,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -3321,7 +3323,8 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 7 + Execution mode: vectorized + Map 6 Map Operator Tree: TableScan alias: srcpart @@ -3339,11 +3342,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3380,30 +3384,20 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -3411,16 +3405,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3458,12 +3455,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 1) - Reducer 11 <- Reducer 10 (GROUP, 2), Reducer 13 (GROUP, 2) - Reducer 13 <- Map 12 (GROUP, 1) + Reducer 11 <- Map 10 (GROUP, 1) + Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 12 + Map 10 Map Operator Tree: TableScan alias: srcpart @@ -3481,7 +3477,8 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 9 + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: srcpart @@ -3499,79 +3496,82 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 10 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Reducer 13 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 5 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Reducer 2 (GROUP, 4) Reducer 5 <- Map 4 (GROUP, 1) - Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 8 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -3582,6 +3582,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 4 Map Operator Tree: TableScan @@ -3600,7 +3601,8 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 7 + Execution mode: vectorized + Map 6 Map Operator Tree: TableScan alias: srcpart @@ -3618,11 +3620,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3661,30 +3664,20 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -3692,16 +3685,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3725,8 +3721,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -2008-04-08 2008-04-09 +2008-04-08 PREHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) @@ -3740,15 +3736,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 12 <- Map 11 (GROUP, 1) - Reducer 13 <- Reducer 12 (GROUP, 2), Reducer 15 (GROUP, 2) - Reducer 15 <- Map 14 (GROUP, 1) - Reducer 17 <- Map 11 (GROUP, 1) - Reducer 18 <- Reducer 17 (GROUP, 2), Reducer 20 (GROUP, 2) - Reducer 20 <- Map 14 (GROUP, 1) + Reducer 11 <- Map 10 (GROUP, 1) + Reducer 13 <- Map 12 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 11 + Map 10 Map Operator Tree: TableScan alias: srcpart @@ -3766,7 +3758,8 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 14 + Execution mode: vectorized + Map 12 Map Operator Tree: TableScan alias: srcpart @@ -3784,138 +3777,110 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 12 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Reducer 15 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reducer 17 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reducer 18 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 4 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 4 - Reducer 20 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 4 Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 1) - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 1 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 4) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 4), Reducer 2 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4), Reducer 9 (PARTITION-LEVEL SORT, 4) Reducer 7 <- Map 6 (GROUP, 1) - Reducer 8 <- Reducer 10 (GROUP, 2), Reducer 7 (GROUP, 2) + Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: ds (type: string) @@ -3927,6 +3892,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Map 6 Map Operator Tree: TableScan @@ -3945,7 +3911,8 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 9 + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: srcpart @@ -3963,24 +3930,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 10 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -3998,7 +3948,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -4011,19 +3961,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reducer 7 Execution mode: vectorized Reduce Operator Tree: @@ -4032,29 +3969,40 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4082,11 +4030,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 2008-04-08 2008-04-09 2008-04-09 -PREHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -4125,9 +4071,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Local Work: @@ -4167,6 +4113,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -4219,12 +4166,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 1000 -PREHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -4264,9 +4209,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Local Work: @@ -4298,9 +4243,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: hr Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Local Work: @@ -4349,6 +4294,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -4405,11 +4351,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 500 -PREHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -4448,9 +4392,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Select Operator expressions: _col2 (type: string) @@ -4462,9 +4406,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: hr Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Local Work: @@ -4503,6 +4447,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -4555,11 +4500,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 500 -PREHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' PREHOOK: type: QUERY -POSTHOOK: query: -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -4598,9 +4541,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Local Work: @@ -4640,6 +4583,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -4664,17 +4608,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- Disabled until TEZ-1486 is fixed --- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; - --- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 PREHOOK: type: QUERY -POSTHOOK: query: -- Disabled until TEZ-1486 is fixed --- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; - --- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -4713,9 +4649,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: UDFToDouble(hr) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Local Work: @@ -4755,6 +4691,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -4839,9 +4776,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (string) partition key expr: (UDFToDouble(hr) * 2.0) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Execution mode: vectorized Local Work: @@ -4881,6 +4818,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -4934,11 +4872,9 @@ POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 1000 Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -4949,7 +4885,7 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 4 <- Map 3 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 3 @@ -4966,17 +4902,18 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '2008-04-08' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '2008-04-08' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 4 Execution mode: vectorized Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - keys: '2008-04-08' (type: string) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -5019,6 +4956,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -5066,11 +5004,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart #### A masked pattern was here #### 1000 -PREHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -5109,9 +5045,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Local Work: @@ -5151,6 +5087,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -5181,8 +5118,7 @@ POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -5190,39 +5126,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: (date = '2008-04-08') (type: boolean) - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (date = '2008-04-08') (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 3 - Execution mode: vectorized - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: Map 3 Map Operator Tree: TableScan @@ -5236,6 +5139,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + Execution mode: vectorized Local Work: Map Reduce Local Work @@ -5260,7 +5164,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Left Outer Join0 to 1 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -5301,16 +5205,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -5318,39 +5219,6 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: srcpart_date - filterExpr: (date = '2008-04-08') (type: boolean) - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (date = '2008-04-08') (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Execution mode: vectorized - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: Map 1 Map Operator Tree: TableScan @@ -5364,6 +5232,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + Execution mode: vectorized Local Work: Map Reduce Local Work @@ -5388,7 +5257,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Right Outer Join0 to 1 + Right Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -5429,12 +5298,10 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY -POSTHOOK: query: -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -5474,9 +5341,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Execution mode: vectorized Local Work: @@ -5498,6 +5365,20 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: hr (string) + partition key expr: hr + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -5545,6 +5426,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Local Work: Map Reduce Local Work Reducer 2 @@ -5723,19 +5605,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- Disabled until TEZ-1486 is fixed --- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) --- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; - --- union + subquery -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +PREHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) PREHOOK: type: QUERY -POSTHOOK: query: -- Disabled until TEZ-1486 is fixed --- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) --- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; - --- union + subquery -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) +POSTHOOK: query: EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -5746,12 +5618,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 5 <- Reducer 4 (GROUP, 2), Reducer 7 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 1) + Reducer 11 <- Map 10 (GROUP, 1) + Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 10 Map Operator Tree: TableScan alias: srcpart @@ -5761,7 +5632,7 @@ STAGE PLANS: outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ds) + aggregations: min(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE @@ -5769,7 +5640,8 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: srcpart @@ -5779,7 +5651,7 @@ STAGE PLANS: outputColumnNames: ds Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ds) + aggregations: max(ds) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE @@ -5787,109 +5659,152 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 4 Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reducer 5 + Reducer 11 Execution mode: vectorized - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Reducer 7 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: ds (string) + partition key expr: ds + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + target work: Map 1 Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 5 (PARTITION-LEVEL SORT, 4), Reducer 7 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Reducer 2 (GROUP, 4) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart + filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(ds) + mode: hash outputColumnNames: _col0 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ds) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5904,6 +5819,48 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -5927,15 +5884,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -2008-04-08 2008-04-09 -PREHOOK: query: -- different file format -create table srcpart_orc (key int, value string) partitioned by (ds string, hr int) stored as orc +2008-04-08 +PREHOOK: query: create table srcpart_orc (key int, value string) partitioned by (ds string, hr int) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@srcpart_orc -POSTHOOK: query: -- different file format -create table srcpart_orc (key int, value string) partitioned by (ds string, hr int) stored as orc +POSTHOOK: query: create table srcpart_orc (key int, value string) partitioned by (ds string, hr int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcpart_orc @@ -6007,9 +5962,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: ds (string) partition key expr: ds Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE - target column name: ds target work: Map 1 Select Operator expressions: UDFToDouble(_col2) (type: double) @@ -6021,9 +5976,9 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator + Target column: hr (int) partition key expr: UDFToDouble(hr) Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE - target column name: hr target work: Map 1 Local Work: Map Reduce Local Work