diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 71f73802c2..25e9cd0482 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -115,12 +115,7 @@ public void initialize(HiveConf hiveConf) {
       transformations.add(new ConstantPropagate());
     }
-    if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.DYNAMICPARTITIONING) &&
-        HiveConf.getVar(hiveConf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict") &&
-        HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTDYNAMICPARTITION) &&
-        !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
-      transformations.add(new SortedDynPartitionOptimizer());
-    }
+    transformations.add(new SortedDynPartitionTimeGranularityOptimizer());
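This first hunk only drops the registration from the logical optimizer chain; the gating reappears in TezCompiler (last Java hunk below), where the pass runs after operator statistics have been annotated, apparently so that the new shouldDo() check in SortedDynPartitionOptimizer can read them. A toy, self-contained sketch of why registration order matters for a stats-driven pass; the Plan type and the UnaryOperator chain are stand-ins of mine, not Hive's actual Transform API:

import java.util.ArrayList;
import java.util.List;
import java.util.function.UnaryOperator;

// Toy plan state: the only property tracked is whether stats are annotated.
class Plan {
  boolean statsAnnotated;
}

public class TransformOrderSketch {
  public static void main(String[] args) {
    List<UnaryOperator<Plan>> transformations = new ArrayList<>();

    // A pass that annotates statistics must come first...
    transformations.add(p -> { p.statsAnnotated = true; return p; });

    // ...so that a stats-driven pass registered after it can read them.
    // Registered in the opposite order, the check below would throw.
    transformations.add(p -> {
      if (!p.statsAnnotated) {
        throw new IllegalStateException("stats-driven pass scheduled too early");
      }
      return p;
    });

    Plan plan = new Plan();
    for (UnaryOperator<Plan> t : transformations) {
      plan = t.apply(plan);
    }
    System.out.println("stats were available to the dependent pass");
  }
}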
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index 2dc2351793..6668f47af7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -55,19 +55,7 @@
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-import org.apache.hadoop.hive.ql.plan.SelectDesc;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.*;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -174,13 +162,18 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       // unlink connection between FS and its parent
       fsParent = fsOp.getParentOperators().get(0);
+      DynamicPartitionCtx dpCtx = fsOp.getConf().getDynPartCtx();
+      List<Integer> partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema());
+      if(!shouldDo(partitionPositions, fsParent)) {
+        return null;
+      }
+
       fsParent.getChildOperators().clear();
 
-      DynamicPartitionCtx dpCtx = fsOp.getConf().getDynPartCtx();
-      int numBuckets = destTable.getNumBuckets();
       // if enforce bucketing/sorting is disabled numBuckets will not be set.
       // set the number of buckets here to ensure creation of empty buckets
+      int numBuckets = destTable.getNumBuckets();
       dpCtx.setNumBuckets(numBuckets);
 
       // Get the positions for partition, bucket and sort columns
@@ -232,7 +225,6 @@
       for (int i : sortPositions) LOG.debug("sort position " + i);
       for (int i : sortOrder) LOG.debug("sort order " + i);
       for (int i : sortNullOrder) LOG.debug("sort null order " + i);
-      List<Integer> partitionPositions = getPartitionPositions(dpCtx, fsParent.getSchema());
 
       // update file sink descriptor
       fsOp.getConf().setMultiFileSpray(false);
@@ -651,6 +643,28 @@ public ReduceSinkOperator getReduceSinkOp(List<Integer> partitionPositions,
       return cols;
     }
-  }
+    private boolean shouldDo(List<Integer> partitionPos, Operator<? extends OperatorDesc> fsParent) {
+      List<ColStatistics> colStats = fsParent.getStatistics().getColumnStats();
+      if(colStats == null || colStats.isEmpty()) {
+        return true; //TODO: if statistics are not available??
+      }
+
+      long partCardinality = 1;
+      // compute cardinality for partition columns
+
+      for(Integer idx : partitionPos) {
+        ColStatistics partStats = colStats.get(idx);
+        //TODO: should check the state of column stats (for partition only)
+        partCardinality = partCardinality * partStats.getCountDistint();
+      }
+
+      long MAX_WRITERS = 32;
+      if(partCardinality <= MAX_WRITERS) {
+        return true;
+      }
+      return false;
+    }
+
+  }
 }
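The shouldDo() guard above keys the rewrite off the estimated number of distinct partition values, taken from the column statistics of the FileSink's parent (getCountDistint() is the real, if misspelled, accessor on Hive's ColStatistics). A self-contained sketch of the same heuristic; the class and method names are mine, while the MAX_WRITERS = 32 budget and the proceed-when-stats-are-missing fallback mirror the patch:

import java.util.Arrays;
import java.util.List;

public class PartitionCardinalityGate {

  static final long MAX_WRITERS = 32; // same budget the patch hard-codes

  // ndvs holds one distinct-value estimate per dynamic partition column;
  // an empty list models "column statistics unavailable".
  static boolean shouldApply(List<Long> ndvs) {
    if (ndvs.isEmpty()) {
      return true; // mirrors the patch's fallback (flagged there as a TODO)
    }
    long cardinality = 1;
    for (long ndv : ndvs) {
      cardinality *= Math.max(ndv, 1); // clamp is mine: guards bad NDV values
      if (cardinality > MAX_WRITERS) {
        return false; // early exit also keeps the product from overflowing
      }
    }
    return true;
  }

  public static void main(String[] args) {
    // two partition columns with 4 and 5 distinct values: 20 <= 32, proceed
    System.out.println(shouldApply(Arrays.asList(4L, 5L)));
    // one partition column with 365 distinct values: 365 > 32, skip
    System.out.println(shouldApply(Arrays.asList(365L)));
  }
}

As posted, the rewrite therefore fires when the partition-count estimate is small and is skipped when it is large; both TODOs in the hunk flag the missing-stats fallback and the unchecked column-stats state as open questions.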
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index fdc963506c..6dae4750ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -72,16 +72,8 @@
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate;
+import org.apache.hadoop.hive.ql.optimizer.*;
 import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
-import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
-import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization;
-import org.apache.hadoop.hive.ql.optimizer.MergeJoinProc;
-import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
-import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize;
-import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
-import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer;
-import org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor;
 import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication;
 import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits;
 import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
@@ -170,6 +162,16 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs,
     // Update bucketing version of ReduceSinkOp if needed
     updateBucketingVersionForUpgrade(procCtx);
 
+    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+    // run Sorted dynamic partition optimization
+    if(HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.DYNAMICPARTITIONING) &&
+        HiveConf.getVar(procCtx.conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equals("nonstrict") &&
+        !HiveConf.getBoolVar(procCtx.conf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
+      //runSortDynamicPartOpt(procCtx, inputs, outputs);
+      new SortedDynPartitionOptimizer().transform(procCtx.parseContext);
+    }
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run sorted dynamic partition optimization");
+
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
     // run the optimizations that use stats for optimization
     runStatsDependentOptimizations(procCtx, inputs, outputs);
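The gate that used to live in Optimizer.java reappears here without the HIVEOPTSORTDYNAMICPARTITION flag, so after this patch the cardinality check in shouldDo(), not that config switch, decides whether the rewrite fires. A standalone sketch of the gate plus the PerfLogger bracketing; the property keys are what I believe those ConfVars resolve to, and System.nanoTime() stands in for the PerfLogBegin/PerfLogEnd pair:

import java.util.HashMap;
import java.util.Map;

public class SortDynPartGateSketch {

  // Same three conditions the hunk checks via HiveConf (key names assumed).
  static boolean shouldRunSortedDynPart(Map<String, String> conf) {
    return Boolean.parseBoolean(conf.getOrDefault("hive.exec.dynamic.partition", "false"))
        && "nonstrict".equals(conf.getOrDefault("hive.exec.dynamic.partition.mode", "strict"))
        && !Boolean.parseBoolean(conf.getOrDefault("hive.optimize.listbucketing", "false"));
  }

  public static void main(String[] args) {
    Map<String, String> conf = new HashMap<>();
    conf.put("hive.exec.dynamic.partition", "true");
    conf.put("hive.exec.dynamic.partition.mode", "nonstrict");

    long start = System.nanoTime(); // PerfLogBegin stand-in
    if (shouldRunSortedDynPart(conf)) {
      // the patch calls new SortedDynPartitionOptimizer().transform(parseContext) here
      System.out.println("sorted dynamic partition rewrite would run");
    }
    long micros = (System.nanoTime() - start) / 1_000; // PerfLogEnd stand-in
    System.out.println("pass took " + micros + " us");
  }
}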
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
index ec95389d08..198447c592 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out
@@ -146,7 +146,6 @@ STAGE PLANS:
                       key expressions: _col4 (type: tinyint)
                       sort order: +
                       Map-reduce partition columns: _col4 (type: tinyint)
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -156,7 +155,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -232,7 +230,6 @@
                     Reduce Output Operator
                       sort order: 
                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                      TopN Hash Memory Usage: 0.1
                      value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -250,7 +247,6 @@
                   key expressions: _col4 (type: tinyint)
                   sort order: +
                   Map-reduce partition columns: _col4 (type: tinyint)
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3
             Execution mode: llap
@@ -258,7 +254,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -331,7 +326,6 @@
                       key expressions: _col4 (type: tinyint), _bucket_number (type: string)
                       sort order: ++
                       Map-reduce partition columns: _col4 (type: tinyint)
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -341,7 +335,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
@@ -413,7 +406,6 @@
                       key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                       sort order: +++
                       Map-reduce partition columns: _col4 (type: tinyint)
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -423,7 +415,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
@@ -563,7 +554,6 @@
                       key expressions: _col4 (type: tinyint)
                       sort order: +
                       Map-reduce partition columns: _col4 (type: tinyint)
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -573,7 +563,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -649,7 +638,6 @@
                     Reduce Output Operator
                       sort order: 
                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                      TopN Hash Memory Usage: 0.1
                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -667,7 +655,6 @@
                   key expressions: _col4 (type: tinyint)
                   sort order: +
                   Map-reduce partition columns: _col4 (type: tinyint)
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3
             Execution mode: llap
@@ -675,7 +662,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -748,7 +734,6 @@
                       key expressions: _col4 (type: tinyint), _bucket_number (type: string)
                       sort order: ++
                       Map-reduce partition columns: _col4 (type: tinyint)
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -758,7 +743,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
@@ -830,7 +814,6 @@ STAGE PLANS:
                       key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                       sort order: +++
                       Map-reduce partition columns: _col4 (type: tinyint)
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -840,7 +823,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
@@ -1263,52 +1245,52 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part
 PREHOOK: Input: default@over1k_part@ds=foo/t=27
 PREHOOK: Input: default@over1k_part@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select count(*) from over1k_part
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part
 POSTHOOK: Input: default@over1k_part@ds=foo/t=27
 POSTHOOK: Input: default@over1k_part@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 38
 PREHOOK: query: select count(*) from over1k_part_limit
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_limit
 PREHOOK: Input: default@over1k_part_limit@ds=foo/t=27
 PREHOOK: Input: default@over1k_part_limit@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select count(*) from over1k_part_limit
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_limit
 POSTHOOK: Input: default@over1k_part_limit@ds=foo/t=27
 POSTHOOK: Input: default@over1k_part_limit@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 20
 PREHOOK: query: select count(*) from over1k_part_buck
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck
 PREHOOK: Input: default@over1k_part_buck@t=27
 PREHOOK: Input: default@over1k_part_buck@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select count(*) from over1k_part_buck
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_buck
 POSTHOOK: Input: default@over1k_part_buck@t=27
 POSTHOOK: Input: default@over1k_part_buck@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 38
 PREHOOK: query: select count(*) from over1k_part_buck_sort
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck_sort
 PREHOOK: Input: default@over1k_part_buck_sort@t=27
 PREHOOK: Input: default@over1k_part_buck_sort@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select count(*) from over1k_part_buck_sort
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_buck_sort
 POSTHOOK: Input: default@over1k_part_buck_sort@t=27
 POSTHOOK: Input: default@over1k_part_buck_sort@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 38
 PREHOOK: query: create table over1k_part2(
 si smallint,
@@ -1465,6 +1447,7 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1481,20 +1464,30 @@
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
                     Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col4 (type: tinyint), _col1 (type: int)
-                      sort order: ++
-                      Map-reduce partition columns: _col4 (type: tinyint)
+                      key expressions: _col1 (type: int)
+                      sort order: +
                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
+                      value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
             Execution mode: llap
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col4 (type: tinyint), _col1 (type: int)
+                  sort order: ++
+                  Map-reduce partition columns: _col4 (type: tinyint)
+                  value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -1564,7 +1557,6 @@
                       key expressions: _col2 (type: int)
                       sort order: +
                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                      TopN Hash Memory Usage: 0.1
                       value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1589,7 +1581,6 @@
                   key expressions: _col4 (type: tinyint)
                   sort order: +
                   Map-reduce partition columns: _col4 (type: tinyint)
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3
             Execution mode: llap
@@ -1597,7 +1588,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -1775,6 +1765,7 @@
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1794,7 +1785,7 @@
                     Reduce Output Operator
                       key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
                       sort order: +++++
-                      Map-reduce partition columns: _col0 (type: tinyint)
+                      Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
                       Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
@@ -1810,15 +1801,26 @@
                 expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Dp Sort State: PARTITION_SORTED
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.over1k_part2
+                Reduce Output Operator
+                  key expressions: _col4 (type: tinyint)
+                  sort order: +
+                  Map-reduce partition columns: _col4 (type: tinyint)
+                  value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                File Output Operator
+                  compressed: false
+                  Dp Sort State: PARTITION_SORTED
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.over1k_part2
 
   Stage: Stage-2
     Dependency Collection
@@ -1946,13 +1948,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part2
 PREHOOK: Input: default@over1k_part2@ds=foo/t=27
 PREHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select * from over1k_part2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part2
 POSTHOOK: Input: default@over1k_part2@ds=foo/t=27
 POSTHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 405	65536	4294967508	82.24	foo	27
 457	65570	4294967464	81.58	foo	27
 256	65599	4294967383	89.55	foo	27
@@ -1977,13 +1979,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part2
 PREHOOK: Input: default@over1k_part2@ds=foo/t=27
 PREHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select count(*) from over1k_part2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part2
 POSTHOOK: Input: default@over1k_part2@ds=foo/t=27
 POSTHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 19
 PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k_n3 where t is null or t=27 order by i
 PREHOOK: type: QUERY
@@ -2087,13 +2089,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part2
 PREHOOK: Input: default@over1k_part2@ds=foo/t=27
 PREHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select * from over1k_part2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part2
 POSTHOOK: Input: default@over1k_part2@ds=foo/t=27
 POSTHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 405	65536	4294967508	82.24	foo	27
 457	65570	4294967464	81.58	foo	27
 256	65599	4294967383	89.55	foo	27
@@ -2118,13 +2120,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part2
 PREHOOK: Input: default@over1k_part2@ds=foo/t=27
 PREHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select count(*) from over1k_part2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part2
 POSTHOOK: Input: default@over1k_part2@ds=foo/t=27
 POSTHOOK: Input: default@over1k_part2@ds=foo/t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 19
 PREHOOK: query: create table over1k_part_buck_sort2(
 si smallint,
@@ -2304,7 +2306,6 @@
                       key expressions: _col4 (type: tinyint), _bucket_number (type: string), _col3 (type: float)
                       sort order: +++
                       Map-reduce partition columns: _col4 (type: tinyint)
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2314,7 +2315,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
@@ -2448,13 +2448,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck_sort2
 PREHOOK: Input: default@over1k_part_buck_sort2@t=27
 PREHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select * from over1k_part_buck_sort2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_buck_sort2
 POSTHOOK: Input: default@over1k_part_buck_sort2@t=27
 POSTHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 329	65778	4294967451	6.63	27
 367	65675	4294967518	12.32	27
 278	65622	4294967516	25.67	27
@@ -2479,13 +2479,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck_sort2
 PREHOOK: Input: default@over1k_part_buck_sort2@t=27
 PREHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select count(*) from over1k_part_buck_sort2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_buck_sort2
 POSTHOOK: Input: default@over1k_part_buck_sort2@t=27
 POSTHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 19
 PREHOOK: query: insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k_n3 where t is null or t=27
 PREHOOK: type: QUERY
@@ -2587,13 +2587,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck_sort2
 PREHOOK: Input: default@over1k_part_buck_sort2@t=27
 PREHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select * from over1k_part_buck_sort2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_buck_sort2
 POSTHOOK: Input: default@over1k_part_buck_sort2@t=27
 POSTHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 329	65778	4294967451	6.63	27
 367	65675	4294967518	12.32	27
 278	65622	4294967516	25.67	27
@@ -2618,13 +2618,13 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_part_buck_sort2
 PREHOOK: Input: default@over1k_part_buck_sort2@t=27
 PREHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select count(*) from over1k_part_buck_sort2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part_buck_sort2
 POSTHOOK: Input: default@over1k_part_buck_sort2@t=27
 POSTHOOK: Input: default@over1k_part_buck_sort2@t=__HIVE_DEFAULT_PARTITION__
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 19
 PREHOOK: query: create table over1k_part3(
 si smallint,
@@ -2680,7 +2680,6 @@
                       key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                       sort order: +++
                       Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                      Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2690,7 +2689,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -2764,7 +2762,6 @@
                       key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                       sort order: +++
                       Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                      Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2774,7 +2771,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -2848,7 +2844,6 @@
                       key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                       sort order: +++
                       Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                      Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2858,7 +2853,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -2932,7 +2926,6 @@
                       key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                       sort order: +++
                       Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                      Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2942,7 +2935,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -3016,7 +3008,6 @@
                       key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                       sort order: +++
                       Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                      Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -3026,7 +3017,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -3100,7 +3090,6 @@
                       key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
                       sort order: +++
                       Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
-                      Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float)
             Execution mode: llap
             LLAP IO: no inputs
@@ -3110,7 +3099,6 @@
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: bigint), VALUE._col2 (type: float), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
@@ -3384,7 +3372,7 @@ PREHOOK: Input: default@over1k_part3@s=tom brown/t=27/i=65675
 PREHOOK: Input: default@over1k_part3@s=ulysses underhill/t=27/i=65570
 PREHOOK: Input: default@over1k_part3@s=wendy van buren/t=27/i=65680
 PREHOOK: Input: default@over1k_part3@s=xavier quirinius/t=27/i=65599
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select sum(hash(*)) from over1k_part3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part3
@@ -3404,7 +3392,7 @@ POSTHOOK: Input: default@over1k_part3@s=tom brown/t=27/i=65675
 POSTHOOK: Input: default@over1k_part3@s=ulysses underhill/t=27/i=65570
 POSTHOOK: Input: default@over1k_part3@s=wendy van buren/t=27/i=65680
 POSTHOOK: Input: default@over1k_part3@s=xavier quirinius/t=27/i=65599
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 17814641134
 PREHOOK: query: drop table over1k_part3
 PREHOOK: type: DROPTABLE
@@ -3562,7 +3550,7 @@ PREHOOK: Input: default@over1k_part3@s=tom brown/t=27/i=65675
 PREHOOK: Input: default@over1k_part3@s=ulysses underhill/t=27/i=65570
 PREHOOK: Input: default@over1k_part3@s=wendy van buren/t=27/i=65680
 PREHOOK: Input: default@over1k_part3@s=xavier quirinius/t=27/i=65599
-#### A masked pattern was here ####
+PREHOOK: Output: hdfs://### HDFS PATH ###
 POSTHOOK: query: select sum(hash(*)) from over1k_part3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1k_part3
@@ -3582,5 +3570,5 @@ POSTHOOK: Input: default@over1k_part3@s=tom brown/t=27/i=65675
 POSTHOOK: Input: default@over1k_part3@s=ulysses underhill/t=27/i=65570
 POSTHOOK: Input: default@over1k_part3@s=wendy van buren/t=27/i=65680
 POSTHOOK: Input: default@over1k_part3@s=xavier quirinius/t=27/i=65599
-#### A masked pattern was here ####
+POSTHOOK: Output: hdfs://### HDFS PATH ###
 17814641134