From 06b284761944eb821bad43122b28c53e17c06ae3 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Wed, 27 Jul 2016 19:21:11 -0700 Subject: [PATCH] HIVE-14367 : Estimated size for constant nulls is 0 --- .../stats/annotation/StatsRulesProcFactory.java | 2 +- .../apache/hadoop/hive/ql/stats/StatsUtils.java | 45 ++++---------- .../hadoop/hive/ql/udf/generic/GenericUDAFMax.java | 11 +++- .../hadoop/hive/ql/udf/generic/GenericUDAFMin.java | 10 ++++ .../test/queries/clientpositive/vector_coalesce.q | 1 + .../results/clientpositive/decimal_stats.q.out | 12 ++-- .../clientpositive/tez/vector_coalesce.q.out | 70 +++++++++++----------- 7 files changed, 75 insertions(+), 76 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 42cbc14..ab07fb6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1171,7 +1171,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ColStatistics cs = new ColStatistics(colName, colType); cs.setCountDistint(stats.getNumRows()); cs.setNumNulls(0); - cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType)); + cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType)); aggColStats.add(cs); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 7a15904..18efe23 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -424,7 +424,7 @@ private static void addParitionColumnStats(HiveConf conf, List neededCol long numPartitions = getNDVPartitionColumn(partList.getPartitions(), ci.getInternalName()); partCS.setCountDistint(numPartitions); - partCS.setAvgColLen(StatsUtils.getAvgColLenOfVariableLengthTypes(conf, + partCS.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), partCS.getColumnType())); partCS.setRange(getRangePartitionColumn(partList.getPartitions(), ci.getInternalName(), ci.getType().getTypeName(), conf.getVar(ConfVars.DEFAULTPARTITIONNAME))); @@ -543,7 +543,7 @@ public static int estimateRowSizeFromSchema(HiveConf conf, List sche || colTypeLowerCase.startsWith(serdeConstants.MAP_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.STRUCT_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.UNION_TYPE_NAME)) { - avgRowSize += getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase); + avgRowSize += getAvgColLenOf(conf, oi, colTypeLowerCase); } else { avgRowSize += getAvgColLenOfFixedLengthTypes(colTypeLowerCase); } @@ -805,7 +805,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab * - column type * @return raw data size */ - public static long getAvgColLenOfVariableLengthTypes(HiveConf conf, ObjectInspector oi, + public static long getAvgColLenOf(HiveConf conf, ObjectInspector oi, String colType) { long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH); @@ -872,7 +872,7 @@ public static long getAvgColLenOfVariableLengthTypes(HiveConf conf, ObjectInspec return getSizeOfComplexTypes(conf, oi); } - return 0; + throw new IllegalArgumentException("Size requested for unknown type: " + colType + " OI: " + oi.getTypeName()); } /** @@ -895,10 +895,10 @@ public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) { if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) { - int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase); + int avgColLen = (int) getAvgColLenOf(conf, oi, colTypeLowerCase); result += JavaDataModel.get().lengthForStringOfLength(avgColLen); } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) { - int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colTypeLowerCase); + int avgColLen = (int) getAvgColLenOf(conf, oi, colTypeLowerCase); result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen); } else { result += getAvgColLenOfFixedLengthTypes(colTypeLowerCase); @@ -1003,7 +1003,7 @@ public static long getAvgColLenOfFixedLengthTypes(String colType) { } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { return JavaDataModel.get().lengthOfDecimal(); } else { - return 0; + throw new IllegalArgumentException("Size requested for unknown type: " + colType); } } @@ -1225,7 +1225,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis double avgColSize = 0; long countDistincts = 0; long numNulls = 0; - ObjectInspector oi = null; + ObjectInspector oi = end.getWritableObjectInspector(); long numRows = parentStats.getNumRows(); if (end instanceof ExprNodeColumnDesc) { @@ -1244,7 +1244,6 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis // virtual columns colType = encd.getTypeInfo().getTypeName(); countDistincts = numRows; - oi = encd.getWritableObjectInspector(); } else { // clone the column stats and return @@ -1263,16 +1262,13 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis // constant projection ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; - // null projection + colName = encd.getName(); + colType = encd.getTypeString(); if (encd.getValue() == null) { - colName = encd.getName(); - colType = serdeConstants.VOID_TYPE_NAME; + // null projection numNulls = numRows; } else { - colName = encd.getName(); - colType = encd.getTypeString(); countDistincts = 1; - oi = encd.getWritableObjectInspector(); } } else if (end instanceof ExprNodeGenericFuncDesc) { @@ -1281,7 +1277,6 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis colName = engfd.getName(); colType = engfd.getTypeString(); countDistincts = getNDVFor(engfd, numRows, parentStats); - oi = engfd.getWritableObjectInspector(); } else if (end instanceof ExprNodeColumnListDesc) { // column list @@ -1289,7 +1284,6 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis colName = Joiner.on(",").join(encd.getCols()); colType = serdeConstants.LIST_TYPE_NAME; countDistincts = numRows; - oi = encd.getWritableObjectInspector(); } else if (end instanceof ExprNodeFieldDesc) { // field within complex type @@ -1297,25 +1291,12 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis colName = enfd.getFieldName(); colType = enfd.getTypeString(); countDistincts = numRows; - oi = enfd.getWritableObjectInspector(); } else { throw new IllegalArgumentException("not supported expr type " + end.getClass()); } colType = colType.toLowerCase(); - if (colType.equals(serdeConstants.STRING_TYPE_NAME) - || colType.equals(serdeConstants.BINARY_TYPE_NAME) - || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) - || colType.startsWith(serdeConstants.CHAR_TYPE_NAME) - || colType.startsWith(serdeConstants.LIST_TYPE_NAME) - || colType.startsWith(serdeConstants.MAP_TYPE_NAME) - || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME) - || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) { - avgColSize = getAvgColLenOfVariableLengthTypes(conf, oi, colType); - } else { - avgColSize = getAvgColLenOfFixedLengthTypes(colType); - } - + avgColSize = getAvgColLenOf(conf, oi, colType); ColStatistics colStats = new ColStatistics(colName, colType); colStats.setAvgColLen(avgColSize); colStats.setCountDistint(countDistincts); @@ -1456,7 +1437,7 @@ public static long getDataSizeFromColumnStats(long numRows, List for (ColStatistics cs : colStats) { if (cs != null) { String colTypeLowerCase = cs.getColumnType().toLowerCase(); - long nonNullCount = numRows - cs.getNumNulls(); + long nonNullCount = Math.max(numRows - cs.getNumNulls(),1); double sizeOf = 0; if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java index 43b23fa..763bfd5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationType; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -79,8 +80,13 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) } /** class for storing the current max value */ + @AggregationType(estimable = true) static class MaxAgg extends AbstractAggregationBuffer { Object o; + @Override + public int estimate() { + return JavaDataModel.PRIMITIVES2; + } } @Override @@ -138,7 +144,7 @@ public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { /* * Based on the Paper by Daniel Lemire: Streaming Max-Min filter using no more * than 3 comparisons per elem. - * + * * 1. His algorithm works on fixed size windows up to the current row. For row * 'i' and window 'w' it computes the min/max for window (i-w, i). 2. The core * idea is to keep a queue of (max, idx) tuples. A tuple in the queue @@ -150,7 +156,7 @@ public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) { * element at the front of the queue has reached its max range of influence; * i.e. frontTuple.idx + w > i. If yes we can remove it from the queue. - on * the ith step o/p the front of the queue as the max for the ith entry. - * + * * Here we modify the algorithm: 1. to handle window's that are of the form * (i-p, i+f), where p is numPreceding,f = numFollowing - we start outputing * rows only after receiving f rows. - the formula for 'influence range' of an @@ -192,6 +198,7 @@ public int estimate() { + (3 * JavaDataModel.PRIMITIVES1); } + @Override protected void reset() { maxChain.clear(); super.reset(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java index 70e0db1..132bad6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java @@ -26,7 +26,9 @@ import org.apache.hadoop.hive.ql.plan.ptf.BoundaryDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; import org.apache.hadoop.hive.ql.udf.UDFType; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax.MaxStreamingFixedWindow; +import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.FullMapEqualComparer; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -76,8 +78,13 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) } /** class for storing the current max value */ + @AggregationType(estimable = true) static class MinAgg extends AbstractAggregationBuffer { Object o; + @Override + public int estimate() { + return JavaDataModel.PRIMITIVES2; + } } @Override @@ -139,14 +146,17 @@ public MinStreamingFixedWindow(GenericUDAFEvaluator wrappedEval, super(wrappedEval, wFrmDef); } + @Override protected ObjectInspector inputOI() { return ((GenericUDAFMinEvaluator) wrappedEval).inputOI; } + @Override protected ObjectInspector outputOI() { return ((GenericUDAFMinEvaluator) wrappedEval).outputOI; } + @Override protected boolean removeLast(Object in, Object last) { return isLess(in, last); } diff --git a/ql/src/test/queries/clientpositive/vector_coalesce.q b/ql/src/test/queries/clientpositive/vector_coalesce.q index b1a7766..cfba7be 100644 --- a/ql/src/test/queries/clientpositive/vector_coalesce.q +++ b/ql/src/test/queries/clientpositive/vector_coalesce.q @@ -1,3 +1,4 @@ +set hive.stats.fetch.column.stats=true; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out index 6bcf3fa..5af58fb 100644 --- a/ql/src/test/results/clientpositive/decimal_stats.q.out +++ b/ql/src/test/results/clientpositive/decimal_stats.q.out @@ -63,27 +63,27 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: decimal(4,2)), u (type: decimal(5,0)), v (type: decimal(10,0)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0)) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 112000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 22512 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 22400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 22512 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out b/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out index e65245e..18a1744 100644 --- a/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_coalesce.q.out @@ -30,18 +30,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 1045828 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cdouble is null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3114 Data size: 265050 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cint (type: int), cfloat (type: float), csmallint (type: smallint), COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string) outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3114 Data size: 819434 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string) sort order: +++++ - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3114 Data size: 819434 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reducer 2 @@ -50,13 +50,13 @@ STAGE PLANS: Select Operator expressions: null (type: double), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: float), KEY.reducesinkkey3 (type: smallint), KEY.reducesinkkey4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3114 Data size: 246466 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 758 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 758 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -122,18 +122,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 146776 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ctinyint is null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3115 Data size: 37208 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cint (type: int), COALESCE(null,(cdouble + log2(cint)),0) (type: double) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3115 Data size: 52832 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: double), _col2 (type: int), _col3 (type: double) sort order: +++ - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3115 Data size: 52832 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reducer 2 @@ -142,13 +142,13 @@ STAGE PLANS: Select Operator expressions: null (type: tinyint), KEY.reducesinkkey0 (type: double), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3115 Data size: 27916 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -214,15 +214,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 110076 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 790 Data size: 7080 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reducer 2 @@ -231,13 +231,13 @@ STAGE PLANS: Select Operator expressions: null (type: float), null (type: bigint), 0.0 (type: float) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 790 Data size: 3172 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -303,18 +303,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ctimestamp1 is not null or ctimestamp2 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 983040 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), COALESCE(ctimestamp1,ctimestamp2) (type: timestamp) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: timestamp) sort order: +++ - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reducer 2 @@ -323,13 +323,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: timestamp), KEY.reducesinkkey2 (type: timestamp) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 1474560 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -395,15 +395,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 110076 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (cfloat is null and cbigint is null) (type: boolean) - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 790 Data size: 7080 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Reducer 2 @@ -412,13 +412,13 @@ STAGE PLANS: Select Operator expressions: null (type: float), null (type: bigint), null (type: float) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 790 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -- 1.7.12.4 (Apple Git-37)