diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index f273d25..4bcf6bf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -26,8 +26,6 @@
 import java.util.Map.Entry;
 import java.util.Stack;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
@@ -80,6 +78,8 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
@@ -500,6 +500,131 @@ private long getMaxNulls(Statistics stats, ExprNodeDesc pred) {
     return maxNoNulls;
   }
 
+  /**
+   * Estimates how many rows satisfy a greater-than / less-than style comparison between a
+   * column and a constant, using the min/max range kept in the column statistics.
+   *
+   * The estimate is 0 when the constant is null, and 0 or the full row count when the
+   * constant lies strictly outside the column range. In every other case (operands that are
+   * not a column/constant pair, missing range statistics, unsupported column type, constant
+   * that cannot be parsed as the column type, or constant not strictly outside the range)
+   * the estimate is numRows / 3.
+   *
+   * @param stats statistics of the rows being filtered
+   * @param genFunc the comparison expression to evaluate
+   * @return estimated number of rows that pass the comparison
+   */
+  private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFunc) {
+    final long numRows = stats.getNumRows();
+    // Fallback used whenever the column range cannot decide the predicate.
+    final long defaultEstimate = numRows / 3;
+
+    if (genFunc.getChildren() == null || genFunc.getChildren().size() < 2) {
+      return defaultEstimate;
+    }
+    final ExprNodeDesc left = genFunc.getChildren().get(0);
+    final ExprNodeDesc right = genFunc.getChildren().get(1);
+
+    final ExprNodeColumnDesc columnDesc;
+    final ExprNodeConstantDesc constantDesc;
+    final boolean columnOnLeft;
+    if (left instanceof ExprNodeColumnDesc && right instanceof ExprNodeConstantDesc) {
+      columnDesc = (ExprNodeColumnDesc) left;
+      constantDesc = (ExprNodeConstantDesc) right;
+      columnOnLeft = true;
+    } else if (right instanceof ExprNodeColumnDesc && left instanceof ExprNodeConstantDesc) {
+      columnDesc = (ExprNodeColumnDesc) right;
+      constantDesc = (ExprNodeConstantDesc) left;
+      columnOnLeft = false;
+    } else {
+      return defaultEstimate;
+    }
+
+    // Comparison to null will always return false
+    if (constantDesc.getValue() == null) {
+      return 0;
+    }
+    final String boundValue = constantDesc.getValue().toString();
+
+    final GenericUDF udf = genFunc.getGenericUDF();
+    final boolean greaterOp = udf instanceof GenericUDFOPEqualOrGreaterThan
+        || udf instanceof GenericUDFOPGreaterThan;
+    // "col > c" and "c < col" (and their or-equal forms) bound the column from below;
+    // the mirrored forms bound it from above.
+    final boolean upperBound = columnOnLeft != greaterOp;
+
+    final ColStatistics cs = stats.getColumnStatisticsFromColName(columnDesc.getColumn());
+    if (cs == null || cs.getRange() == null
+        || cs.getRange().maxValue == null || cs.getRange().minValue == null) {
+      return defaultEstimate;
+    }
+    final Number maxValue = cs.getRange().maxValue;
+    final Number minValue = cs.getRange().minValue;
+    final String colTypeLowerCase = columnDesc.getTypeString().toLowerCase();
+
+    // Parse the constant as the column type and compare it with the range in that same
+    // primitive type.
+    final boolean maxBelowBound;
+    final boolean minAboveBound;
+    try {
+      if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)) {
+        final byte value = Byte.parseByte(boundValue);
+        maxBelowBound = maxValue.byteValue() < value;
+        minAboveBound = minValue.byteValue() > value;
+      } else if (colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
+        final short value = Short.parseShort(boundValue);
+        maxBelowBound = maxValue.shortValue() < value;
+        minAboveBound = minValue.shortValue() > value;
+      } else if (colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)
+          || colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
+        // Date is an integer internally
+        // NOTE(review): a date constant's toString() is presumably not a plain integer, in
+        // which case parsing fails and the default estimate is returned -- confirm.
+        final int value = Integer.parseInt(boundValue);
+        maxBelowBound = maxValue.intValue() < value;
+        minAboveBound = minValue.intValue() > value;
+      } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) {
+        final long value = Long.parseLong(boundValue);
+        maxBelowBound = maxValue.longValue() < value;
+        minAboveBound = minValue.longValue() > value;
+      } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) {
+        final float value = Float.parseFloat(boundValue);
+        maxBelowBound = maxValue.floatValue() < value;
+        minAboveBound = minValue.floatValue() > value;
+      } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) {
+        final double value = Double.parseDouble(boundValue);
+        maxBelowBound = maxValue.doubleValue() < value;
+        minAboveBound = minValue.doubleValue() > value;
+      } else {
+        // Ranges are not interpreted for any other column type.
+        return defaultEstimate;
+      }
+    } catch (NumberFormatException nfe) {
+      // The constant is not a valid value of the column type; the range cannot be applied.
+      return defaultEstimate;
+    }
+
+    if (upperBound) {
+      // Predicate keeps rows at or below the constant.
+      if (maxBelowBound) {
+        return numRows;
+      }
+      if (minAboveBound) {
+        return 0;
+      }
+    } else {
+      // Predicate keeps rows at or above the constant.
+      if (minAboveBound) {
+        return numRows;
+      }
+      if (maxBelowBound) {
+        return 0;
+      }
+    }
+    // default
+    return defaultEstimate;
+  }
+
   private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
       AnnotateStatsProcCtx aspCtx, List neededCols,
       FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException {
@@ -578,9 +703,10 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
         } else if (udf instanceof GenericUDFOPNotEqual) {
           return numRows;
         } else if (udf instanceof GenericUDFOPEqualOrGreaterThan
-            || udf instanceof GenericUDFOPEqualOrLessThan || udf instanceof GenericUDFOPGreaterThan
+            || udf instanceof GenericUDFOPEqualOrLessThan
+            || udf instanceof GenericUDFOPGreaterThan
             || udf instanceof GenericUDFOPLessThan) {
-          return numRows / 3;
+          return evaluateComparator(stats, genFunc);
         } else if (udf instanceof GenericUDFOPNotNull) {
           return evaluateNotNullExpr(stats, genFunc);
         } else if (udf instanceof GenericUDFOPNull) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 9d139ba..d8acf94 100644
--- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -724,6 +724,8 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab
       }
     } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
+      cs.setRange(csd.getDateStats().getLowValue().getDaysSinceEpoch(),
+          csd.getDateStats().getHighValue().getDaysSinceEpoch());
     } else {
       // Columns statistics for complex datatypes are not supported yet
       return null;
diff --git ql/src/test/queries/clientpositive/annotate_stats_filter.q ql/src/test/queries/clientpositive/annotate_stats_filter.q
index 436c053..a352a77 100644
--- ql/src/test/queries/clientpositive/annotate_stats_filter.q
+++ ql/src/test/queries/clientpositive/annotate_stats_filter.q
@@ -83,9 +83,17 @@ explain select * from loc_orc where (year=2001 and year is null) or (state='CA')
 -- numRows: 1 rawDataSize: 102
 explain select * from loc_orc where (year=2001 or year is null) and (state='CA');
 
--- all inequality conditions rows/3 is the rules
--- numRows: 2 rawDataSize: 204 +-- inequality conditions falling out of range. total or zero (converted to one) +-- numRows: 1 rawDataSize: 102 +-- numRows: 8 rawDataSize: 804 explain select * from loc_orc where locid < 30; explain select * from loc_orc where locid > 30; explain select * from loc_orc where locid <= 30; explain select * from loc_orc where locid >= 30; + +-- all inequality conditions falling within range. rows/3 is the rules +-- numRows: 2 rawDataSize: 204 +explain select * from loc_orc where locid < 3; +explain select * from loc_orc where locid > 3; +explain select * from loc_orc where locid <= 3; +explain select * from loc_orc where locid >= 3; diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index b09ad03..7e697f1 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -856,12 +856,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: -- all inequality conditions rows/3 is the rules --- numRows: 2 rawDataSize: 204 +PREHOOK: query: -- inequality conditions falling out of range. total or zero (converted to one) +-- numRows: 1 rawDataSize: 102 +-- numRows: 8 rawDataSize: 804 explain select * from loc_orc where locid < 30 PREHOOK: type: QUERY -POSTHOOK: query: -- all inequality conditions rows/3 is the rules --- numRows: 2 rawDataSize: 204 +POSTHOOK: query: -- inequality conditions falling out of range. 
total or zero (converted to one) +-- numRows: 1 rawDataSize: 102 +-- numRows: 8 rawDataSize: 804 explain select * from loc_orc where locid < 30 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -877,14 +879,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid < 30) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -913,14 +915,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 30) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -949,14 +951,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid <= 30) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -985,6 +987,154 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid >= 30) (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- all inequality conditions falling within range. rows/3 is the rules +-- numRows: 2 rawDataSize: 204 +explain select * from loc_orc where locid < 3 +PREHOOK: type: QUERY +POSTHOOK: query: -- all inequality conditions falling within range. rows/3 is the rules +-- numRows: 2 rawDataSize: 204 +explain select * from loc_orc where locid < 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid < 3) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid > 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from loc_orc where locid > 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: 
loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 3) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid <= 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from loc_orc where locid <= 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid <= 3) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select * from loc_orc where locid >= 3 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from loc_orc where locid >= 3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid >= 3) (type: boolean) Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) diff --git ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index ff95252..64a57fe 100644 --- ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -342,31 +342,31 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s_store_sk > 0) (type: boolean) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: ss Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ss_store_sk > 0) (type: boolean) - Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -375,10 +375,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -472,16 +472,16 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((s_floor_space > 0) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: ss Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE @@ -505,10 +505,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 393 Data size: 1572 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 964 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -688,46 +688,46 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 3856 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ss_store_sk > 1000) (type: boolean) - Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 333 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s_store_sk > 1000) (type: boolean) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s_store_sk > 1000) (type: boolean) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: 
COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -738,14 +738,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 213 Data size: 852 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -789,16 +789,16 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE @@ -824,14 +824,14 @@ STAGE PLANS: 1 _col0 
(type: int) 2 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 508 Data size: 2032 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 821 Data size: 3284 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index 186f7af..131cf6a 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -493,11 +493,11 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: locid (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select locid,year from loc_orc where locid>0 and year='2001' @@ -517,11 +517,11 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 0) (type: boolean) - 
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: locid (type: int), '2001' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001' @@ -541,10 +541,10 @@ STAGE PLANS: Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (locid > 0) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: locid (type: int), '2001' (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 644 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git ql/src/test/results/clientpositive/tez/explainuser_1.q.out ql/src/test/results/clientpositive/tez/explainuser_1.q.out index a3ff85c..938d183 100644 --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -388,9 +388,9 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_35] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6 - Select Operator [SEL_34] (rows=2 width=16) + Select Operator [SEL_34] (rows=3 width=16) Output:["_col2","_col6"] - Filter Operator [FIL_33] (rows=2 width=16) + Filter Operator [FIL_33] (rows=3 width=16) predicate:((_col1 > 0) or (_col6 >= 0)) Merge Join Operator [MERGEJOIN_52] (rows=3 width=16) 
Conds:RS_30._col0=RS_31._col0(Inner),Output:["_col1","_col2","_col6"] @@ -491,14 +491,14 @@ Stage-0 Output:["_col2","_col6"] Filter Operator [FIL_30] (rows=1 width=16) predicate:(((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0)) - Merge Join Operator [MERGEJOIN_48] (rows=2 width=16) + Merge Join Operator [MERGEJOIN_48] (rows=3 width=16) Conds:RS_27._col0=RS_28._col0(Inner),Output:["_col1","_col2","_col6"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 - Select Operator [SEL_26] (rows=5 width=71) + Select Operator [SEL_26] (rows=18 width=79) Output:["_col0","_col1"] - Filter Operator [FIL_46] (rows=5 width=71) + Filter Operator [FIL_46] (rows=18 width=79) predicate:((c_int > 0) and key is not null) TableScan [TS_24] (rows=20 width=80) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] @@ -664,14 +664,14 @@ Stage-0 Output:["_col2","_col6"] Filter Operator [FIL_29] (rows=1 width=20) predicate:(((_col1 + _col4) >= 0) and ((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0)) - Merge Join Operator [MERGEJOIN_42] (rows=3 width=20) + Merge Join Operator [MERGEJOIN_42] (rows=4 width=20) Conds:RS_25._col0=RS_26._col0(Outer),RS_25._col0=RS_27._col0(Right Outer),Output:["_col1","_col2","_col4","_col6"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0 - Select Operator [SEL_24] (rows=6 width=74) + Select Operator [SEL_24] (rows=20 width=80) Output:["_col0","_col1"] - Filter Operator [FIL_41] (rows=6 width=74) + Filter Operator [FIL_41] (rows=20 width=80) predicate:(c_int > 0) TableScan [TS_22] (rows=20 width=80) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] @@ -744,9 +744,9 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_29] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6 - Select Operator [SEL_28] (rows=2 width=16) + Select Operator [SEL_28] (rows=3 
width=16) Output:["_col2","_col6"] - Filter Operator [FIL_27] (rows=2 width=16) + Filter Operator [FIL_27] (rows=3 width=16) predicate:((_col1 > 0) or (_col6 >= 0)) Merge Join Operator [MERGEJOIN_43] (rows=3 width=16) Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col1","_col2","_col6"] @@ -1201,9 +1201,9 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_19] - Select Operator [SEL_18] (rows=14 width=101) + Select Operator [SEL_18] (rows=21 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_17] (rows=14 width=101) + Filter Operator [FIL_17] (rows=21 width=101) predicate:((_col1 > 0) or (_col6 >= 0)) Merge Join Operator [MERGEJOIN_28] (rows=21 width=101) Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] @@ -1257,9 +1257,9 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_14] - Select Operator [SEL_13] (rows=12 width=101) + Select Operator [SEL_13] (rows=24 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_12] (rows=12 width=101) + Filter Operator [FIL_12] (rows=24 width=101) predicate:(((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0)) and ((_col4 + 1) = 2)) Merge Join Operator [MERGEJOIN_19] (rows=72 width=101) Conds:RS_8._col0=RS_9._col0(Right Outer),RS_8._col0=RS_10._col0(Right Outer),Output:["_col1","_col2","_col3","_col4","_col6"] @@ -1487,9 +1487,9 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_41] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6 - Select Operator [SEL_40] (rows=2 width=16) + Select Operator [SEL_40] (rows=3 width=16) Output:["_col2","_col6"] - Filter Operator [FIL_39] (rows=2 width=16) + Filter Operator [FIL_39] (rows=3 width=16) predicate:((_col1 > 0) or (_col6 >= 0)) Merge Join Operator [MERGEJOIN_61] (rows=3 width=16) Conds:RS_36._col0=RS_37._col0(Inner),Output:["_col1","_col2","_col6"]