diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index 75390e7..8987690 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -10,6 +10,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.Description; @@ -30,6 +31,7 @@ import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ParseContext; @@ -42,12 +44,15 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; /** There is a set of queries which can 
be answered entirely from statistics stored in metastore. * Examples of such queries are count(*), count(a), max(a), min(b) etc. Hive already collects @@ -181,30 +186,29 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Hive hive = Hive.get(pctx.getConf()); for (AggregationDesc aggr : aggrs) { - if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( + if (aggr.getGenericUDAFName().equals(GenericUDAFSum.class.getAnnotation( Description.class).name())) { - long rowCnt = 0; + if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){ + return null; + } + Long rowCnt = getRowCnt(hive, tbl); + if(rowCnt == null) { + return null; + } + oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0)) + .getValue().toString()).multiply(HiveDecimal.create(rowCnt))); + ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( + PrimitiveCategory.DECIMAL)); + } + else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation( + Description.class).name())) { + Long rowCnt = 0L; if ((aggr.getParameters().isEmpty() || aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)) { // Its either count (*) or count(1) case - if(tbl.isPartitioned()) { - for (Partition part : hive.getAllPartitionsOf(tbl)) { - long partRowCnt = Long.parseLong(part.getParameters() - .get(StatsSetupConst.ROW_COUNT)); - if (partRowCnt < 1) { - Log.debug("Partition doesn't have upto date stats " + part.getSpec()); - return null; - } - rowCnt += partRowCnt; - } - } else { // unpartitioned table - rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT)); - if (rowCnt < 1) { - // if rowCnt < 1 than its either empty table or table on which stats are not - // computed We assume the worse and don't attempt to optimize. 
- Log.debug("Table doesn't have upto date stats " + tbl.getTableName()); - return null; - } + rowCnt = getRowCnt(hive, tbl); + if(rowCnt == null) { + return null; } } else { // Its count(col) case @@ -442,5 +446,41 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } + + /** + * Returns the row count recorded in the stats of {@code tbl} (summed over all of its + * partitions when partitioned), or {@code null} when a count is missing, malformed or + * less than 1; callers treat {@code null} as "do not optimize". + */ + private Long getRowCnt (Hive hive, Table tbl) throws HiveException { + long rowCnt = 0L; + try { + if(tbl.isPartitioned()) { + for (Partition part : hive.getAllPartitionsOf(tbl)) { + long partRowCnt = Long.parseLong(part.getParameters() + .get(StatsSetupConst.ROW_COUNT)); + if (partRowCnt < 1) { + Log.debug("Partition doesn't have upto date stats " + part.getSpec()); + return null; + } + rowCnt += partRowCnt; + } + } else { // unpartitioned table + rowCnt = Long.parseLong(tbl.getProperty(StatsSetupConst.ROW_COUNT)); + if (rowCnt < 1) { + // If rowCnt < 1 then it's either an empty table or a table on which stats are + // not computed. We assume the worst and don't attempt to optimize. + Log.debug("Table doesn't have upto date stats " + tbl.getTableName()); + return null; + } + } + } catch (NumberFormatException e) { + // Long.parseLong rejects a null or malformed value, i.e. the row count was never + // stored properly. Treat that like stale stats instead of letting the exception escape. + Log.debug("Row count stats are missing or malformed for " + tbl.getTableName()); + return null; + } + return rowCnt; + } } } diff --git ql/src/test/queries/clientpositive/metadata_only_queries.q ql/src/test/queries/clientpositive/metadata_only_queries.q index 7cbd148..9cec4da 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries.q +++ ql/src/test/queries/clientpositive/metadata_only_queries.q @@ -9,7 +9,7 @@ create table over10k( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary) row format delimited @@ -51,9 +51,9 @@ insert into table stats_tbl_part partition (dt='2011') select * from over10k whe insert into table stats_tbl_part partition (dt='2012') select * from over10k where t>60; explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl; explain -select count(*), count(1), 
count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part; analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin; analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin; @@ -61,11 +61,11 @@ analyze table stats_tbl_part partition(dt='2011') compute statistics for columns analyze table stats_tbl_part partition(dt='2012') compute statistics for columns t,si,i,b,f,d,bo,s,bin; explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl; -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl; explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part; -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part; +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part; explain select count(ts) from stats_tbl_part; diff --git ql/src/test/results/clientpositive/metadata_only_queries.q.out ql/src/test/results/clientpositive/metadata_only_queries.q.out index b6d149a..03ca3ec 100644 --- ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries.q.out 
@@ -7,7 +7,7 @@ PREHOOK: query: create table over10k( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary) row format delimited @@ -22,7 +22,7 @@ POSTHOOK: query: create table over10k( d double, bo boolean, s string, - ts timestamp, + ts timestamp, dec decimal, bin binary) row format delimited @@ -232,10 +232,10 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] PREHOOK: query: explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl PREHOOK: type: QUERY POSTHOOK: query: explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] @@ -282,7 +282,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR 
count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b)))))) + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b)))))) STAGE DEPENDENCIES: Stage-1 is a root stage @@ -313,6 +313,8 @@ STAGE PLANS: Group By Operator aggregations: expr: count() + expr: sum(1) + expr: sum(0.2) expr: count(1) expr: count(s) expr: count(bo) @@ -322,7 +324,7 @@ STAGE PLANS: expr: min(b) bucketGroup: false mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Reduce Output Operator sort order: tag: -1 @@ -332,7 +334,7 @@ STAGE PLANS: expr: _col1 type: bigint expr: _col2 - type: bigint + type: double expr: _col3 type: bigint expr: _col4 @@ -340,23 +342,29 @@ STAGE PLANS: expr: _col5 type: bigint expr: _col6 - type: int + type: bigint expr: _col7 type: bigint + expr: _col8 + type: int + expr: _col9 + type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) - expr: count(VALUE._col1) - expr: count(VALUE._col2) + expr: sum(VALUE._col1) + expr: sum(VALUE._col2) expr: 
count(VALUE._col3) expr: count(VALUE._col4) expr: count(VALUE._col5) - expr: max(VALUE._col6) - expr: min(VALUE._col7) + expr: count(VALUE._col6) + expr: count(VALUE._col7) + expr: max(VALUE._col8) + expr: min(VALUE._col9) bucketGroup: false mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Select Operator expressions: expr: _col0 @@ -364,7 +372,7 @@ STAGE PLANS: expr: _col1 type: bigint expr: _col2 - type: bigint + type: double expr: _col3 type: bigint expr: _col4 @@ -372,10 +380,14 @@ STAGE PLANS: expr: _col5 type: bigint expr: _col6 - type: int + type: bigint expr: _col7 type: bigint - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expr: _col8 + type: int + expr: _col9 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 File Output Operator compressed: false GlobalTableId: 0 @@ -389,10 +401,10 @@ STAGE PLANS: limit: -1 PREHOOK: query: explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] @@ -439,7 +451,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE 
[(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b)))))) + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b)))))) STAGE DEPENDENCIES: Stage-1 is a root stage @@ -470,6 +482,8 @@ STAGE PLANS: Group By Operator aggregations: expr: count() + expr: sum(1) + expr: sum(0.2) expr: count(1) expr: count(s) expr: count(bo) @@ -479,7 +493,7 @@ STAGE PLANS: expr: min(b) bucketGroup: false mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Reduce Output Operator sort order: tag: -1 @@ -489,7 +503,7 @@ STAGE PLANS: expr: _col1 type: bigint expr: _col2 - type: bigint + type: double expr: _col3 type: 
bigint expr: _col4 @@ -497,23 +511,29 @@ STAGE PLANS: expr: _col5 type: bigint expr: _col6 - type: int + type: bigint expr: _col7 type: bigint + expr: _col8 + type: int + expr: _col9 + type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) - expr: count(VALUE._col1) - expr: count(VALUE._col2) + expr: sum(VALUE._col1) + expr: sum(VALUE._col2) expr: count(VALUE._col3) expr: count(VALUE._col4) expr: count(VALUE._col5) - expr: max(VALUE._col6) - expr: min(VALUE._col7) + expr: count(VALUE._col6) + expr: count(VALUE._col7) + expr: max(VALUE._col8) + expr: min(VALUE._col9) bucketGroup: false mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Select Operator expressions: expr: _col0 @@ -521,7 +541,7 @@ STAGE PLANS: expr: _col1 type: bigint expr: _col2 - type: bigint + type: double expr: _col3 type: bigint expr: _col4 @@ -529,10 +549,14 @@ STAGE PLANS: expr: _col5 type: bigint expr: _col6 - type: int + type: bigint expr: _col7 type: bigint - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expr: _col8 + type: int + expr: _col9 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 File Output Operator compressed: false GlobalTableId: 0 @@ -760,10 +784,10 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] PREHOOK: query: explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), 
max(i), min(b), max(f), min(d) from stats_tbl PREHOOK: type: QUERY POSTHOOK: query: explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl POSTHOOK: type: QUERY POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] @@ -810,7 +834,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))))) + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count 
(TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))))) STAGE DEPENDENCIES: Stage-0 is a root stage @@ -820,10 +844,10 @@ STAGE PLANS: Fetch Operator limit: 1 -PREHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl +PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl PREHOOK: type: QUERY #### A masked pattern was here #### -POSTHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl +POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl POSTHOOK: type: QUERY #### A masked pattern was here #### POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] @@ -870,12 +894,12 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. 
POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -9999 9999 9999 9999 9999 9999 65791 0 99.9800033569336 0.0 +9999 9999 1999.8 9999 9999 9999 9999 9999 65791 0 99.9800033569336 0.0 PREHOOK: query: explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain -select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part +select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part POSTHOOK: type: QUERY POSTHOOK: Lineage: stats_tbl.b SIMPLE [(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] POSTHOOK: Lineage: stats_tbl.bin SIMPLE [(over10k)over10k.FieldSchema(name:bin, type:binary, comment:null), ] @@ -922,7 +946,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] ABSTRACT SYNTAX TREE: - (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count 
(TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))))) + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME stats_tbl_part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count)) (TOK_SELEXPR (TOK_FUNCTION sum 1)) (TOK_SELEXPR (TOK_FUNCTION sum 0.2)) (TOK_SELEXPR (TOK_FUNCTION count 1)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL s))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bo))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL bin))) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL si))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL i))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL b))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL f))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL d)))))) STAGE DEPENDENCIES: Stage-0 is a root stage @@ -932,10 +956,10 @@ STAGE PLANS: Fetch Operator limit: 1 -PREHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part +PREHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part PREHOOK: type: QUERY #### A masked pattern was here #### -POSTHOOK: query: select count(*), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part +POSTHOOK: query: select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part POSTHOOK: type: QUERY #### A masked pattern was here #### POSTHOOK: Lineage: stats_tbl.b SIMPLE 
[(over10k)over10k.FieldSchema(name:b, type:bigint, comment:null), ] @@ -982,7 +1006,7 @@ POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).s SIMPLE [(over10k)over10k. POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).si SIMPLE [(over10k)over10k.FieldSchema(name:si, type:smallint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).t SIMPLE [(over10k)over10k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: stats_tbl_part PARTITION(dt=2012).ts SIMPLE [(over10k)over10k.FieldSchema(name:ts, type:timestamp, comment:null), ] -9489 9489 9489 9489 9489 9489 65791 0 99.9800033569336 0.0 +9489 9489 1897.8 9489 9489 9489 9489 9489 65791 0 99.9800033569336 0.0 PREHOOK: query: explain select count(ts) from stats_tbl_part PREHOOK: type: QUERY POSTHOOK: query: explain select count(ts) from stats_tbl_part