diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 1d23449..ee4d56c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -31,8 +31,6 @@
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
@@ -67,7 +65,6 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
 import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
@@ -76,7 +73,6 @@
 import org.apache.thrift.TException;
 import com.google.common.collect.Lists;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 /** There is a set of queries which can be answered entirely from statistics stored in metastore.
@@ -184,48 +180,65 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       // 4. Compose rows and add it in FetchWork
       // 5. Delete GBY - RS - GBY - SEL from the pipeline.
-      TableScanOperator tsOp = (TableScanOperator) stack.get(0);
-      if(tsOp.getParentOperators() != null && tsOp.getParentOperators().size() > 0) {
-        // looks like a subq plan.
-        return null;
-      }
-      SelectOperator selOp = (SelectOperator)tsOp.getChildren().get(0);
-      for(ExprNodeDesc desc : selOp.getConf().getColList()) {
-        if (!(desc instanceof ExprNodeColumnDesc)) {
-          // Probably an expression, cant handle that
+      try {
+        TableScanOperator tsOp = (TableScanOperator) stack.get(0);
+        if(tsOp.getParentOperators() != null && tsOp.getParentOperators().size() > 0) {
+          // looks like a subq plan.
           return null;
         }
-      }
-      // Since we have done an exact match on TS-SEL-GBY-RS-GBY-SEL-FS
-      // we need not to do any instanceof checks for following.
-      GroupByOperator gbyOp = (GroupByOperator)selOp.getChildren().get(0);
-      FileSinkOperator fsOp = (FileSinkOperator)(gbyOp.getChildren().get(0).
-          getChildren().get(0).getChildren().get(0).getChildren().get(0));
-      if (fsOp.getChildOperators() != null && fsOp.getChildOperators().size() > 0) {
-        // looks like a subq plan.
-        return null;
-      }
-      List<AggregationDesc> aggrs = gbyOp.getConf().getAggregators();
+        SelectOperator selOp = (SelectOperator)tsOp.getChildren().get(0);
+        for(ExprNodeDesc desc : selOp.getConf().getColList()) {
+          if (!(desc instanceof ExprNodeColumnDesc)) {
+            // Probably an expression, cant handle that
+            return null;
+          }
+        }
+        // Since we have done an exact match on TS-SEL-GBY-RS-GBY-SEL-FS
+        // we need not to do any instanceof checks for following.
+        GroupByOperator gbyOp = (GroupByOperator)selOp.getChildren().get(0);
+        ReduceSinkOperator rsOp = (ReduceSinkOperator)gbyOp.getChildren().get(0);
+        if (rsOp.getConf().getDistinctColumnIndices().size() > 0) {
+          // we can't handle distinct
+          return null;
+        }
+
+        selOp = (SelectOperator)rsOp.getChildOperators().get(0).getChildOperators().get(0);
+        List<AggregationDesc> aggrs = gbyOp.getConf().getAggregators();
+
+        if (!(selOp.getConf().getColList().size() == aggrs.size())) {
+          // all select columns must be aggregations
+          return null;
+
+        }
+        FileSinkOperator fsOp = (FileSinkOperator)(selOp.getChildren().get(0));
+        if (fsOp.getChildOperators() != null && fsOp.getChildOperators().size() > 0) {
+          // looks like a subq plan.
+          return null;
+        }
+
+        Table tbl = pctx.getTopToTable().get(tsOp);
+        List<Object> oneRow = new ArrayList<Object>();
+        List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
-      Table tbl = pctx.getTopToTable().get(tsOp);
-      List<Object> oneRow = new ArrayList<Object>();
-      List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
-      try{
         Hive hive = Hive.get(pctx.getConf());
         for (AggregationDesc aggr : aggrs) {
+          if (aggr.getDistinct()) {
+            // our stats for NDV is approx, not accurate.
+            return null;
+          }
           if (aggr.getGenericUDAFName().equals(GenericUDAFSum.class.getAnnotation(
               Description.class).name())) {
-            if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){
-              return null;
-            }
-            Long rowCnt = getRowCnt(pctx, tsOp, tbl);
-            if(rowCnt == null) {
-              return null;
-            }
-            oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0))
+            if(!(aggr.getParameters().get(0) instanceof ExprNodeConstantDesc)){
+              return null;
+            }
+            Long rowCnt = getRowCnt(pctx, tsOp, tbl);
+            if(rowCnt == null) {
+              return null;
+            }
+            oneRow.add(HiveDecimal.create(((ExprNodeConstantDesc) aggr.getParameters().get(0))
                 .getValue().toString()).multiply(HiveDecimal.create(rowCnt)));
-            ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+            ois.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
                 PrimitiveCategory.DECIMAL));
           }
           else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
@@ -274,14 +287,14 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
               }
             } else {
               Set<Partition> parts = pctx.getPrunedPartitions(
-                  tsOp.getConf().getAlias(), tsOp).getPartitions();
+                  tsOp.getConf().getAlias(), tsOp).getPartitions();
               for (Partition part : parts) {
                 if (!StatsSetupConst.areStatsUptoDate(part.getParameters())) {
                   Log.debug("Stats for part : " + part.getSpec() + " are not upto date.");
                   return null;
                 }
                 Long partRowCnt = Long.parseLong(part.getParameters()
-                    .get(StatsSetupConst.ROW_COUNT));
+                    .get(StatsSetupConst.ROW_COUNT));
                 if (partRowCnt < 1) {
                   Log.debug("Partition doesn't have upto date stats " + part.getSpec());
                   return null;
@@ -328,66 +341,66 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
             }
             ColumnStatisticsData statData = stats.get(0).getStatsData();
             switch (type) {
-            case Integeral:
-              oneRow.add(statData.getLongStats().getHighValue());
-              ois.add(PrimitiveObjectInspectorFactory.
-                  getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
-              break;
-            case Double:
-              oneRow.add(statData.getDoubleStats().getHighValue());
-              ois.add(PrimitiveObjectInspectorFactory.
-                  getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
-              break;
-            default:
-              // unsupported type
-              Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
-                  "metadata optimizer for column : " + colName);
-              return null;
+            case Integeral:
+              oneRow.add(statData.getLongStats().getHighValue());
+              ois.add(PrimitiveObjectInspectorFactory.
+                  getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
+              break;
+            case Double:
+              oneRow.add(statData.getDoubleStats().getHighValue());
+              ois.add(PrimitiveObjectInspectorFactory.
+                  getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
+              break;
+            default:
+              // unsupported type
+              Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
+                  "metadata optimizer for column : " + colName);
+              return null;
             }
           } else {
             Set<Partition> parts = pctx.getPrunedPartitions(
-                tsOp.getConf().getAlias(), tsOp).getPartitions();
+                tsOp.getConf().getAlias(), tsOp).getPartitions();
             switch (type) {
-            case Integeral: {
-              long maxVal = Long.MIN_VALUE;
-              Collection<List<ColumnStatisticsObj>> result =
-                  verifyAndGetPartStats(hive, tbl, colName, parts);
-              if (result == null) {
-                return null; // logging inside
-              }
-              for (List<ColumnStatisticsObj> statObj : result) {
-                ColumnStatisticsData statData = validateSingleColStat(statObj);
-                if (statData == null) return null;
-                long curVal = statData.getLongStats().getHighValue();
-                maxVal = Math.max(maxVal, curVal);
-              }
-              oneRow.add(maxVal);
-              ois.add(PrimitiveObjectInspectorFactory.
-                  getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
-              break;
-            }
-            case Double: {
-              double maxVal = Double.MIN_VALUE;
-              Collection<List<ColumnStatisticsObj>> result =
-                  verifyAndGetPartStats(hive, tbl, colName, parts);
-              if (result == null) {
-                return null; // logging inside
+            case Integeral: {
+              long maxVal = Long.MIN_VALUE;
+              Collection<List<ColumnStatisticsObj>> result =
+                  verifyAndGetPartStats(hive, tbl, colName, parts);
+              if (result == null) {
+                return null; // logging inside
+              }
+              for (List<ColumnStatisticsObj> statObj : result) {
+                ColumnStatisticsData statData = validateSingleColStat(statObj);
+                if (statData == null) return null;
+                long curVal = statData.getLongStats().getHighValue();
+                maxVal = Math.max(maxVal, curVal);
+              }
+              oneRow.add(maxVal);
+              ois.add(PrimitiveObjectInspectorFactory.
+                  getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
+              break;
+            }
-            for (List<ColumnStatisticsObj> statObj : result) {
-              ColumnStatisticsData statData = validateSingleColStat(statObj);
-              if (statData == null) return null;
-              double curVal = statData.getDoubleStats().getHighValue();
-              maxVal = Math.max(maxVal, curVal);
+            case Double: {
+              double maxVal = Double.MIN_VALUE;
+              Collection<List<ColumnStatisticsObj>> result =
+                  verifyAndGetPartStats(hive, tbl, colName, parts);
+              if (result == null) {
+                return null; // logging inside
+              }
+              for (List<ColumnStatisticsObj> statObj : result) {
+                ColumnStatisticsData statData = validateSingleColStat(statObj);
+                if (statData == null) return null;
+                double curVal = statData.getDoubleStats().getHighValue();
+                maxVal = Math.max(maxVal, curVal);
+              }
+              oneRow.add(maxVal);
+              ois.add(PrimitiveObjectInspectorFactory.
-                  getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
-              break;
-            }
-            default:
-              Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
-                  "metadata optimizer for column : " + colName);
-              return null;
+            default:
+              Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
+                  "metadata optimizer for column : " + colName);
+              return null;
             }
           }
          }
          else if (aggr.getGenericUDAFName().equals(GenericUDAFMin.class.getAnnotation(
@@ -401,67 +414,67 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
              return null;
            }
            ColumnStatisticsData statData = hive.getMSC().getTableColumnStatistics(
-               tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName))
-               .get(0).getStatsData();
+               tbl.getDbName(), tbl.getTableName(), Lists.newArrayList(colName))
+               .get(0).getStatsData();
            switch (type) {
-           case Integeral:
-             oneRow.add(statData.getLongStats().getLowValue());
-             ois.add(PrimitiveObjectInspectorFactory.
-                 getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
-             break;
-           case Double:
-             oneRow.add(statData.getDoubleStats().getLowValue());
-             ois.add(PrimitiveObjectInspectorFactory.
-                 getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
-             break;
-           default: // unsupported type
-             Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
-                 "metadata optimizer for column : " + colName);
-             return null;
+           case Integeral:
+             oneRow.add(statData.getLongStats().getLowValue());
+             ois.add(PrimitiveObjectInspectorFactory.
+                 getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
+             break;
+           case Double:
+             oneRow.add(statData.getDoubleStats().getLowValue());
+             ois.add(PrimitiveObjectInspectorFactory.
+                 getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
+             break;
+           default: // unsupported type
+             Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
+                 "metadata optimizer for column : " + colName);
+             return null;
            }
          } else {
            Set<Partition> parts = pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp).getPartitions();
            switch(type) {
-           case Integeral: {
-             long minVal = Long.MAX_VALUE;
-             Collection<List<ColumnStatisticsObj>> result =
-                 verifyAndGetPartStats(hive, tbl, colName, parts);
-             if (result == null) {
-               return null; // logging inside
-             }
-             for (List<ColumnStatisticsObj> statObj : result) {
-               ColumnStatisticsData statData = validateSingleColStat(statObj);
-               if (statData == null) return null;
-               long curVal = statData.getLongStats().getLowValue();
-               minVal = Math.min(minVal, curVal);
-             }
-             oneRow.add(minVal);
-             ois.add(PrimitiveObjectInspectorFactory.
-                 getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
-             break;
-           }
-           case Double: {
-             double minVal = Double.MAX_VALUE;
-             Collection<List<ColumnStatisticsObj>> result =
-                 verifyAndGetPartStats(hive, tbl, colName, parts);
-             if (result == null) {
-               return null; // logging inside
+           case Integeral: {
+             long minVal = Long.MAX_VALUE;
+             Collection<List<ColumnStatisticsObj>> result =
+                 verifyAndGetPartStats(hive, tbl, colName, parts);
+             if (result == null) {
+               return null; // logging inside
+             }
+             for (List<ColumnStatisticsObj> statObj : result) {
+               ColumnStatisticsData statData = validateSingleColStat(statObj);
+               if (statData == null) return null;
+               long curVal = statData.getLongStats().getLowValue();
+               minVal = Math.min(minVal, curVal);
+             }
+             oneRow.add(minVal);
+             ois.add(PrimitiveObjectInspectorFactory.
+                 getPrimitiveJavaObjectInspector(PrimitiveCategory.LONG));
+             break;
            }
-           for (List<ColumnStatisticsObj> statObj : result) {
-             ColumnStatisticsData statData = validateSingleColStat(statObj);
-             if (statData == null) return null;
-             double curVal = statData.getDoubleStats().getLowValue();
-             minVal = Math.min(minVal, curVal);
+           case Double: {
+             double minVal = Double.MAX_VALUE;
+             Collection<List<ColumnStatisticsObj>> result =
+                 verifyAndGetPartStats(hive, tbl, colName, parts);
+             if (result == null) {
+               return null; // logging inside
+             }
+             for (List<ColumnStatisticsObj> statObj : result) {
+               ColumnStatisticsData statData = validateSingleColStat(statObj);
+               if (statData == null) return null;
+               double curVal = statData.getDoubleStats().getLowValue();
+               minVal = Math.min(minVal, curVal);
+             }
+             oneRow.add(minVal);
+             ois.add(PrimitiveObjectInspectorFactory.
+                 getPrimitiveJavaObjectInspector(PrimitiveCategory.DOUBLE));
+             break;
            }
-           default: // unsupported type
-             Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
-                 "metadata optimizer for column : " + colName);
-             return null;
+           default: // unsupported type
+             Log.debug("Unsupported type: " + colDesc.getTypeString() + " encountered in " +
+                 "metadata optimizer for column : " + colName);
+             return null;
            }
          }
@@ -471,28 +484,29 @@ else if (aggr.getGenericUDAFName().equals(GenericUDAFCount.class.getAnnotation(
             return null;
           }
         }
+
+
+        List<List<Object>> allRows = new ArrayList<List<Object>>();
+        allRows.add(oneRow);
+
+        List<String> colNames = new ArrayList<String>();
+        for (ColumnInfo colInfo: gbyOp.getSchema().getSignature()) {
+          colNames.add(colInfo.getInternalName());
+        }
+        StandardStructObjectInspector sOI = ObjectInspectorFactory.
+            getStandardStructObjectInspector(colNames, ois);
+        FetchWork fWork = new FetchWork(allRows, sOI);
+        FetchTask fTask = (FetchTask)TaskFactory.get(fWork, pctx.getConf());
+        fWork.setLimit(allRows.size());
+        pctx.setFetchTask(fTask);
+
+        return null;
       } catch (Exception e) {
         // this is best effort optimization, bail out in error conditions and
         // try generate and execute slower plan
         Log.debug("Failed to optimize using metadata optimizer", e);
         return null;
       }
-
-      List<List<Object>> allRows = new ArrayList<List<Object>>();
-      allRows.add(oneRow);
-
-      List<String> colNames = new ArrayList<String>();
-      for (ColumnInfo colInfo: gbyOp.getSchema().getSignature()) {
-        colNames.add(colInfo.getInternalName());
-      }
-      StandardStructObjectInspector sOI = ObjectInspectorFactory.
-          getStandardStructObjectInspector(colNames, ois);
-      FetchWork fWork = new FetchWork(allRows, sOI);
-      FetchTask fTask = (FetchTask)TaskFactory.get(fWork, pctx.getConf());
-      fWork.setLimit(allRows.size());
-      pctx.setFetchTask(fTask);
-
-      return null;
     }
     private ColumnStatisticsData validateSingleColStat(List<ColumnStatisticsObj> statObj) {
diff --git ql/src/test/queries/clientpositive/distinct_stats.q ql/src/test/queries/clientpositive/distinct_stats.q
new file mode 100644
index 0000000..812cdf3
--- /dev/null
+++ ql/src/test/queries/clientpositive/distinct_stats.q
@@ -0,0 +1,21 @@
+set hive.stats.autogather=true;
+
+set hive.compute.query.using.stats=true;
+create table t1 (a string, b string);
+
+insert into table t1 select * from src;
+
+analyze table t1 compute statistics for columns a,b;
+
+explain
+select count(distinct b) from t1 group by a;
+
+explain
+select distinct(b) from t1;
+
+explain
+select a, count(*) from t1 group by a;
+
+drop table t1;
+reset hive.compute.query.using.stats;
+reset hive.stats.autogather;
diff --git ql/src/test/results/clientpositive/distinct_stats.q.out ql/src/test/results/clientpositive/distinct_stats.q.out
new file mode 100644
index 0000000..f715ea3
--- /dev/null
+++ ql/src/test/results/clientpositive/distinct_stats.q.out
@@ -0,0 +1,208 @@
+PREHOOK: query: create table t1 (a string, b string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table t1 (a string, b string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into table t1 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into table t1 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table t1 compute statistics for columns a,b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t1 compute statistics for columns a,b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: t1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select count(distinct b) from t1 group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(distinct b) from t1 group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: t1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: a (type: string), b (type: string)
+              outputColumnNames: a, b
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(DISTINCT b)
+                keys: a (type: string), b (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(DISTINCT KEY._col1:0._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: explain
+select distinct(b) from t1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select distinct(b) from t1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: t1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: b (type: string)
+              outputColumnNames: b
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: b (type: string)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: _col0
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: explain
+select a, count(*) from t1 group by a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a, count(*) from t1 group by a
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: t1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: a (type: string)
+              outputColumnNames: a
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: a (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: bigint)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: drop table t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: drop table t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]