diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 186d5809c8..b3a3ba6d1b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -528,8 +528,6 @@ COLUMNSTATSCOLLECTOR_INVALID_COLUMN(30012, "Column statistics are not supported " + "for partition columns"), - STATISTICS_CLONING_FAILED(30013, "Cloning of statistics failed"), - STATSAGGREGATOR_SOURCETASK_NULL(30014, "SourceTask of StatsTask should not be null"), STATSAGGREGATOR_CONNECTION_ERROR(30015, "Stats aggregator of type {0} cannot be connected to", true), diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index 8cedbe5322..8425911db7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -242,11 +242,7 @@ private int convertJoinBucketMapJoin(JoinOperator joinOp, MapJoinOperator mapJoi // Not adding other stats (e.g., # of rows, col stats) since only data size is used here for (TableScanOperator root : OperatorUtils.findOperatorsUpstream(parentOp, TableScanOperator.class)) { if (currInputStat == null) { - try { currInputStat = root.getStatistics().clone(); - } catch (CloneNotSupportedException e) { - throw new RuntimeException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); - } } else { currInputStat.addBasicStats(root.getStatistics()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 86b87248ba..fcfdce95fc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -138,8 +138,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, LOG.debug("[0] STATS-" + tsop.toString() + " (" + table.getTableName() + "): " + stats.extendedToString()); } - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } catch (HiveException e) { LOG.debug("Failed to retrieve stats ",e); throw new SemanticException(e); @@ -177,41 +175,33 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Statistics stats = null; if (parentStats != null) { - try { - stats = parentStats.clone(); - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); - } + stats = parentStats.clone(); } - try { - if (satisfyPrecondition(parentStats)) { - // this will take care of mapping between input column names and output column names. The - // returned column stats will have the output column names. - List colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, - sop.getColumnExprMap(), sop.getSchema()); - stats.setColumnStats(colStats); - // in case of select(*) the data size does not change - if (!sop.getConf().isSelectStar() && !sop.getConf().isSelStarNoCompute()) { - long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats); - stats.setDataSize(dataSize); - } - sop.setStatistics(stats); + if (satisfyPrecondition(parentStats)) { + // this will take care of mapping between input column names and output column names. The + // returned column stats will have the output column names. + List colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, + sop.getColumnExprMap(), sop.getSchema()); + stats.setColumnStats(colStats); + // in case of select(*) the data size does not change + if (!sop.getConf().isSelectStar() && !sop.getConf().isSelStarNoCompute()) { + long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats); + stats.setDataSize(dataSize); + } + sop.setStatistics(stats); - if (LOG.isDebugEnabled()) { - LOG.debug("[0] STATS-" + sop.toString() + ": " + stats.extendedToString()); - } - } else { - if (parentStats != null) { - sop.setStatistics(parentStats.clone()); + if (LOG.isDebugEnabled()) { + LOG.debug("[0] STATS-" + sop.toString() + ": " + stats.extendedToString()); + } + } else { + if (parentStats != null) { + sop.setStatistics(parentStats.clone()); - if (LOG.isDebugEnabled()) { - LOG.debug("[1] STATS-" + sop.toString() + ": " + parentStats.extendedToString()); - } + if (LOG.isDebugEnabled()) { + LOG.debug("[1] STATS-" + sop.toString() + ": " + parentStats.extendedToString()); } } - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } return null; } @@ -276,51 +266,48 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, neededCols = tsop.getNeededColumns(); } - try { - if (parentStats != null) { - ExprNodeDesc pred = fop.getConf().getPredicate(); - // evaluate filter expression and update statistics - long newNumRows = evaluateExpression(parentStats, pred, aspCtx, - neededCols, fop, parentStats.getNumRows()); - Statistics st = parentStats.clone(); - - if (satisfyPrecondition(parentStats)) { - - // update statistics based on column statistics. - // OR conditions keeps adding the stats independently, this may - // result in number of rows getting more than the input rows in - // which case stats need not be updated - if (newNumRows <= parentStats.getNumRows()) { - updateStats(st, newNumRows, true, fop); - } + if (parentStats != null) { + ExprNodeDesc pred = fop.getConf().getPredicate(); - if (LOG.isDebugEnabled()) { - LOG.debug("[0] STATS-" + fop.toString() + ": " + st.extendedToString()); - } - } else { + // evaluate filter expression and update statistics + long newNumRows = evaluateExpression(parentStats, pred, aspCtx, + neededCols, fop, parentStats.getNumRows()); + Statistics st = parentStats.clone(); - // update only the basic statistics in the absence of column statistics - if (newNumRows <= parentStats.getNumRows()) { - updateStats(st, newNumRows, false, fop); - } + if (satisfyPrecondition(parentStats)) { - if (LOG.isDebugEnabled()) { - LOG.debug("[1] STATS-" + fop.toString() + ": " + st.extendedToString()); - } + // update statistics based on column statistics. + // OR conditions keeps adding the stats independently, this may + // result in number of rows getting more than the input rows in + // which case stats need not be updated + if (newNumRows <= parentStats.getNumRows()) { + updateStats(st, newNumRows, true, fop); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("[0] STATS-" + fop.toString() + ": " + st.extendedToString()); + } + } else { + + // update only the basic statistics in the absence of column statistics + if (newNumRows <= parentStats.getNumRows()) { + updateStats(st, newNumRows, false, fop); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("[1] STATS-" + fop.toString() + ": " + st.extendedToString()); } - fop.setStatistics(st); - aspCtx.setAndExprStats(null); } - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + fop.setStatistics(st); + aspCtx.setAndExprStats(null); } return null; } protected long evaluateExpression(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List neededCols, - Operator op, long currNumRows) throws CloneNotSupportedException, SemanticException { + Operator op, long currNumRows) throws SemanticException { long newNumRows = 0; Statistics andStats = null; @@ -505,7 +492,7 @@ private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, long currNumRow } private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx, - List neededCols, Operator op) throws SemanticException, CloneNotSupportedException { + List neededCols, Operator op) throws SemanticException { final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; final boolean invert = Boolean.TRUE.equals( ((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // boolean invert (not) @@ -538,7 +525,7 @@ private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currN private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx, List neededCols, Operator op) - throws CloneNotSupportedException, SemanticException { + throws SemanticException { long numRows = currNumRows; @@ -837,7 +824,7 @@ private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFun private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx, List neededCols, - Operator op, long currNumRows) throws CloneNotSupportedException, SemanticException { + Operator op, long currNumRows) throws SemanticException { long numRows = currNumRows; @@ -1066,210 +1053,206 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, containsGroupingSet + " sizeOfGroupingSet: " + sizeOfGroupingSet); } - try { - // satisfying precondition means column statistics is available - if (satisfyPrecondition(parentStats)) { + // satisfying precondition means column statistics is available + if (satisfyPrecondition(parentStats)) { - // check if map side aggregation is possible or not based on column stats - hashAgg = checkMapSideAggregation(gop, colStats, conf); + // check if map side aggregation is possible or not based on column stats + hashAgg = checkMapSideAggregation(gop, colStats, conf); - if (LOG.isDebugEnabled()) { - LOG.debug("STATS-" + gop.toString() + " hashAgg: " + hashAgg); - } + if (LOG.isDebugEnabled()) { + LOG.debug("STATS-" + gop.toString() + " hashAgg: " + hashAgg); + } - stats = parentStats.clone(); - stats.setColumnStats(colStats); - long ndvProduct = 1; - final long parentNumRows = stats.getNumRows(); + stats = parentStats.clone(); + stats.setColumnStats(colStats); + long ndvProduct = 1; + final long parentNumRows = stats.getNumRows(); - // compute product of distinct values of grouping columns - for (ColStatistics cs : colStats) { - if (cs != null) { - long ndv = cs.getCountDistint(); - if (cs.getNumNulls() > 0) { - ndv = StatsUtils.safeAdd(ndv, 1); - } - ndvProduct = StatsUtils.safeMult(ndvProduct, ndv); + // compute product of distinct values of grouping columns + for (ColStatistics cs : colStats) { + if (cs != null) { + long ndv = cs.getCountDistint(); + if (cs.getNumNulls() > 0) { + ndv = StatsUtils.safeAdd(ndv, 1); + } + ndvProduct = StatsUtils.safeMult(ndvProduct, ndv); + } else { + if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) { + // the column must be an aggregate column inserted by GBY. We + // don't have to account for this column when computing product + // of NDVs + continue; } else { - if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) { - // the column must be an aggregate column inserted by GBY. We - // don't have to account for this column when computing product - // of NDVs - continue; - } else { - // partial column statistics on grouping attributes case. - // if column statistics on grouping attribute is missing, then - // assume worst case. - // GBY rule will emit half the number of rows if ndvProduct is 0 - ndvProduct = 0; - } - break; + // partial column statistics on grouping attributes case. + // if column statistics on grouping attribute is missing, then + // assume worst case. + // GBY rule will emit half the number of rows if ndvProduct is 0 + ndvProduct = 0; } + break; } + } - // if ndvProduct is 0 then column stats state must be partial and we are missing - // column stats for a group by column - if (ndvProduct == 0) { - ndvProduct = parentNumRows / 2; + // if ndvProduct is 0 then column stats state must be partial and we are missing + // column stats for a group by column + if (ndvProduct == 0) { + ndvProduct = parentNumRows / 2; - if (LOG.isDebugEnabled()) { - LOG.debug("STATS-" + gop.toString() + ": ndvProduct became 0 as some column does not" + - " have stats. ndvProduct changed to: " + ndvProduct); - } + if (LOG.isDebugEnabled()) { + LOG.debug("STATS-" + gop.toString() + ": ndvProduct became 0 as some column does not" + + " have stats. ndvProduct changed to: " + ndvProduct); } + } - if (interReduction) { - - if (hashAgg) { - if (containsGroupingSet) { - // Case 4: column stats, hash aggregation, grouping sets - cardinality = Math.min( - (StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet)) / 2, - StatsUtils.safeMult(StatsUtils.safeMult(ndvProduct, parallelism), sizeOfGroupingSet)); + if (interReduction) { - if (LOG.isDebugEnabled()) { - LOG.debug("[Case 4] STATS-" + gop.toString() + ": cardinality: " + cardinality); - } - } else { - // Case 3: column stats, hash aggregation, NO grouping sets - cardinality = Math.min(parentNumRows / 2, StatsUtils.safeMult(ndvProduct, parallelism)); + if (hashAgg) { + if (containsGroupingSet) { + // Case 4: column stats, hash aggregation, grouping sets + cardinality = Math.min( + (StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet)) / 2, + StatsUtils.safeMult(StatsUtils.safeMult(ndvProduct, parallelism), sizeOfGroupingSet)); - if (LOG.isDebugEnabled()) { - LOG.debug("[Case 3] STATS-" + gop.toString() + ": cardinality: " + cardinality); - } + if (LOG.isDebugEnabled()) { + LOG.debug("[Case 4] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } else { - if (containsGroupingSet) { - // Case 6: column stats, NO hash aggregation, grouping sets - cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet); + // Case 3: column stats, hash aggregation, NO grouping sets + cardinality = Math.min(parentNumRows / 2, StatsUtils.safeMult(ndvProduct, parallelism)); - if (LOG.isDebugEnabled()) { - LOG.debug("[Case 6] STATS-" + gop.toString() + ": cardinality: " + cardinality); - } - } else { - // Case 5: column stats, NO hash aggregation, NO grouping sets - cardinality = parentNumRows; - - if (LOG.isDebugEnabled()) { - LOG.debug("[Case 5] STATS-" + gop.toString() + ": cardinality: " + cardinality); - } + if (LOG.isDebugEnabled()) { + LOG.debug("[Case 3] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } } else { - - // in reduce side GBY, we don't know if the grouping set was present or not. so get it - // from map side GBY - GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class); - if (mGop != null) { - containsGroupingSet = mGop.getConf().isGroupingSetsPresent(); - } - if (containsGroupingSet) { - // Case 8: column stats, grouping sets - sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size(); - cardinality = Math.min(parentNumRows, StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet)); + // Case 6: column stats, NO hash aggregation, grouping sets + cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet); if (LOG.isDebugEnabled()) { - LOG.debug("[Case 8] STATS-" + gop.toString() + ": cardinality: " + cardinality); + LOG.debug("[Case 6] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } else { - // Case 9: column stats, NO grouping sets - cardinality = Math.min(parentNumRows, ndvProduct); + // Case 5: column stats, NO hash aggregation, NO grouping sets + cardinality = parentNumRows; if (LOG.isDebugEnabled()) { - LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: " + cardinality); + LOG.debug("[Case 5] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } } - - // update stats, but don't update NDV as it will not change - updateStats(stats, cardinality, true, gop, false); } else { - // NO COLUMN STATS - if (parentStats != null) { + // in reduce side GBY, we don't know if the grouping set was present or not. so get it + // from map side GBY + GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class); + if (mGop != null) { + containsGroupingSet = mGop.getConf().isGroupingSetsPresent(); + } + + if (containsGroupingSet) { + // Case 8: column stats, grouping sets + sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size(); + cardinality = Math.min(parentNumRows, StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet)); - stats = parentStats.clone(); - final long parentNumRows = stats.getNumRows(); + if (LOG.isDebugEnabled()) { + LOG.debug("[Case 8] STATS-" + gop.toString() + ": cardinality: " + cardinality); + } + } else { + // Case 9: column stats, NO grouping sets + cardinality = Math.min(parentNumRows, ndvProduct); - // if we don't have column stats, we just assume hash aggregation is disabled - if (interReduction) { + if (LOG.isDebugEnabled()) { + LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: " + cardinality); + } + } + } - if (containsGroupingSet) { - // Case 2: NO column stats, NO hash aggregation, grouping sets - cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet); + // update stats, but don't update NDV as it will not change + updateStats(stats, cardinality, true, gop, false); + } else { - if (LOG.isDebugEnabled()) { - LOG.debug("[Case 2] STATS-" + gop.toString() + ": cardinality: " + cardinality); - } - } else { - // Case 1: NO column stats, NO hash aggregation, NO grouping sets - cardinality = parentNumRows; + // NO COLUMN STATS + if (parentStats != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("[Case 1] STATS-" + gop.toString() + ": cardinality: " + cardinality); - } + stats = parentStats.clone(); + final long parentNumRows = stats.getNumRows(); + + // if we don't have column stats, we just assume hash aggregation is disabled + if (interReduction) { + + if (containsGroupingSet) { + // Case 2: NO column stats, NO hash aggregation, grouping sets + cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet); + + if (LOG.isDebugEnabled()) { + LOG.debug("[Case 2] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } else { - - // Case 7: NO column stats - cardinality = parentNumRows / 2; + // Case 1: NO column stats, NO hash aggregation, NO grouping sets + cardinality = parentNumRows; if (LOG.isDebugEnabled()) { - LOG.debug("[Case 7] STATS-" + gop.toString() + ": cardinality: " + cardinality); + LOG.debug("[Case 1] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } + } else { - updateStats(stats, cardinality, false, gop); - } - } + // Case 7: NO column stats + cardinality = parentNumRows / 2; - // if UDAFs are present, new columns needs to be added - if (!aggDesc.isEmpty() && stats != null) { - List aggColStats = Lists.newArrayList(); - for (ColumnInfo ci : rs.getSignature()) { - - // if the columns in row schema is not contained in column - // expression map, then those are the aggregate columns that - // are added GBY operator. we will estimate the column statistics - // for those newly added columns - if (!colExprMap.containsKey(ci.getInternalName())) { - String colName = ci.getInternalName(); - String colType = ci.getTypeName(); - ColStatistics cs = new ColStatistics(colName, colType); - cs.setCountDistint(stats.getNumRows()); - cs.setNumNulls(0); - cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType)); - aggColStats.add(cs); + if (LOG.isDebugEnabled()) { + LOG.debug("[Case 7] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } - // add the new aggregate column and recompute data size - if (aggColStats.size() > 0) { - stats.addToColumnStats(aggColStats); + updateStats(stats, cardinality, false, gop); + } + } - // only if the column stats is available, update the data size from - // the column stats - if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) { - updateStats(stats, stats.getNumRows(), true, gop); - } - } + // if UDAFs are present, new columns needs to be added + if (!aggDesc.isEmpty() && stats != null) { + List aggColStats = Lists.newArrayList(); + for (ColumnInfo ci : rs.getSignature()) { - // if UDAF present and if column expression map is empty then it must - // be full aggregation query like count(*) in which case number of - // rows will be 1 - if (colExprMap.isEmpty()) { - updateStats(stats, 1, true, gop); + // if the columns in row schema is not contained in column + // expression map, then those are the aggregate columns that + // are added GBY operator. we will estimate the column statistics + // for those newly added columns + if (!colExprMap.containsKey(ci.getInternalName())) { + String colName = ci.getInternalName(); + String colType = ci.getTypeName(); + ColStatistics cs = new ColStatistics(colName, colType); + cs.setCountDistint(stats.getNumRows()); + cs.setNumNulls(0); + cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType)); + aggColStats.add(cs); } } - gop.setStatistics(stats); + // add the new aggregate column and recompute data size + if (aggColStats.size() > 0) { + stats.addToColumnStats(aggColStats); + + // only if the column stats is available, update the data size from + // the column stats + if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) { + updateStats(stats, stats.getNumRows(), true, gop); + } + } - if (LOG.isDebugEnabled() && stats != null) { - LOG.debug("[0] STATS-" + gop.toString() + ": " + stats.extendedToString()); + // if UDAF present and if column expression map is empty then it must + // be full aggregation query like count(*) in which case number of + // rows will be 1 + if (colExprMap.isEmpty()) { + updateStats(stats, 1, true, gop); } - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + + gop.setStatistics(stats); + + if (LOG.isDebugEnabled() && stats != null) { + LOG.debug("[0] STATS-" + gop.toString() + ": " + stats.extendedToString()); } return null; } @@ -1470,11 +1453,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, for (int pos = 0; pos < parents.size(); pos++) { ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); Statistics parentStats; - try { - parentStats = parent.getStatistics().clone(); - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); - } + parentStats = parent.getStatistics().clone(); keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf() .getOutputKeyColumnNames()); @@ -1581,12 +1560,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, pred = jop.getConf().getResidualFilterExprs().get(0); } // evaluate filter expression and update statistics - try { - newNumRows = evaluateExpression(stats, pred, - aspCtx, jop.getSchema().getColumnNames(), jop, stats.getNumRows()); - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); - } + newNumRows = evaluateExpression(stats, pred, + aspCtx, jop.getSchema().getColumnNames(), jop, stats.getNumRows()); // update statistics based on column statistics. // OR conditions keeps adding the stats independently, this may // result in number of rows getting more than the input rows in @@ -1677,12 +1652,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, pred = jop.getConf().getResidualFilterExprs().get(0); } // evaluate filter expression and update statistics - try { newNumRows = evaluateExpression(wcStats, pred, aspCtx, jop.getSchema().getColumnNames(), jop, wcStats.getNumRows()); - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); - } // update only the basic statistics in the absence of column statistics if (newNumRows <= joinRowCount) { updateStats(wcStats, newNumRows, false, jop); @@ -2242,42 +2213,37 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, LimitOperator lop = (LimitOperator) nd; Operator parent = lop.getParentOperators().get(0); Statistics parentStats = parent.getStatistics(); + long limit = -1; + limit = lop.getConf().getLimit(); - try { - long limit = -1; - limit = lop.getConf().getLimit(); + if (satisfyPrecondition(parentStats)) { + Statistics stats = parentStats.clone(); + List colStats = StatsUtils.getColStatisticsUpdatingTableAlias( + parentStats, lop.getSchema()); + stats.setColumnStats(colStats); - if (satisfyPrecondition(parentStats)) { - Statistics stats = parentStats.clone(); - List colStats = StatsUtils.getColStatisticsUpdatingTableAlias( - parentStats, lop.getSchema()); - stats.setColumnStats(colStats); - - // if limit is greater than available rows then do not update - // statistics - if (limit <= parentStats.getNumRows()) { - updateStats(stats, limit, true, lop); - } - lop.setStatistics(stats); + // if limit is greater than available rows then do not update + // statistics + if (limit <= parentStats.getNumRows()) { + updateStats(stats, limit, true, lop); + } + lop.setStatistics(stats); - if (LOG.isDebugEnabled()) { - LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString()); - } - } else { - if (parentStats != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString()); + } + } else { + if (parentStats != null) { - // in the absence of column statistics, compute data size based on - // based on average row size - limit = StatsUtils.getMaxIfOverflow(limit); - Statistics wcStats = parentStats.scaleToRowCount(limit); - lop.setStatistics(wcStats); - if (LOG.isDebugEnabled()) { - LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString()); - } + // in the absence of column statistics, compute data size based on + // based on average row size + limit = StatsUtils.getMaxIfOverflow(limit); + Statistics wcStats = parentStats.scaleToRowCount(limit); + lop.setStatistics(wcStats); + if (LOG.isDebugEnabled()) { + LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString()); } } - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } return null; } @@ -2302,48 +2268,43 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (parentStats != null) { AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf conf = aspCtx.getConf(); - List outKeyColNames = rop.getConf().getOutputKeyColumnNames(); List outValueColNames = rop.getConf().getOutputValueColumnNames(); Map colExprMap = rop.getColumnExprMap(); - try { - Statistics outStats = parentStats.clone(); - if (satisfyPrecondition(parentStats)) { - List colStats = Lists.newArrayList(); - for (String key : outKeyColNames) { - String prefixedKey = Utilities.ReduceField.KEY.toString() + "." + key; - ExprNodeDesc end = colExprMap.get(prefixedKey); - if (end != null) { - ColStatistics cs = StatsUtils - .getColStatisticsFromExpression(conf, parentStats, end); - if (cs != null) { - cs.setColumnName(prefixedKey); - colStats.add(cs); - } + Statistics outStats = parentStats.clone(); + if (satisfyPrecondition(parentStats)) { + List colStats = Lists.newArrayList(); + for (String key : outKeyColNames) { + String prefixedKey = Utilities.ReduceField.KEY.toString() + "." + key; + ExprNodeDesc end = colExprMap.get(prefixedKey); + if (end != null) { + ColStatistics cs = StatsUtils + .getColStatisticsFromExpression(conf, parentStats, end); + if (cs != null) { + cs.setColumnName(prefixedKey); + colStats.add(cs); } } + } - for (String val : outValueColNames) { - String prefixedVal = Utilities.ReduceField.VALUE.toString() + "." + val; - ExprNodeDesc end = colExprMap.get(prefixedVal); - if (end != null) { - ColStatistics cs = StatsUtils - .getColStatisticsFromExpression(conf, parentStats, end); - if (cs != null) { - cs.setColumnName(prefixedVal); - colStats.add(cs); - } + for (String val : outValueColNames) { + String prefixedVal = Utilities.ReduceField.VALUE.toString() + "." + val; + ExprNodeDesc end = colExprMap.get(prefixedVal); + if (end != null) { + ColStatistics cs = StatsUtils + .getColStatisticsFromExpression(conf, parentStats, end); + if (cs != null) { + cs.setColumnName(prefixedVal); + colStats.add(cs); } } - - outStats.setColumnStats(colStats); - } - rop.setStatistics(outStats); - if (LOG.isDebugEnabled()) { - LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString()); } - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + + outStats.setColumnStats(colStats); + } + rop.setStatistics(outStats); + if (LOG.isDebugEnabled()) { + LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString()); } } return null; @@ -2376,11 +2337,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Statistics parentStats = parent.getStatistics(); if (stats == null) { - try { - stats = parentStats.clone(); - } catch (CloneNotSupportedException e) { - throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); - } + stats = parentStats.clone(); } else { stats.addBasicStats(parentStats); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java index 1aafa9e2a0..aa0559d9e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java @@ -140,7 +140,7 @@ public String toString() { } @Override - public ColStatistics clone() throws CloneNotSupportedException { + public ColStatistics clone() { ColStatistics clone = new ColStatistics(colName, colType); clone.setAvgColLen(avgColLen); clone.setCountDistint(countDistint); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 82df9606e7..013fccc337 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -167,7 +167,7 @@ public String extendedToString() { } @Override - public Statistics clone() throws CloneNotSupportedException { + public Statistics clone() { Statistics clone = new Statistics(numRows, dataSize); clone.setRunTimeNumRows(runTimeNumRows); clone.setBasicStatsState(basicStatsState); @@ -302,12 +302,7 @@ public void setRunTimeNumRows(long runTimeNumRows) { public Statistics scaleToRowCount(long newRowCount) { Statistics ret; - try { - ret = clone(); - } catch (CloneNotSupportedException e) { - // FIXME: remove the Colneable usage - return new Statistics(0,0); - } + ret = clone(); if(numRows == 0 || newRowCount >= numRows) { return ret; } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 20c2f94dbd..e42614c26b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1556,11 +1556,7 @@ public static long getWritableSize(ObjectInspector oi, Object value) { for (ColStatistics parentColStat : parentStats.getColumnStats()) { ColStatistics colStat; - try { - colStat = parentColStat.clone(); - } catch (CloneNotSupportedException e) { - colStat = null; - } + colStat = parentColStat.clone(); if (colStat != null) { cs.add(colStat); } @@ -1604,11 +1600,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName); if (colStats != null) { /* If statistics for the column already exist use it. */ - try { return colStats.clone(); - } catch (CloneNotSupportedException e) { - return null; - } } // virtual columns @@ -1619,11 +1611,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis // clone the column stats and return ColStatistics result = parentStats.getColumnStatisticsFromColName(colName); if (result != null) { - try { return result.clone(); - } catch (CloneNotSupportedException e) { - return null; - } } return null; } @@ -1651,12 +1639,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0)); if (stats != null) { ColStatistics newStats; - try { - newStats = stats.clone(); - } catch (CloneNotSupportedException e) { - LOG.warn("error cloning stats, this should not happen"); - return null; - } + newStats = stats.clone(); newStats.setColumnName(colName); colType = colType.toLowerCase(); newStats.setColumnType(colType);