diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index be7ed32..9df0fb9 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -399,7 +399,7 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String msdb.openTransaction(); oldPart = msdb.getPartition(dbname, name, new_part.getValues()); if (MetaStoreUtils.requireCalStats(hiveConf, oldPart, new_part, tbl, environmentContext)) { - MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true, environmentContext); + MetaStoreUtils.updatePartitionStatsFast(oldPart, new_part, wh, false, true, environmentContext); } updatePartColumnStats(msdb, dbname, name, new_part.getValues(), new_part); @@ -505,7 +505,7 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String new_part.getSd().setLocation(newPartLoc); if (MetaStoreUtils.requireCalStats(hiveConf, oldPart, new_part, tbl, environmentContext)) { - MetaStoreUtils.updatePartitionStatsFast(new_part, wh, false, true, environmentContext); + MetaStoreUtils.updatePartitionStatsFast(oldPart, new_part, wh, false, true, environmentContext); } String oldPartName = Warehouse.makePartName(tbl.getPartitionKeys(), oldPart.getValues()); @@ -627,7 +627,7 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String partValsList.add(tmpPart.getValues()); if (MetaStoreUtils.requireCalStats(hiveConf, oldTmpPart, tmpPart, tbl, environmentContext)) { - MetaStoreUtils.updatePartitionStatsFast(tmpPart, wh, false, true, environmentContext); + MetaStoreUtils.updatePartitionStatsFast(oldTmpPart, tmpPart, wh, false, true, environmentContext); } updatePartColumnStats(msdb, dbname, name, oldTmpPart.getValues(), tmpPart); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 48bebb2..29130dc 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -2851,7 +2851,7 @@ private void initializeAddedPartition( final Table tbl, final PartitionSpecProxy.PartitionIterator part, boolean madeDir) throws MetaException { if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVESTATSAUTOGATHER) && !MetaStoreUtils.isView(tbl)) { - MetaStoreUtils.updatePartitionStatsFast(part, wh, madeDir, false, null); + MetaStoreUtils.updatePartitionStatsFast(part.getCurrent(), wh, madeDir, null); } // set create time diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 9a4c61a..ebc0816 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -301,14 +301,9 @@ public static boolean requireCalStats(Configuration hiveConf, Partition oldPart, // requires to calculate stats if new and old have different fast stats if ((oldPart != null) && (oldPart.getParameters() != null)) { - for (String stat : StatsSetupConst.fastStats) { - if (oldPart.getParameters().containsKey(stat)) { - Long oldStat = Long.parseLong(oldPart.getParameters().get(stat)); - Long newStat = Long.parseLong(newPart.getParameters().get(stat)); - if (!oldStat.equals(newStat)) { - return true; - } - } + if (!isFastStatsSame(oldPart, newPart)) { + // Need to update stats + return true; } } return false; @@ -316,33 +311,37 @@ public static boolean requireCalStats(Configuration hiveConf, Partition oldPart, public static boolean updatePartitionStatsFast(Partition part, Warehouse wh, EnvironmentContext environmentContext) throws MetaException { - return updatePartitionStatsFast(part, wh, false, false, environmentContext); + return updatePartitionStatsFast(part, wh, false, environmentContext); } public static boolean updatePartitionStatsFast(Partition part, Warehouse wh, boolean madeDir, EnvironmentContext environmentContext) throws MetaException { - return updatePartitionStatsFast(part, wh, madeDir, false, environmentContext); + return updatePartitionStatsFast(null, part, wh, madeDir, false, environmentContext); } - /** - * Updates the numFiles and totalSize parameters for the passed Partition by querying - * the warehouse if the passed Partition does not already have values for these parameters. - * @param part - * @param wh - * @param madeDir if true, the directory was just created and can be assumed to be empty - * @param forceRecompute Recompute stats even if the passed Partition already has - * these parameters set - * @return true if the stats were updated, false otherwise - */ - public static boolean updatePartitionStatsFast(Partition part, Warehouse wh, - boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException { - return updatePartitionStatsFast(new PartitionSpecProxy.SimplePartitionWrapperIterator(part), - wh, madeDir, forceRecompute, environmentContext); + public static boolean isFastStatsSame(Partition oldPart, Partition newPart) { + boolean result = true; + if ((oldPart != null) && (oldPart.getParameters() != null)) { + for (String stat : StatsSetupConst.fastStats) { + if (oldPart.getParameters().containsKey(stat)) { + Long oldStat = Long.parseLong(oldPart.getParameters().get(stat)); + Long newStat = Long.parseLong(newPart.getParameters().get(stat)); + if (!oldStat.equals(newStat)) { + result = false; + break; + } + } else { + return false; + } + } + } + return result; } /** * Updates the numFiles and totalSize parameters for the passed Partition by querying * the warehouse if the passed Partition does not already have values for these parameters. + * @param oldPart * @param part * @param wh * @param madeDir if true, the directory was just created and can be assumed to be empty @@ -350,7 +349,7 @@ public static boolean updatePartitionStatsFast(Partition part, Warehouse wh, * these parameters set * @return true if the stats were updated, false otherwise */ - public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionIterator part, Warehouse wh, + public static boolean updatePartitionStatsFast(Partition oldPart, Partition part, Warehouse wh, boolean madeDir, boolean forceRecompute, EnvironmentContext environmentContext) throws MetaException { Map params = part.getParameters(); boolean updated = false; @@ -359,29 +358,38 @@ public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionItera !containsAllFastStats(params)) { if (params == null) { params = new HashMap(); + part.setParameters(params); } if (!madeDir) { // The partition location already existed and may contain data. Lets try to // populate those statistics that don't require a full scan of the data. LOG.warn("Updating partition stats fast for: " + part.getTableName()); - FileStatus[] fileStatus = wh.getFileStatusesForLocation(part.getLocation()); - populateQuickStats(fileStatus, params); - LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE)); - if (environmentContext != null - && environmentContext.isSetProperties() - && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( - StatsSetupConst.STATS_GENERATED))) { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); - } else { - StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); + FileStatus[] fileStatus = wh.getFileStatusesForLocation(part.getSd().getLocation()); + if (!isFastStatsSame(oldPart, part)) { + populateQuickStats(fileStatus, params); } + LOG.warn("Updated size to " + params.get(StatsSetupConst.TOTAL_SIZE)); + updateStatsState(environmentContext, params); } - part.setParameters(params); updated = true; } return updated; } + static void updateStatsState(EnvironmentContext environmentContext, Map params) { + if (params == null) { + return; + } + if (environmentContext != null + && environmentContext.isSetProperties() + && StatsSetupConst.TASK.equals(environmentContext.getProperties().get( + StatsSetupConst.STATS_GENERATED))) { + StatsSetupConst.setBasicStatsState(params, StatsSetupConst.TRUE); + } else { + StatsSetupConst.setBasicStatsState(params, StatsSetupConst.FALSE); + } + } + /** * getDeserializer *