diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
index 142af10..d1463bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
@@ -31,6 +31,7 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.ErrorMsg;
@@ -278,10 +279,19 @@ else if (work.isClearAggregatorStats()) {
           // Statistics
           newPartStats = new Statistics();

-          // In that case of a partition, the key for stats temporary store is
-          // "rootDir/[dynamic_partition_specs/]%"
-          String partitionID = Utilities.getHashedStatsPrefix(
-              work.getAggKey() + Warehouse.makePartPath(partn.getSpec()), maxPrefixLength);
+          String partitionID;
+
+          if (HiveConf.getVar(conf, ConfVars.HIVESTATSDBCLASS).equals("counter")) {
+            partitionID = work.getAggKey() + Warehouse.makePartPath(partn.getSpec());
+            // There is no need to aggregate stats in this case, but aggregation should also work.
+            // Also check the non-partitioned code path.
+          } else {
+            // In the case of a partition, the key for the stats temporary store is
+            // "rootDir/[dynamic_partition_specs/]%"
+            partitionID = Utilities.getHashedStatsPrefix(
+                work.getAggKey() + Warehouse.makePartPath(partn.getSpec()), maxPrefixLength);
+          }
+
           LOG.info("Stats aggregator : " + partitionID);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index 8d895f4..66d96a1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -28,6 +28,8 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -296,11 +298,20 @@ private void publishStats() throws HiveException {
             conf.getStatsAggPrefix(), conf.getMaxStatsKeyPrefixLength());
         key = keyPrefix + taskID;
       } else {
-        // In case of a partition, the key for temp storage is
-        // "tableName + partitionSpecs + taskID"
-        String keyPrefix = Utilities.getHashedStatsPrefix(
-            conf.getStatsAggPrefix() + pspecs, conf.getMaxStatsKeyPrefixLength());
-        key = keyPrefix + taskID;
+
+        if (HiveConf.getVar(hconf, ConfVars.HIVESTATSDBCLASS).equals("counter")) {
+          // TODO: Can we do something within CounterStatsPublisher to avoid this
+          // special logic in TSOp?
+          // Also, do we need the same changes in FileSinkOp?
+          // Also, check the non-partitioned tables case.
+          key = conf.getStatsAggPrefix() + pspecs;
+        } else {
+          // In the case of a partition, the key for temp storage is
+          // "tableName + partitionSpecs + taskID"
+          String keyPrefix = Utilities.getHashedStatsPrefix(
+              conf.getStatsAggPrefix() + pspecs, conf.getMaxStatsKeyPrefixLength());
+          key = keyPrefix + taskID;
+        }
       }
       for(String statType : stats.get(pspecs).getStoredStats()) {
         statsToPublish.put(statType, Long.toString(stats.get(pspecs).getStat(statType)));
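
Note on the change above: with hive.stats.dbclass set to "counter", per-task stats land in Hadoop counters that the framework aggregates by counter name, so the publish/aggregate key is just the raw aggregation prefix plus the partition path, with no hashing and no per-task suffix. The store-backed publishers keep the length-bounded (possibly hashed) prefix plus taskID so that StatsTask can aggregate the per-task rows afterwards. Below is a minimal, self-contained sketch of the two key shapes; counterKey, storeKey and hashPrefix are invented names for illustration only (the real code goes through Utilities.getHashedStatsPrefix and Warehouse.makePartPath), and the sample prefix, partition path and task ID are made up.

    import java.nio.charset.StandardCharsets;
    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;

    // Illustration only: mimics the two key-construction paths in the patch.
    public class StatsKeySketch {

      // "counter" path: counters with the same name are aggregated across
      // tasks by the MR framework, so the key is the raw prefix + partition
      // path -- no hashing, no per-task suffix.
      static String counterKey(String aggPrefix, String partPath) {
        return aggPrefix + partPath;
      }

      // Store-backed path (e.g. JDBC): each task publishes its own row, so the
      // key is a length-bounded prefix plus the task ID; StatsTask later
      // aggregates all rows that share the prefix.
      static String storeKey(String aggPrefix, String partPath, String taskID, int maxLen)
          throws NoSuchAlgorithmException {
        return hashPrefix(aggPrefix + partPath, maxLen) + taskID;
      }

      // Rough stand-in for Utilities.getHashedStatsPrefix: hash only when the
      // prefix would exceed the configured maximum length.
      static String hashPrefix(String prefix, int maxLen) throws NoSuchAlgorithmException {
        if (prefix.length() <= maxLen) {
          return prefix;
        }
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        StringBuilder sb = new StringBuilder();
        for (byte b : md5.digest(prefix.getBytes(StandardCharsets.UTF_8))) {
          sb.append(String.format("%02x", b));
        }
        return sb.toString();
      }

      public static void main(String[] args) throws NoSuchAlgorithmException {
        String prefix = "default.t1/";    // stands in for conf.getStatsAggPrefix()
        String part = "ds=2013-01-01/";   // stands in for Warehouse.makePartPath(partn.getSpec())
        System.out.println(counterKey(prefix, part));              // default.t1/ds=2013-01-01/
        System.out.println(storeKey(prefix, part, "000001_0", 24)); // hashed prefix + task ID
      }
    }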