diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java index 3255031..be025fb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java @@ -80,9 +80,17 @@ public boolean accept(Path file) { @Override public String aggregateStats(String partID, String statType) { long counter = 0; + LOG.debug("Part ID: " + partID + "\t" + statType); for (Map> statsMap : statsList) { - String statVal = statsMap.get(partID).get(statType); - counter += Long.valueOf(statVal == null ? "0" : statVal); + Map partStat = statsMap.get(partID); + if (null == partStat) { // not all partitions are scanned in all mappers, so this could be null. + continue; + } + String statVal = partStat.get(statType); + if (null == statVal) { // partition was found, but was empty. + continue; + } + counter += Long.valueOf(statVal); } LOG.info("Read stats for : " + partID + "\t" + statType + "\t" + counter); diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java index d898a26..bd6dc62 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java @@ -69,7 +69,9 @@ public boolean connect(Configuration hconf) { @Override public boolean publishStat(String partKV, Map stats) { - statsMap.put(partKV, stats); + LOG.debug("Putting in map : " + partKV + "\t" + stats); + // we need to do new hashmap, since stats object is reused across calls. + statsMap.put(partKV, new HashMap(stats)); return true; } @@ -81,6 +83,7 @@ public boolean closeConnection() { LOG.debug("About to create stats file for this task : " + statsFile); Output output = new Output(statsFile.getFileSystem(conf).create(statsFile,true)); LOG.info("Created file : " + statsFile); + LOG.info("Writing stats in it : " + statsMap); Utilities.runtimeSerializationKryo.get().writeObject(output, statsMap); output.close(); return true; diff --git ql/src/test/queries/clientpositive/metadata_only_queries.q ql/src/test/queries/clientpositive/metadata_only_queries.q index 9cec4da..b549a56 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries.q +++ ql/src/test/queries/clientpositive/metadata_only_queries.q @@ -1,3 +1,4 @@ +set hive.stats.dbclass=fs; set hive.compute.query.using.stats=true; set hive.stats.autogather=true; create table over10k( @@ -73,3 +74,4 @@ drop table stats_tbl; drop table stats_tbl_part; set hive.compute.query.using.stats=false; +set hive.stats.dbclass=jdbc:derby; diff --git ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q index 998f6a1..09f4bff 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q +++ ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q @@ -1,3 +1,4 @@ +set hive.stats.dbclass=fs; set hive.compute.query.using.stats=true; create table over10k( t tinyint, @@ -47,3 +48,4 @@ select count(*), count(1), sum(1), sum(2), count(s), count(bo), count(bin), coun drop table stats_tbl_part; set hive.compute.query.using.stats=false; +set hive.stats.dbclass=jdbc:derby; diff --git ql/src/test/queries/clientpositive/stats_only_null.q ql/src/test/queries/clientpositive/stats_only_null.q index 38524b4..b47bc48 100644 --- ql/src/test/queries/clientpositive/stats_only_null.q +++ ql/src/test/queries/clientpositive/stats_only_null.q @@ -1,3 +1,4 @@ +set hive.stats.dbclass=fs; set hive.compute.query.using.stats=true; set hive.stats.autogather=true; CREATE TABLE temps_null(a double, b int, c STRING, d smallint) STORED AS TEXTFILE; @@ -37,3 +38,4 @@ drop table stats_null; drop table stats_null_part; drop table temps_null; set hive.compute.query.using.stats=false; +set hive.stats.dbclass=jdbc:derby;