diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java index f3bfcfa..d128869 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java @@ -38,6 +38,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; @@ -176,8 +177,15 @@ public static HCatSchema extractSchema(Partition partition) throws HCatException } public static Table getTable(IMetaStoreClient client, String dbName, String tableName) - throws NoSuchObjectException, TException, MetaException { - return new Table(client.getTable(dbName, tableName)); + throws NoSuchObjectException, TException, MetaException { + // HCat tables should not have stats set to accurate + org.apache.hadoop.hive.metastore.api.Table table = client.getTable(dbName, tableName); + if (table.getParameters() != null + && table.getParameters().containsKey(StatsSetupConst.COLUMN_STATS_ACCURATE)) { + table.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE); + client.alter_table(dbName, tableName, table); + } + return new Table(table); } public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java index 9db3dc1..498fccd 100644 --- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java +++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.MetaStoreUtils; @@ -443,7 +444,11 @@ private String getFinalDynamicPartitionDestination(Table table, Map entry : storer.getProperties().entrySet()) { - params.put(entry.getKey().toString(), entry.getValue().toString()); + // Although we already unset COLUMN_STATS_ACCURATE in table level, we + // double check it here for partitions. + if (!entry.getKey().toString().equals(StatsSetupConst.COLUMN_STATS_ACCURATE)) { + params.put(entry.getKey().toString(), entry.getValue().toString()); + } } return params; } diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java index 823e9a9..506d3f4 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java @@ -84,6 +84,7 @@ protected void setUpHiveConf() { hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); hiveConf.setVar(HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict"); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true); hiveConf .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java index f437079..deee3a0 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; @@ -55,7 +56,6 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; - import org.apache.hive.hcatalog.common.HCatConstants; import org.apache.hive.hcatalog.common.HCatUtil; import org.apache.hive.hcatalog.data.DefaultHCatRecord; @@ -71,7 +71,6 @@ import org.junit.BeforeClass; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -217,6 +216,7 @@ public void createTable() throws Exception { if (isTableImmutable()){ tableParams.put(hive_metastoreConstants.IS_IMMUTABLE,"true"); } + StatsSetupConst.setBasicStatsState(tableParams, StatsSetupConst.TRUE); tbl.setParameters(tableParams); client.createTable(tbl); diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java index 0d87c6c..9573098 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java @@ -185,6 +185,26 @@ protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask, res = new ArrayList(); driver.getResults(res); assertEquals(NUM_RECORDS, res.size()); + + query = "select count(*) from " + tableName; + retCode = driver.run(query).getResponseCode(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); + } + res = new ArrayList(); + driver.getResults(res); + assertEquals(1, res.size()); + assertEquals("20", res.get(0)); + + query = "select count(*) from " + tableName + " where p1=1"; + retCode = driver.run(query).getResponseCode(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); + } + res = new ArrayList(); + driver.getResults(res); + assertEquals(1, res.size()); + assertEquals("4", res.get(0)); } //TODO 1.0 miniCluster is slow this test times out, make it work diff --git a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java index 174a92f..a73516c 100644 --- a/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java +++ b/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java @@ -141,10 +141,27 @@ private void hiveReadTest() throws Exception { ArrayList res = new ArrayList(); driver.getResults(res); - if (isTableImmutable()){ + if (isTableImmutable()) { assertEquals(10, res.size()); - }else { + } else { assertEquals(30, res.size()); } + + query = "select count(*) from " + tableName; + retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); + } + + res = new ArrayList(); + driver.getResults(res); + if (isTableImmutable()) { + assertEquals(1, res.size()); + assertEquals("10", res.get(0)); + } else { + assertEquals(1, res.size()); + assertEquals("30", res.get(0)); + } } }