diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index b19c1aa..2d9a4aa 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -602,6 +602,13 @@ public void testIncrementalAdds() throws IOException { run("LOAD DATA LOCAL INPATH '" + ptn_locn_2 + "' OVERWRITE INTO TABLE " + dbName + ".ptned_late PARTITION(b=2)", driver); verifySetup("SELECT a from " + dbName + ".ptned_late WHERE b=2", ptn_data_2, driver); + // Do an analyze here, so Hive will use StatsOptimizer to get count(*) directly from stats. + // This is added here to verify HIVE-17421 is fixed. Once the table gets replicated, the stats are + // incorrect; we shall not use StatsOptimizer on a replicated database. + run("ANALYZE TABLE " + dbName + ".ptned_late partition(b) COMPUTE STATISTICS FOR COLUMNS", driver); + run("ANALYZE TABLE " + dbName + ".ptned_late partition(b) COMPUTE STATISTICS", driver); + verifySetup("SELECT count(*) from " + dbName + ".ptned_late", new String[]{"6"}, driver); + // Perform REPL-DUMP/LOAD advanceDumpDir(); run("REPL DUMP " + dbName + " FROM " + replDumpId, driver); @@ -631,6 +638,8 @@ public void testIncrementalAdds() throws IOException { verifyRun("SELECT a from " + dbName + "_dupe.ptned_late WHERE b=1", ptn_data_1, driverMirror); verifyRun("SELECT a from " + dbName + "_dupe.ptned_late WHERE b=2", ptn_data_2, driverMirror); + + verifyRun("SELECT count(*) from " + dbName + "_dupe.ptned_late", new String[]{"6"}, driverMirror); } @Test diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 606a414..fb5c90c 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; @@ -222,6 +223,7 @@ public static boolean prepareImport( if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){ tblDesc.setReplicationSpec(replicationSpec); + tblDesc.getTblProps().remove(StatsSetupConst.COLUMN_STATS_ACCURATE); } if (isExternalSet){ @@ -245,6 +247,9 @@ public static boolean prepareImport( for (Partition partition : partitions) { // TODO: this should ideally not create AddPartitionDesc per partition AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition); + if ((replicationSpec != null) && replicationSpec.isInReplicationScope()){ + partsDesc.getPartition(0).getPartParams().remove(StatsSetupConst.COLUMN_STATS_ACCURATE); + } partitionDescs.add(partsDesc); }