diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
index d3be11a30a..0163895742 100644
--- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
+++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
@@ -273,8 +273,9 @@ void gatherStats() throws IOException {
           sb.append(" partition(");
           Map<String, String> partitionColumnValues = Warehouse.makeEscSpecFromName(ci.partName);
           for(Map.Entry<String, String> ent : partitionColumnValues.entrySet()) {
-            sb.append(ent.getKey()).append("='").append(ent.getValue()).append("'");
+            sb.append(ent.getKey()).append("='").append(ent.getValue()).append("',");
           }
+          sb.setLength(sb.length() - 1);//remove trailing ,
           sb.append(")");
         }
         catch(MetaException ex) {
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 2e73e48a0b..17d976a58e 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -69,10 +69,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-/**
- * TODO: this should be merged with TestTxnCommands once that is checked in
- * specifically the tests; the supporting code here is just a clone of TestTxnCommands
- */
 public class TestTxnCommands2 {
   static final private Logger LOG = LoggerFactory.getLogger(TestTxnCommands2.class);
   protected static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
index f0d9ff2235..7f5e091f6b 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java
@@ -18,12 +18,15 @@
 package org.apache.hadoop.hive.ql;
 
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.ShowCompactRequest;
 import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.BucketCodec;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
 import org.junit.Assert;
 import org.junit.Before;
@@ -34,8 +37,10 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 public class TestTxnNoBuckets extends TxnCommandsBaseForTests {
@@ -640,5 +645,76 @@ private void assertVectorized(boolean vectorized, String query) throws Exception
     }
     Assert.assertTrue("Din't find expected 'vectorized' in plan", !vectorized);
   }
 
+  /**
+   * HIVE-17900
+   */
+  @Test
+  public void testCompactStatsGather() throws Exception {
+    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
+    runStatementOnDriver("drop table if exists T");
+    runStatementOnDriver("create table T(a int, b int) partitioned by (p int, q int) " +
+      "stored as orc TBLPROPERTIES ('transactional'='true')");
+
+    int[][] targetVals = {{4, 1, 1}, {4, 2, 2}, {4, 3, 1}, {4, 4, 2}};
+    //we only recompute stats after major compact if they existed before
+    runStatementOnDriver("insert into T partition(p=1,q) " + makeValuesClause(targetVals));
+    runStatementOnDriver("analyze table T partition(p=1) compute statistics for columns");
+
+    IMetaStoreClient hms = Hive.get().getMSC();
+    List<String> partNames = new ArrayList<>();
+    partNames.add("p=1/q=2");
+    List<String> colNames = new ArrayList<>();
+    colNames.add("a");
+    Map<String, List<ColumnStatisticsObj>> map = hms.getPartitionColumnStatistics("default",
+      "T", partNames, colNames);
+    Assert.assertEquals(4, map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue());
+
+
+    int[][] targetVals2 = {{5, 1, 1}, {5, 2, 2}, {5, 3, 1}, {5, 4, 2}};
+    runStatementOnDriver("insert into T partition(p=1,q) " + makeValuesClause(targetVals2));
+
+    String query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b";
+    List<String> rs = runStatementOnDriver(query);
+    String[][] expected = {
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/delta_0000017_0000017_0000/bucket_00000"}
+    };
+    checkExpected(rs, expected, "insert data");
+
+    //run major compaction
+    runStatementOnDriver("alter table T partition(p=1,q=2) compact 'major'");
+    TestTxnCommands2.runWorker(hiveConf);
+
+    query = "select ROW__ID, p, q, a, b, INPUT__FILE__NAME from T order by p, q, a, b";
+    rs = runStatementOnDriver(query);
+    String[][] expected2 = {
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t4\t1", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t4\t3", "t/p=1/q=1/delta_0000015_0000015_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t1\t5\t1", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t1\t5\t3", "t/p=1/q=1/delta_0000017_0000017_0000/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t4\t2", "t/p=1/q=2/base_0000017/bucket_00000"},
+      {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t4\t4", "t/p=1/q=2/base_0000017/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t1\t2\t5\t2", "t/p=1/q=2/base_0000017/bucket_00000"},
+      {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":1}\t1\t2\t5\t4", "t/p=1/q=2/base_0000017/bucket_00000"}
+    };
+    checkExpected(rs, expected2, "after major compaction");
+
+    //check status of compaction job
+    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
+    ShowCompactResponse resp = txnHandler.showCompact(new ShowCompactRequest());
+    Assert.assertEquals("Unexpected number of compactions in history", 1, resp.getCompactsSize());
+    Assert.assertEquals("Unexpected 0 compaction state", TxnStore.CLEANING_RESPONSE, resp.getCompacts().get(0).getState());
+    Assert.assertTrue(resp.getCompacts().get(0).getHadoopJobId().startsWith("job_local"));
+
+    //now check that stats were updated
+    map = hms.getPartitionColumnStatistics("default","T", partNames, colNames);
+    Assert.assertEquals("", 5, map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue());
+  }
 }