diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 14235b5719..0904aadc7e 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1888,7 +1888,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
         " of the lock manager is dumped to log file. This is for debugging. See also " +
         "hive.lock.numretries and hive.lock.sleep.between.retries."),

-    HIVE_TXN_OPERATIONAL_PROPERTIES("hive.txn.operational.properties", 0,
+    HIVE_TXN_OPERATIONAL_PROPERTIES("hive.txn.operational.properties", 1,
         "Sets the operational properties that control the appropriate behavior for various\n"
         + "versions of the Hive ACID subsystem. Setting it to zero will turn on the legacy mode\n"
         + "for ACID, while setting it to one will enable a split-update feature found in the newer\n"
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
index 0f08f434e7..023d703543 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java
@@ -39,7 +39,6 @@
   // These constants are also imported by org.apache.hadoop.hive.ql.io.AcidUtils.
   public static final String DEFAULT_TRANSACTIONAL_PROPERTY = "default";
-  public static final String LEGACY_TRANSACTIONAL_PROPERTY = "legacy";

   TransactionalValidationListener(Configuration conf) {
     super(conf);
@@ -276,7 +275,6 @@ private String validateTransactionalProperties(String transactionalProperties) {
     boolean isValid = false;
     switch (transactionalProperties) {
       case DEFAULT_TRANSACTIONAL_PROPERTY:
-      case LEGACY_TRANSACTIONAL_PROPERTY:
         isValid = true;
         break;
       default:
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 1e33424e24..0371dd3f49 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -167,7 +167,7 @@ public static String deltaSubdir(long min, long max, int statementId) {
    * This is format of delete delta dir name prior to Hive 2.2.x
    */
   @VisibleForTesting
-  static String deleteDeltaSubdir(long min, long max) {
+  public static String deleteDeltaSubdir(long min, long max) {
     return DELETE_DELTA_PREFIX + String.format(DELTA_DIGITS, min) + "_" +
         String.format(DELTA_DIGITS, max);
   }
@@ -371,21 +371,10 @@ public HdfsFileStatusWithId getHdfsFileStatusWithId() {
     public static final int HASH_BASED_MERGE_BIT = 0x02;
     public static final String HASH_BASED_MERGE_STRING = "hash_merge";
     public static final String DEFAULT_VALUE_STRING = TransactionalValidationListener.DEFAULT_TRANSACTIONAL_PROPERTY;
-    public static final String LEGACY_VALUE_STRING = TransactionalValidationListener.LEGACY_TRANSACTIONAL_PROPERTY;

     private AcidOperationalProperties() {
     }

-    /**
-     * Returns an acidOperationalProperties object that represents ACID behavior for legacy tables
-     * that were created before ACID type system using operational properties was put in place.
-     * @return the acidOperationalProperties object
-     */
-    public static AcidOperationalProperties getLegacy() {
-      AcidOperationalProperties obj = new AcidOperationalProperties();
-      // In legacy mode, none of these properties are turned on.
-      return obj;
-    }

     /**
      * Returns an acidOperationalProperties object that represents default ACID behavior for tables
@@ -406,14 +395,11 @@ public static AcidOperationalProperties getDefault() {
      */
     public static AcidOperationalProperties parseString(String propertiesStr) {
       if (propertiesStr == null) {
-        return AcidOperationalProperties.getLegacy();
+        return AcidOperationalProperties.getDefault();
       }
       if (propertiesStr.equalsIgnoreCase(DEFAULT_VALUE_STRING)) {
         return AcidOperationalProperties.getDefault();
       }
-      if (propertiesStr.equalsIgnoreCase(LEGACY_VALUE_STRING)) {
-        return AcidOperationalProperties.getLegacy();
-      }
       AcidOperationalProperties obj = new AcidOperationalProperties();
       String[] options = propertiesStr.split("\\|");
       for (String option : options) {
@@ -1172,7 +1158,7 @@ public static AcidOperationalProperties getAcidOperationalProperties(Table table
         hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
     if (transactionalProperties == null) {
-      // If the table does not define any transactional properties, we return a legacy type.
-      return AcidOperationalProperties.getLegacy();
+      // If the table does not define any transactional properties, we return a default type.
+      return AcidOperationalProperties.getDefault();
     }
     return AcidOperationalProperties.parseString(transactionalProperties);
   }
@@ -1184,7 +1170,7 @@ public static AcidOperationalProperties getAcidOperationalProperties(Table table
    */
   public static AcidOperationalProperties getAcidOperationalProperties(Configuration conf) {
     // If the conf does not define any transactional properties, the parseInt() should receive
-    // a value of zero, which will set AcidOperationalProperties to a legacy type and return that.
+    // a value of 1, which will set AcidOperationalProperties to a default type and return that.
     return AcidOperationalProperties.parseInt(
         HiveConf.getIntVar(conf, ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES));
   }
@@ -1197,8 +1183,8 @@ public static AcidOperationalProperties getAcidOperationalProperties(Configurati
   public static AcidOperationalProperties getAcidOperationalProperties(Properties props) {
     String resultStr = props.getProperty(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
     if (resultStr == null) {
-      // If the properties does not define any transactional properties, we return a legacy type.
-      return AcidOperationalProperties.getLegacy();
+      // If the properties do not define any transactional properties, we return a default type.
+      return AcidOperationalProperties.getDefault();
     }
     return AcidOperationalProperties.parseString(resultStr);
   }
@@ -1212,8 +1198,8 @@ public static AcidOperationalProperties getAcidOperationalProperties(
       Map parameters) {
     String resultStr = parameters.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
     if (resultStr == null) {
-      // If the parameters does not define any transactional properties, we return a legacy type.
+      // If the parameters do not define any transactional properties, we return a default type.
+      return AcidOperationalProperties.getDefault();
     }
     return AcidOperationalProperties.parseString(resultStr);
   }
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index c50c1a8726..dc6b70a110 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -867,7 +867,7 @@ public void testNonAcidToAcidConversion01() throws Exception {
     Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"transactionid\":0,\"bucketid\":1,\"rowid\":1}\t1\t5"));
     Assert.assertTrue(rs.get(2), rs.get(2).endsWith("nonacidorctbl/000001_0_copy_1"));
     Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"transactionid\":14,\"bucketid\":536936448,\"rowid\":0}\t1\t17"));
-    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/000001_0_copy_1"));
+    Assert.assertTrue(rs.get(3), rs.get(3).endsWith("nonacidorctbl/delta_0000014_0000014_0000/bucket_00001"));
     //run Compaction
     runStatementOnDriver("alter table "+ TestTxnCommands2.Table.NONACIDORCTBL +" compact 'major'");
     TestTxnCommands2.runWorker(hiveConf);
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 408c08921b..e80a4f9d94 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -64,6 +64,7 @@
 import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -113,6 +114,8 @@ String getPartitionColumns() {
       this.partitionColumns = partitionColumns;
     }
   }
+  @Rule
+  public ExpectedException expectedException = ExpectedException.none();

   @Before
   public void setUp() throws Exception {
@@ -366,7 +369,7 @@ public void testNonAcidToAcidConversion02() throws Exception {
       {"{\"transactionid\":0,\"bucketid\":0,\"rowid\":0}\t0\t13", "bucket_00000"},
       {"{\"transactionid\":18,\"bucketid\":536870912,\"rowid\":0}\t0\t15", "bucket_00000"},
       {"{\"transactionid\":20,\"bucketid\":536870912,\"rowid\":0}\t0\t17", "bucket_00000"},
-      {"{\"transactionid\":0,\"bucketid\":0,\"rowid\":1}\t0\t120", "bucket_00000"},
+      {"{\"transactionid\":19,\"bucketid\":536870912,\"rowid\":0}\t0\t120", "bucket_00000"},
       {"{\"transactionid\":0,\"bucketid\":1,\"rowid\":1}\t1\t2", "bucket_00001"},
       {"{\"transactionid\":0,\"bucketid\":1,\"rowid\":3}\t1\t4", "bucket_00001"},
       {"{\"transactionid\":0,\"bucketid\":1,\"rowid\":2}\t1\t5", "bucket_00001"},
@@ -392,11 +395,23 @@ public void testNonAcidToAcidConversion02() throws Exception {
     }
     //make sure they are the same before and after compaction
   }
-  /**
-   * Test the query correctness and directory layout after ACID table conversion and MAJOR compaction
+  /**
+   * In the current implementation of ACID, altering the value of transactional_properties, or
+   * setting a value where it was previously unset, will throw an exception for an ACID table.
+   * @throws Exception
+   */
+  @Test
+  public void testFailureOnAlteringTransactionalProperties() throws Exception {
+    expectedException.expect(RuntimeException.class);
+    expectedException.expectMessage("TBLPROPERTIES with 'transactional_properties' cannot be altered after the table is created");
+    runStatementOnDriver("create table acidTblLegacy (a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')");
+    runStatementOnDriver("alter table acidTblLegacy SET TBLPROPERTIES ('transactional_properties' = 'default')");
+  }
+  /**
+   * Test the query correctness and directory layout for ACID table conversion with split-update
+   * enabled.
    * 1. Insert a row to Non-ACID table
-   * 2. Convert Non-ACID to ACID table
+   * 2. Convert Non-ACID to ACID table with split-update enabled
    * 3. Insert a row to ACID table
    * 4. Perform Major compaction
    * 5. Clean
@@ -410,7 +425,7 @@ public void testNonAcidToAcidConversion1() throws Exception {
     // 1. Insert a row to Non-ACID table
     runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     // There should be 2 original bucket files in the location (000000_0 and 000001_0)
     Assert.assertEquals(BUCKET_COUNT, status.length);
     for (int i = 0; i < status.length; i++) {
@@ -424,9 +439,10 @@
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

     // 2. Convert NONACIDORCTBL to ACID table
-    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true')");
+    runStatementOnDriver("alter table " + Table.NONACIDORCTBL
+        + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     // Everything should be same as before
     Assert.assertEquals(BUCKET_COUNT, status.length);
     for (int i = 0; i < status.length; i++) {
@@ -442,24 +458,23 @@
     // 3. Insert another row to newly-converted ACID table
     runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     // There should be 2 original bucket files (000000_0 and 000001_0), plus a new delta directory.
-    // The delta directory should also have 2 bucket files (bucket_00000 and bucket_00001)
+    // The delta directory should also have only 1 bucket file (bucket_00001)
     Assert.assertEquals(3, status.length);
     boolean sawNewDelta = false;
     for (int i = 0; i < status.length; i++) {
       if (status[i].getPath().getName().matches("delta_.*")) {
         sawNewDelta = true;
         FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(BUCKET_COUNT, buckets.length);
-        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
-        Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]"));
+        Assert.assertEquals(1, buckets.length); // only one bucket file
+        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
       } else {
         Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
       }
     }
     Assert.assertTrue(sawNewDelta);
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
+    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL + " order by a,b");
     resultData = new int[][] {{1, 2}, {3, 4}};
     Assert.assertEquals(stringifyValues(resultData), rs);
     rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
@@ -472,16 +487,15 @@
     // There should be 1 new directory: base_xxxxxxx.
     // Original bucket files and delta directory should stay until Cleaner kicks in.
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     Assert.assertEquals(4, status.length);
     boolean sawNewBase = false;
     for (int i = 0; i < status.length; i++) {
       if (status[i].getPath().getName().matches("base_.*")) {
         sawNewBase = true;
         FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(BUCKET_COUNT, buckets.length);
-        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
-        Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]"));
+        Assert.assertEquals(1, buckets.length);
+        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
       }
     }
     Assert.assertTrue(sawNewBase);
@@ -495,13 +509,13 @@
     // 5. Let Cleaner delete obsolete files/dirs
     // Note, here we create a fake directory along with fake files as original directories/files
     String fakeFile0 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() +
-      "/subdir/000000_0";
+        "/subdir/000000_0";
     String fakeFile1 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() +
-      "/subdir/000000_1";
+        "/subdir/000000_1";
     fs.create(new Path(fakeFile0));
     fs.create(new Path(fakeFile1));
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     // Before Cleaner, there should be 5 items:
     // 2 original files, 1 original directory, 1 base directory and 1 delta directory
     Assert.assertEquals(5, status.length);
     runCleaner(hiveConf);
     // There should be only 1 directory left: base_xxxxxxx.
     // Original bucket files and delta directory should have been cleaned up.
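[Reviewer note] The directory names asserted throughout these tests (delta_0000014_0000014_0000, delete_delta_0000022_0000022_0000, base_xxxxxxx) follow the zero-padded naming scheme built by AcidUtils.deltaSubdir/deleteDeltaSubdir. A minimal standalone sketch, assuming the padding formats used by AcidUtils (DELTA_DIGITS = "%07d", STATEMENT_DIGITS = "%04d"):

```java
// Standalone sketch of ACID delta-directory naming; the padding formats are
// assumptions mirroring AcidUtils, not a copy of its implementation.
public class DeltaNameSketch {
  static String deltaSubdir(long min, long max, int statementId) {
    return "delta_" + String.format("%07d", min) + "_"
        + String.format("%07d", max) + "_" + String.format("%04d", statementId);
  }

  static String deleteDeltaSubdir(long min, long max, int statementId) {
    // Delete deltas share the format, with a "delete_" prefix.
    return "delete_" + deltaSubdir(min, max, statementId);
  }

  public static void main(String[] args) {
    System.out.println(deltaSubdir(14, 14, 0));        // delta_0000014_0000014_0000
    System.out.println(deleteDeltaSubdir(22, 22, 0));  // delete_delta_0000022_0000022_0000
  }
}
```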
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     Assert.assertEquals(1, status.length);
     Assert.assertTrue(status[0].getPath().getName().matches("base_.*"));
     FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(BUCKET_COUNT, buckets.length);
-    Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
-    Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]"));
+    Assert.assertEquals(1, buckets.length);
+    Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
     resultData = new int[][] {{1, 2}, {3, 4}};
     Assert.assertEquals(stringifyValues(resultData), rs);
@@ -525,9 +538,10 @@
   }
   /**
-   * Test the query correctness and directory layout after ACID table conversion and MAJOR compaction
+   * Test the query correctness and directory layout for ACID table conversion with split-update
+   * enabled.
    * 1. Insert a row to Non-ACID table
-   * 2. Convert Non-ACID to ACID table
+   * 2. Convert Non-ACID to ACID table with split update enabled.
    * 3. Update the existing row in ACID table
    * 4. Perform Major compaction
    * 5. Clean
@@ -541,7 +555,7 @@ public void testNonAcidToAcidConversion2() throws Exception {
     // 1. Insert a row to Non-ACID table
     runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     // There should be 2 original bucket files in the location (000000_0 and 000001_0)
     Assert.assertEquals(BUCKET_COUNT, status.length);
     for (int i = 0; i < status.length; i++) {
@@ -550,14 +564,15 @@ public void testNonAcidToAcidConversion2() throws Exception {
     List rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
     int [][] resultData = new int[][] {{1, 2}};
     Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);//todo: what is the point of this if we just did select *?
+    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
     int resultCount = 1;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

     // 2. Convert NONACIDORCTBL to ACID table
-    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true')");
+    runStatementOnDriver("alter table " + Table.NONACIDORCTBL
+        + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     // Everything should be same as before
     Assert.assertEquals(BUCKET_COUNT, status.length);
     for (int i = 0; i < status.length; i++) {
@@ -566,29 +581,39 @@
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
     resultData = new int[][] {{1, 2}};
     Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);//todo: what is the point of this if we just did select *?
+    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
     resultCount = 1;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

     // 3. Update the existing row in newly-converted ACID table
     runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // There should be 2 original bucket files (000000_0 and 000001_0), plus a new delta directory.
-    // The delta directory should also have 2 bucket files (bucket_00000 and bucket_00001)
-    Assert.assertEquals(3, status.length);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+    // There should be 2 original bucket files (000000_0 and 000001_0), plus one delta directory
+    // and one delete_delta directory. When split-update is enabled, an update event is split into
+    // a combination of delete and insert events, which generates the delete_delta directory.
+    // The delta directory should also have only 1 bucket file (bucket_00001),
+    // and so should the delete_delta directory.
+    Assert.assertEquals(4, status.length);
     boolean sawNewDelta = false;
+    boolean sawNewDeleteDelta = false;
     for (int i = 0; i < status.length; i++) {
       if (status[i].getPath().getName().matches("delta_.*")) {
         sawNewDelta = true;
         FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
         Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
         Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
+      } else if (status[i].getPath().getName().matches("delete_delta_.*")) {
+        sawNewDeleteDelta = true;
+        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
+        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
+        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
       } else {
         Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
       }
     }
     Assert.assertTrue(sawNewDelta);
+    Assert.assertTrue(sawNewDeleteDelta);
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
     resultData = new int[][] {{1, 3}};
     Assert.assertEquals(stringifyValues(resultData), rs);
@@ -602,8 +627,8 @@ public void testNonAcidToAcidConversion2() throws Exception {
     // There should be 1 new directory: base_0000001.
     // Original bucket files and delta directory should stay until Cleaner kicks in.
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(4, status.length);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+    Assert.assertEquals(5, status.length);
     boolean sawNewBase = false;
     for (int i = 0; i < status.length; i++) {
       if (status[i].getPath().getName().matches("base_.*")) {
@@ -623,15 +648,15 @@
     // 5. Let Cleaner delete obsolete files/dirs
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // Before Cleaner, there should be 4 items:
-    // 2 original files, 1 delta directory and 1 base directory
-    Assert.assertEquals(4, status.length);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+    // Before Cleaner, there should be 5 items:
+    // 2 original files, 1 delta directory, 1 delete_delta directory and 1 base directory
+    Assert.assertEquals(5, status.length);
     runCleaner(hiveConf);
     // There should be only 1 directory left: base_0000001.
-    // Original bucket files and delta directory should have been cleaned up.
+    // Original bucket files, delta directory and delete_delta should have been cleaned up.
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     Assert.assertEquals(1, status.length);
     Assert.assertTrue(status[0].getPath().getName().matches("base_.*"));
     FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
@@ -646,9 +671,10 @@
   }
   /**
-   * Test the query correctness and directory layout after ACID table conversion and MAJOR compaction
+   * Test the query correctness and directory layout for ACID table conversion with split-update
+   * enabled.
    * 1. Insert a row to Non-ACID table
-   * 2. Convert Non-ACID to ACID table
+   * 2. Convert Non-ACID to ACID table with split-update enabled
    * 3. Perform Major compaction
    * 4. Insert a new row to ACID table
    * 5. Perform another Major compaction
    * 6. Clean
@@ -663,7 +689,7 @@
     // 1. Insert a row to Non-ACID table
     runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     // There should be 2 original bucket files in the location (000000_0 and 000001_0)
     Assert.assertEquals(BUCKET_COUNT, status.length);
     for (int i = 0; i < status.length; i++) {
@@ -676,10 +702,11 @@
     int resultCount = 1;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

-    // 2. Convert NONACIDORCTBL to ACID table
-    runStatementOnDriver("alter table " + Table.NONACIDORCTBL + " SET TBLPROPERTIES ('transactional'='true')");
+    // 2. Convert NONACIDORCTBL to ACID table with split_update enabled. (txn_props=default)
+    runStatementOnDriver("alter table " + Table.NONACIDORCTBL
+        + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     // Everything should be same as before
     Assert.assertEquals(BUCKET_COUNT, status.length);
     for (int i = 0; i < status.length; i++) {
@@ -698,7 +725,7 @@
     // There should be 1 new directory: base_-9223372036854775808
     // Original bucket files should stay until Cleaner kicks in.
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     Assert.assertEquals(3, status.length);
     boolean sawNewBase = false;
     for (int i = 0; i < status.length; i++) {
@@ -722,12 +749,14 @@
     // 4. Update the existing row, and insert another row to newly-converted ACID table
     runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1");
     runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)");
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     Arrays.sort(status); // make sure delta_0000001_0000001_0000 appears before delta_0000002_0000002_0000
     // There should be 2 original bucket files (000000_0 and 000001_0), a base directory,
-    // plus two new delta directories
-    Assert.assertEquals(5, status.length);
+    // plus two new delta directories and one delete_delta directory that would be created due to
+    // the update statement (remember split-update U=D+I)!
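[Reviewer note] The "U=D+I" decomposition referenced in the comment above can be sketched as follows; RowId and the write* methods are hypothetical stand-ins for Hive's internals, not its actual writer API:

```java
// Illustrative sketch only: with 'transactional_properties'='default' (split-update),
// an UPDATE is rewritten as a delete of the old row id plus an insert of the new row,
// so a single transaction produces both a delta_x_x and a delete_delta_x_x directory.
class SplitUpdateSketch {
  record RowId(long transactionId, int bucketId, long rowId) {}

  void update(RowId oldId, Object[] newRow, long txnId) {
    writeDeleteEvent(oldId, txnId);   // lands in delete_delta_<txnId>_<txnId>_0000/bucket_NNNNN
    writeInsertEvent(newRow, txnId);  // lands in delta_<txnId>_<txnId>_0000/bucket_NNNNN
  }

  void writeDeleteEvent(RowId id, long txnId) { /* append the row id to the delete_delta bucket */ }
  void writeInsertEvent(Object[] row, long txnId) { /* append the new row to the delta bucket */ }
}
```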
+    Assert.assertEquals(6, status.length);
     int numDelta = 0;
+    int numDeleteDelta = 0;
     sawNewBase = false;
     for (int i = 0; i < status.length; i++) {
       if (status[i].getPath().getName().matches("delta_.*")) {
@@ -740,9 +769,17 @@
           Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
         } else if (numDelta == 2) {
           Assert.assertEquals("delta_0000023_0000023_0000", status[i].getPath().getName());
-          Assert.assertEquals(BUCKET_COUNT, buckets.length);
-          Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
-          Assert.assertEquals("bucket_00001", buckets[1].getPath().getName());
+          Assert.assertEquals(1, buckets.length);
+          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
+        }
+      } else if (status[i].getPath().getName().matches("delete_delta_.*")) {
+        numDeleteDelta++;
+        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
+        Arrays.sort(buckets);
+        if (numDeleteDelta == 1) {
+          Assert.assertEquals("delete_delta_0000022_0000022_0000", status[i].getPath().getName());
+          Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
+          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
         }
       } else if (status[i].getPath().getName().matches("base_.*")) {
         Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName());
@@ -755,11 +792,13 @@
       }
     }
     Assert.assertEquals(2, numDelta);
+    Assert.assertEquals(1, numDeleteDelta);
     Assert.assertTrue(sawNewBase);
+
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
     resultData = new int[][] {{1, 3}, {3, 4}};
     Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);//todo: what is the point of this if we just did select *?
+    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
     resultCount = 2;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

     // 5. Perform another major compaction
     runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'");
     runWorker(hiveConf);
-    // There should be 1 new base directory: base_0000001
-    // Original bucket files, delta directories and the previous base directory should stay until Cleaner kicks in.
+    // There should be 1 new base directory: base_0000023
+    // Original bucket files, delta directories, delete_delta directories and the
+    // previous base directory should stay until Cleaner kicks in.
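[Reviewer note] On the base directory names these assertions expect: a major compaction names the new base after the highest transaction id it covers, and when only pre-conversion (original) files exist there is no valid transaction id, so Long.MIN_VALUE is used. A small illustration; the %07d padding is an assumption mirroring the delta format:

```java
public class BaseNameSketch {
  public static void main(String[] args) {
    // Only original (pre-ACID) files were compacted: no valid transaction id.
    System.out.println("base_" + Long.MIN_VALUE);             // base_-9223372036854775808
    // Deltas up to transaction 23 were compacted.
    System.out.println("base_" + String.format("%07d", 23));  // base_0000023
  }
}
```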
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     Arrays.sort(status);
-    Assert.assertEquals(6, status.length);
+    Assert.assertEquals(7, status.length);
     int numBase = 0;
     for (int i = 0; i < status.length; i++) {
       if (status[i].getPath().getName().matches("base_.*")) {
@@ -785,9 +825,8 @@
         } else if (numBase == 2) {
-          // The new base dir now has two bucket files, since the delta dir has two bucket files
+          // The new base dir now has one bucket file, since the latest delta dir has one bucket file
           Assert.assertEquals("base_0000023", status[i].getPath().getName());
-          Assert.assertEquals(BUCKET_COUNT, buckets.length);
-          Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
-          Assert.assertEquals("bucket_00001", buckets[1].getPath().getName());
+          Assert.assertEquals(1, buckets.length);
+          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
         }
       }
     }
@@ -795,28 +834,27 @@
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
     resultData = new int[][] {{1, 3}, {3, 4}};
     Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);//todo: what is the point of this if we just did select *?
+    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
     resultCount = 2;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));

     // 6. Let Cleaner delete obsolete files/dirs
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // Before Cleaner, there should be 6 items:
-    // 2 original files, 2 delta directories and 2 base directories
-    Assert.assertEquals(6, status.length);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+    // Before Cleaner, there should be 7 items:
+    // 2 original files, 2 delta directories, 1 delete_delta directory and 2 base directories
+    Assert.assertEquals(7, status.length);
     runCleaner(hiveConf);
-    // There should be only 1 directory left: base_0000001.
+    // There should be only 1 directory left: base_0000023.
     // Original bucket files, delta directories and previous base directory should have been cleaned up.
     status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-      (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
+        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
     Assert.assertEquals(1, status.length);
     Assert.assertEquals("base_0000023", status[0].getPath().getName());
     FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
     Arrays.sort(buckets);
-    Assert.assertEquals(BUCKET_COUNT, buckets.length);
-    Assert.assertEquals("bucket_00000", buckets[0].getPath().getName());
-    Assert.assertEquals("bucket_00001", buckets[1].getPath().getName());
+    Assert.assertEquals(1, buckets.length);
+    Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
     rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
     resultData = new int[][] {{1, 3}, {3, 4}};
     Assert.assertEquals(stringifyValues(resultData), rs);
@@ -824,9 +862,6 @@
     resultCount = 2;
     Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
   }
-
-
-
   @Test
   public void testValidTxnsBookkeeping() throws Exception {
     // 1. Run a query against a non-ACID table, and we shouldn't have txn logged in conf
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdate.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdate.java
deleted file mode 100644
index 520e958af3..0000000000
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdate.java
+++ /dev/null
@@ -1,545 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql;
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.FileUtils;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.ExpectedException;
-
-/**
- * Same as TestTxnCommands2 but tests ACID tables with 'transactional_properties' set to 'default'.
- * This tests whether ACID tables with split-update turned on are working correctly or not
- * for the same set of tests when it is turned off. Of course, it also adds a few tests to test
- * specific behaviors of ACID tables with split-update turned on.
- */
-public class TestTxnCommands2WithSplitUpdate extends TestTxnCommands2 {
-
-  public TestTxnCommands2WithSplitUpdate() {
-    super();
-  }
-
-  @Rule
-  public ExpectedException expectedException = ExpectedException.none();
-
-  @Override
-  @Before
-  public void setUp() throws Exception {
-    setUpWithTableProperties("'transactional'='true','transactional_properties'='default'");
-  }
-
-  @Override
-  @Test
-  public void testInitiatorWithMultipleFailedCompactions() throws Exception {
-    // Test with split-update turned on.
-    testInitiatorWithMultipleFailedCompactionsForVariousTblProperties("'transactional'='true','transactional_properties'='default'");
-  }
-
-  @Override
-  @Test
-  public void writeBetweenWorkerAndCleaner() throws Exception {
-    writeBetweenWorkerAndCleanerForVariousTblProperties("'transactional'='true','transactional_properties'='default'");
-  }
-
-  @Override
-  @Test
-  public void testACIDwithSchemaEvolutionAndCompaction() throws Exception {
-    testACIDwithSchemaEvolutionForVariousTblProperties("'transactional'='true','transactional_properties'='default'");
-  }
-
-  /**
-   * In current implementation of ACID, altering the value of transactional_properties or trying to
-   * set a value for previously unset value for an acid table will throw an exception.
-   * @throws Exception
-   */
-  @Test
-  public void testFailureOnAlteringTransactionalProperties() throws Exception {
-    expectedException.expect(RuntimeException.class);
-    expectedException.expectMessage("TBLPROPERTIES with 'transactional_properties' cannot be altered after the table is created");
-    runStatementOnDriver("create table acidTblLegacy (a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')");
-    runStatementOnDriver("alter table acidTblLegacy SET TBLPROPERTIES ('transactional_properties' = 'default')");
-  }
-  /**
-   * Test the query correctness and directory layout for ACID table conversion with split-update
-   * enabled.
-   * 1. Insert a row to Non-ACID table
-   * 2. Convert Non-ACID to ACID table with split-update enabled
-   * 3. Insert a row to ACID table
-   * 4. Perform Major compaction
-   * 5. Clean
-   * @throws Exception
-   */
-  @Test
-  @Override
-  public void testNonAcidToAcidConversion1() throws Exception {
-    FileSystem fs = FileSystem.get(hiveConf);
-    FileStatus[] status;
-
-    // 1. Insert a row to Non-ACID table
-    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // There should be 2 original bucket files in the location (000000_0 and 000001_0)
-    Assert.assertEquals(BUCKET_COUNT, status.length);
-    for (int i = 0; i < status.length; i++) {
-      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-    }
-    List rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    int [][] resultData = new int[][] {{1, 2}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    int resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 2. Convert NONACIDORCTBL to ACID table
-    runStatementOnDriver("alter table " + Table.NONACIDORCTBL
-        + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // Everything should be same as before
-    Assert.assertEquals(BUCKET_COUNT, status.length);
-    for (int i = 0; i < status.length; i++) {
-      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-    }
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 2}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 3. Insert another row to newly-converted ACID table
-    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // There should be 2 original bucket files (000000_0 and 000001_0), plus a new delta directory.
-    // The delta directory should also have only 1 bucket file (bucket_00001)
-    Assert.assertEquals(3, status.length);
-    boolean sawNewDelta = false;
-    for (int i = 0; i < status.length; i++) {
-      if (status[i].getPath().getName().matches("delta_.*")) {
-        sawNewDelta = true;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(1, buckets.length); // only one bucket file
-        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
-      } else {
-        Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-      }
-    }
-    Assert.assertTrue(sawNewDelta);
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL + " order by a,b");
-    resultData = new int[][] {{1, 2}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 2;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 4. Perform a major compaction
-    runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'");
-    runWorker(hiveConf);
-    // There should be 1 new directory: base_xxxxxxx.
-    // Original bucket files and delta directory should stay until Cleaner kicks in.
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(4, status.length);
-    boolean sawNewBase = false;
-    for (int i = 0; i < status.length; i++) {
-      if (status[i].getPath().getName().matches("base_.*")) {
-        sawNewBase = true;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(1, buckets.length);
-        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
-      }
-    }
-    Assert.assertTrue(sawNewBase);
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 2}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 2;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 5. Let Cleaner delete obsolete files/dirs
-    // Note, here we create a fake directory along with fake files as original directories/files
-    String fakeFile0 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() +
-        "/subdir/000000_0";
-    String fakeFile1 = TEST_WAREHOUSE_DIR + "/" + (Table.NONACIDORCTBL).toString().toLowerCase() +
-        "/subdir/000000_1";
-    fs.create(new Path(fakeFile0));
-    fs.create(new Path(fakeFile1));
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // Before Cleaner, there should be 5 items:
-    // 2 original files, 1 original directory, 1 base directory and 1 delta directory
-    Assert.assertEquals(5, status.length);
-    runCleaner(hiveConf);
-    // There should be only 1 directory left: base_xxxxxxx.
-    // Original bucket files and delta directory should have been cleaned up.
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(1, status.length);
-    Assert.assertTrue(status[0].getPath().getName().matches("base_.*"));
-    FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(1, buckets.length);
-    Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 2}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 2;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-  }
-
-  /**
-   * Test the query correctness and directory layout for ACID table conversion with split-update
-   * enabled.
-   * 1. Insert a row to Non-ACID table
-   * 2. Convert Non-ACID to ACID table with split update enabled.
-   * 3. Update the existing row in ACID table
-   * 4. Perform Major compaction
-   * 5. Clean
-   * @throws Exception
-   */
-  @Test
-  @Override
-  public void testNonAcidToAcidConversion2() throws Exception {
-    FileSystem fs = FileSystem.get(hiveConf);
-    FileStatus[] status;
-
-    // 1. Insert a row to Non-ACID table
-    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // There should be 2 original bucket files in the location (000000_0 and 000001_0)
-    Assert.assertEquals(BUCKET_COUNT, status.length);
-    for (int i = 0; i < status.length; i++) {
-      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-    }
-    List rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    int [][] resultData = new int[][] {{1, 2}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    int resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 2. Convert NONACIDORCTBL to ACID table
-    runStatementOnDriver("alter table " + Table.NONACIDORCTBL
-        + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // Everything should be same as before
-    Assert.assertEquals(BUCKET_COUNT, status.length);
-    for (int i = 0; i < status.length; i++) {
-      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-    }
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 2}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 3. Update the existing row in newly-converted ACID table
-    runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // There should be 2 original bucket files (000000_0 and 000001_0), plus one delta directory
-    // and one delete_delta directory. When split-update is enabled, an update event is split into
-    // a combination of delete and insert, that generates the delete_delta directory.
-    // The delta directory should also have 2 bucket files (bucket_00000 and bucket_00001)
-    // and so should the delete_delta directory.
-    Assert.assertEquals(4, status.length);
-    boolean sawNewDelta = false;
-    boolean sawNewDeleteDelta = false;
-    for (int i = 0; i < status.length; i++) {
-      if (status[i].getPath().getName().matches("delta_.*")) {
-        sawNewDelta = true;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
-      } else if (status[i].getPath().getName().matches("delete_delta_.*")) {
-        sawNewDeleteDelta = true;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]"));
-      } else {
-        Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-      }
-    }
-    Assert.assertTrue(sawNewDelta);
-    Assert.assertTrue(sawNewDeleteDelta);
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 3}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 4. Perform a major compaction
-    runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'");
-    runWorker(hiveConf);
-    // There should be 1 new directory: base_0000001.
-    // Original bucket files and delta directory should stay until Cleaner kicks in.
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(5, status.length);
-    boolean sawNewBase = false;
-    for (int i = 0; i < status.length; i++) {
-      if (status[i].getPath().getName().matches("base_.*")) {
-        sawNewBase = true;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-        Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
-      }
-    }
-    Assert.assertTrue(sawNewBase);
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 3}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 5. Let Cleaner delete obsolete files/dirs
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // Before Cleaner, there should be 5 items:
-    // 2 original files, 1 delta directory, 1 delete_delta directory and 1 base directory
-    Assert.assertEquals(5, status.length);
-    runCleaner(hiveConf);
-    // There should be only 1 directory left: base_0000001.
-    // Original bucket files, delta directory and delete_delta should have been cleaned up.
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(1, status.length);
-    Assert.assertTrue(status[0].getPath().getName().matches("base_.*"));
-    FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-    Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_00001"));
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 3}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-  }
-
-  /**
-   * Test the query correctness and directory layout for ACID table conversion with split-update
-   * enabled.
-   * 1. Insert a row to Non-ACID table
-   * 2. Convert Non-ACID to ACID table with split-update enabled
-   * 3. Perform Major compaction
-   * 4. Insert a new row to ACID table
-   * 5. Perform another Major compaction
-   * 6. Clean
-   * @throws Exception
-   */
-  @Test
-  @Override
-  public void testNonAcidToAcidConversion3() throws Exception {
-    FileSystem fs = FileSystem.get(hiveConf);
-    FileStatus[] status;
-
-    // 1. Insert a row to Non-ACID table
-    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(1,2)");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // There should be 2 original bucket files in the location (000000_0 and 000001_0)
-    Assert.assertEquals(BUCKET_COUNT, status.length);
-    for (int i = 0; i < status.length; i++) {
-      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-    }
-    List rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    int [][] resultData = new int[][] {{1, 2}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    int resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 2. Convert NONACIDORCTBL to ACID table with split_update enabled. (txn_props=default)
-    runStatementOnDriver("alter table " + Table.NONACIDORCTBL
-        + " SET TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default')");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // Everything should be same as before
-    Assert.assertEquals(BUCKET_COUNT, status.length);
-    for (int i = 0; i < status.length; i++) {
-      Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-    }
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 2}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 3. Perform a major compaction
-    runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'");
-    runWorker(hiveConf);
-    // There should be 1 new directory: base_-9223372036854775808
-    // Original bucket files should stay until Cleaner kicks in.
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(3, status.length);
-    boolean sawNewBase = false;
-    for (int i = 0; i < status.length; i++) {
-      if (status[i].getPath().getName().matches("base_.*")) {
-        Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName());
-        sawNewBase = true;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-        Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
-      }
-    }
-    Assert.assertTrue(sawNewBase);
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 2}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 1;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 4. Update the existing row, and insert another row to newly-converted ACID table
-    runStatementOnDriver("update " + Table.NONACIDORCTBL + " set b=3 where a=1");
-    runStatementOnDriver("insert into " + Table.NONACIDORCTBL + "(a,b) values(3,4)");
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Arrays.sort(status); // make sure delta_0000001_0000001_0000 appears before delta_0000002_0000002_0000
-    // There should be 2 original bucket files (000000_0 and 000001_0), a base directory,
-    // plus two new delta directories and one delete_delta directory that would be created due to
-    // the update statement (remember split-update U=D+I)!
-    Assert.assertEquals(6, status.length);
-    int numDelta = 0;
-    int numDeleteDelta = 0;
-    sawNewBase = false;
-    for (int i = 0; i < status.length; i++) {
-      if (status[i].getPath().getName().matches("delta_.*")) {
-        numDelta++;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Arrays.sort(buckets);
-        if (numDelta == 1) {
-          Assert.assertEquals("delta_0000022_0000022_0000", status[i].getPath().getName());
-          Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
-        } else if (numDelta == 2) {
-          Assert.assertEquals("delta_0000023_0000023_0000", status[i].getPath().getName());
-          Assert.assertEquals(1, buckets.length);
-          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
-        }
-      } else if (status[i].getPath().getName().matches("delete_delta_.*")) {
-        numDeleteDelta++;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Arrays.sort(buckets);
-        if (numDeleteDelta == 1) {
-          Assert.assertEquals("delete_delta_0000022_0000022_0000", status[i].getPath().getName());
-          Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
-        }
-      } else if (status[i].getPath().getName().matches("base_.*")) {
-        Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName());
-        sawNewBase = true;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-        Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
-      } else {
-        Assert.assertTrue(status[i].getPath().getName().matches("00000[01]_0"));
-      }
-    }
-    Assert.assertEquals(2, numDelta);
-    Assert.assertEquals(1, numDeleteDelta);
-    Assert.assertTrue(sawNewBase);
-
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 3}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 2;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 5. Perform another major compaction
-    runStatementOnDriver("alter table "+ Table.NONACIDORCTBL + " compact 'MAJOR'");
-    runWorker(hiveConf);
-    // There should be 1 new base directory: base_0000001
-    // Original bucket files, delta directories, delete_delta directories and the
-    // previous base directory should stay until Cleaner kicks in.
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Arrays.sort(status);
-    Assert.assertEquals(7, status.length);
-    int numBase = 0;
-    for (int i = 0; i < status.length; i++) {
-      if (status[i].getPath().getName().matches("base_.*")) {
-        numBase++;
-        FileStatus[] buckets = fs.listStatus(status[i].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-        Arrays.sort(buckets);
-        if (numBase == 1) {
-          Assert.assertEquals("base_-9223372036854775808", status[i].getPath().getName());
-          Assert.assertEquals(BUCKET_COUNT - 1, buckets.length);
-          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
-        } else if (numBase == 2) {
-          // The new base dir now has two bucket files, since the delta dir has two bucket files
-          Assert.assertEquals("base_0000023", status[i].getPath().getName());
-          Assert.assertEquals(1, buckets.length);
-          Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
-        }
-      }
-    }
-    Assert.assertEquals(2, numBase);
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 3}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 2;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-
-    // 6. Let Cleaner delete obsolete files/dirs
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    // Before Cleaner, there should be 6 items:
-    // 2 original files, 2 delta directories, 1 delete_delta directory and 2 base directories
-    Assert.assertEquals(7, status.length);
-    runCleaner(hiveConf);
-    // There should be only 1 directory left: base_0000001.
-    // Original bucket files, delta directories and previous base directory should have been cleaned up.
-    status = fs.listStatus(new Path(TEST_WAREHOUSE_DIR + "/" +
-        (Table.NONACIDORCTBL).toString().toLowerCase()), FileUtils.STAGING_DIR_PATH_FILTER);
-    Assert.assertEquals(1, status.length);
-    Assert.assertEquals("base_0000023", status[0].getPath().getName());
-    FileStatus[] buckets = fs.listStatus(status[0].getPath(), FileUtils.STAGING_DIR_PATH_FILTER);
-    Arrays.sort(buckets);
-    Assert.assertEquals(1, buckets.length);
-    Assert.assertEquals("bucket_00001", buckets[0].getPath().getName());
-    rs = runStatementOnDriver("select a,b from " + Table.NONACIDORCTBL);
-    resultData = new int[][] {{1, 3}, {3, 4}};
-    Assert.assertEquals(stringifyValues(resultData), rs);
-    rs = runStatementOnDriver("select count(*) from " + Table.NONACIDORCTBL);
-    resultCount = 2;
-    Assert.assertEquals(resultCount, Integer.parseInt(rs.get(0)));
-  }
-}
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java
index 44a94127ec..c76d654b95 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2WithSplitUpdateAndVectorization.java
@@ -23,11 +23,11 @@
 import org.junit.Test;

 /**
- * Same as TestTxnCommands2WithSplitUpdate but tests ACID tables with vectorization turned on by
+ * Same as TestTxnCommands2 but tests ACID tables with vectorization turned on by
  * default, and having 'transactional_properties' set to 'default'. This specifically tests the
  * fast VectorizedOrcAcidRowBatchReader for ACID tables with split-update turned on.
  */
-public class TestTxnCommands2WithSplitUpdateAndVectorization extends TestTxnCommands2WithSplitUpdate {
+public class TestTxnCommands2WithSplitUpdateAndVectorization extends TestTxnCommands2 {

   public TestTxnCommands2WithSplitUpdateAndVectorization() {
     super();
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java
index 44ff65c77d..545531aeac 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/TestAcidUtils.java
@@ -669,21 +669,12 @@ public void testDeleteEventDeltaDirPathFilter() throws Exception {
   @Test
   public void testAcidOperationalProperties() throws Exception {
-    AcidUtils.AcidOperationalProperties testObj = AcidUtils.AcidOperationalProperties.getLegacy();
-    assertsForAcidOperationalProperties(testObj, "legacy");
-
-    testObj = AcidUtils.AcidOperationalProperties.getDefault();
+    AcidUtils.AcidOperationalProperties testObj = AcidUtils.AcidOperationalProperties.getDefault();
     assertsForAcidOperationalProperties(testObj, "default");

-    testObj = AcidUtils.AcidOperationalProperties.parseInt(0);
-    assertsForAcidOperationalProperties(testObj, "legacy");
-
     testObj = AcidUtils.AcidOperationalProperties.parseInt(1);
     assertsForAcidOperationalProperties(testObj, "split_update");

-    testObj = AcidUtils.AcidOperationalProperties.parseString("legacy");
-    assertsForAcidOperationalProperties(testObj, "legacy");
-
     testObj = AcidUtils.AcidOperationalProperties.parseString("default");
     assertsForAcidOperationalProperties(testObj, "default");

@@ -699,12 +690,6 @@ private void assertsForAcidOperationalProperties(AcidUtils.AcidOperationalProper
       assertEquals(1, testObj.toInt());
       assertEquals("|split_update", testObj.toString());
       break;
-    case "legacy":
-      assertEquals(false, testObj.isSplitUpdate());
-      assertEquals(false, testObj.isHashBasedMerge());
testObj.isHashBasedMerge()); - assertEquals(0, testObj.toInt()); - assertEquals("", testObj.toString()); - break; default: break; } } diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java index 439ec9bb8a..43e0a4a431 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedOrcAcidRowBatchReader.java @@ -246,10 +246,6 @@ private void testVectorizedOrcAcidRowBatchReader(String deleteEventRegistry) thr @Test public void testCanCreateVectorizedAcidRowBatchReaderOnSplit() throws Exception { OrcSplit mockSplit = Mockito.mock(OrcSplit.class); - conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, - AcidUtils.AcidOperationalProperties.getLegacy().toInt()); - // Test false when trying to create a vectorized ACID row batch reader for a legacy table. - assertFalse(VectorizedOrcAcidRowBatchReader.canCreateVectorizedAcidRowBatchReaderOnSplit(conf, mockSplit)); conf.setInt(HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES.varname, AcidUtils.AcidOperationalProperties.getDefault().toInt()); diff --git ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java index bbed591144..081340ab7c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java +++ ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java @@ -409,6 +409,12 @@ public ObjectInspector getObjectInspector() { return null; } + /** + * This is bogus, especially with split-update ACID tables: it causes compaction to create + * delete_delta_x_y directories where none existed before. + * @param value + * @return + */ @Override public boolean isDelete(Text value) { // Alternate between returning deleted and not.
This is easier than actually @@ -552,4 +558,7 @@ String makeDeltaDirName(long minTxnId, long maxTxnId) { String makeDeltaDirNameCompacted(long minTxnId, long maxTxnId) { return AcidUtils.deltaSubdir(minTxnId, maxTxnId); } + String makeDeleteDeltaDirNameCompacted(long minTxnId, long maxTxnId) { + return AcidUtils.deleteDeltaSubdir(minTxnId, maxTxnId); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java index efd6ed8fe7..cc54534763 100644 --- ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java +++ ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestWorker.java @@ -289,7 +289,7 @@ public void minorTableWithBase() throws Exception { CompactionRequest rqst = new CompactionRequest("default", "mtwb", CompactionType.MINOR); txnHandler.compact(rqst); - startWorker(); + startWorker();//add delta and delete_delta ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); @@ -299,7 +299,7 @@ public void minorTableWithBase() throws Exception { // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + Assert.assertEquals(5, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewDelta = false; @@ -310,9 +310,19 @@ public void minorTableWithBase() throws Exception { Assert.assertEquals(2, buckets.length); Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(208L, buckets[0].getLen()); - Assert.assertEquals(208L, buckets[1].getLen()); - } else { + Assert.assertEquals(104L, buckets[0].getLen()); + Assert.assertEquals(104L, buckets[1].getLen()); + } + if (stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(21, 24))) { + sawNewDelta = true; + FileStatus[] buckets = fs.listStatus(stat[i].getPath()); + Assert.assertEquals(2, buckets.length); + Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + Assert.assertEquals(104L, buckets[0].getLen()); + Assert.assertEquals(104L, buckets[1].getLen()); + } + else { LOG.debug("This is not the delta file you are looking for " + stat[i].getPath().getName()); } } @@ -348,14 +358,15 @@ public void minorWithOpenInMiddle() throws Exception { // There should still now be 5 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + Assert.assertEquals(5, stat.length); // Find the new delta file and make sure it has the right contents Arrays.sort(stat); Assert.assertEquals("base_20", stat[0].getPath().getName()); - Assert.assertEquals(makeDeltaDirNameCompacted(21, 22), stat[1].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(23, 25), stat[2].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(26, 27), stat[3].getPath().getName()); + Assert.assertEquals(makeDeleteDeltaDirNameCompacted(21, 22), stat[1].getPath().getName()); + Assert.assertEquals(makeDeltaDirNameCompacted(21, 22), stat[2].getPath().getName()); + Assert.assertEquals(makeDeltaDirName(23, 25), stat[3].getPath().getName()); + Assert.assertEquals(makeDeltaDirName(26, 27), 
stat[4].getPath().getName()); } @Test @@ -380,18 +391,19 @@ public void minorWithAborted() throws Exception { Assert.assertEquals(1, compacts.size()); Assert.assertEquals("ready for cleaning", compacts.get(0).getState()); - // There should still now be 5 directories in the location + // There should now be 6 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(5, stat.length); + Assert.assertEquals(6, stat.length); // Find the new delta file and make sure it has the right contents Arrays.sort(stat); Assert.assertEquals("base_20", stat[0].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(21, 22), stat[1].getPath().getName()); - Assert.assertEquals(makeDeltaDirNameCompacted(21, 27), stat[2].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(23, 25), stat[3].getPath().getName()); - Assert.assertEquals(makeDeltaDirName(26, 27), stat[4].getPath().getName()); + Assert.assertEquals(makeDeleteDeltaDirNameCompacted(21, 27), stat[1].getPath().getName()); + Assert.assertEquals(makeDeltaDirName(21, 22), stat[2].getPath().getName()); + Assert.assertEquals(makeDeltaDirNameCompacted(21, 27), stat[3].getPath().getName()); + Assert.assertEquals(makeDeltaDirName(23, 25), stat[4].getPath().getName()); + Assert.assertEquals(makeDeltaDirName(26, 27), stat[5].getPath().getName()); } @Test @@ -409,7 +421,7 @@ public void minorPartitionWithBase() throws Exception { rqst.setPartitionname("ds=today"); txnHandler.compact(rqst); - startWorker(); + startWorker(); // this will create delta_21_24 and delete_delta_21_24; see MockRawReader ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); List compacts = rsp.getCompacts(); @@ -419,7 +431,7 @@ public void minorPartitionWithBase() throws Exception { - // There should still be four directories in the location. + // There should now be five directories in the location.
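The hunk that continues below, like the minorTableWithBase hunk above, repeats one pattern: after minor compaction of a split-update table, both a compacted delta and a matching delete_delta must exist, each holding one bucket file per bucket at the compacted length (104L here). A hypothetical helper sketching how that pattern could be factored out (assertCompactedPair is this sketch's own name, not something the patch adds):

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;

final class CompactedPairAsserts {
  // Verifies that both the compacted delta and its paired delete_delta exist
  // under 'location', each holding 'bucketCount' bucket files of 'bucketLen' bytes.
  static void assertCompactedPair(FileSystem fs, Path location, String deltaName,
      String deleteDeltaName, int bucketCount, long bucketLen) throws Exception {
    for (String dir : new String[] {deltaName, deleteDeltaName}) {
      FileStatus[] buckets = fs.listStatus(new Path(location, dir));
      Assert.assertEquals(dir, bucketCount, buckets.length);
      for (FileStatus bucket : buckets) {
        Assert.assertTrue(bucket.getPath().getName().matches("bucket_\\d{5}"));
        Assert.assertEquals(dir + "/" + bucket.getPath().getName(), bucketLen, bucket.getLen());
      }
    }
  }
}

With such a helper, the minorPartitionWithBase assertions below would reduce to a single call like assertCompactedPair(fs, new Path(p.getSd().getLocation()), makeDeltaDirNameCompacted(21, 24), makeDeleteDeltaDirNameCompacted(21, 24), 2, 104L).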
FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(p.getSd().getLocation())); - Assert.assertEquals(4, stat.length); + Assert.assertEquals(5, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewDelta = false; @@ -430,9 +442,18 @@ public void minorPartitionWithBase() throws Exception { Assert.assertEquals(2, buckets.length); Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(208L, buckets[0].getLen()); - Assert.assertEquals(208L, buckets[1].getLen()); - } else { + Assert.assertEquals(104L, buckets[0].getLen()); + Assert.assertEquals(104L, buckets[1].getLen()); + } + if (stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(21, 24))) { + sawNewDelta = true; + FileStatus[] buckets = fs.listStatus(stat[i].getPath()); + Assert.assertEquals(2, buckets.length); + Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + Assert.assertEquals(104L, buckets[0].getLen()); + Assert.assertEquals(104L, buckets[1].getLen()); + } else { LOG.debug("This is not the delta file you are looking for " + stat[i].getPath().getName()); } } @@ -462,7 +483,7 @@ public void minorTableNoBase() throws Exception { - // There should still now be 5 directories in the location + // There should now be 4 directories in the location FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(t.getSd().getLocation())); - Assert.assertEquals(3, stat.length); + Assert.assertEquals(4, stat.length); // Find the new delta file and make sure it has the right contents boolean sawNewDelta = false; @@ -473,8 +494,17 @@ public void minorTableNoBase() throws Exception { Assert.assertEquals(2, buckets.length); Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); - Assert.assertEquals(208L, buckets[0].getLen()); - Assert.assertEquals(208L, buckets[1].getLen()); + Assert.assertEquals(104L, buckets[0].getLen()); + Assert.assertEquals(104L, buckets[1].getLen()); + } + if (stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(1, 4))) { + sawNewDelta = true; + FileStatus[] buckets = fs.listStatus(stat[i].getPath()); + Assert.assertEquals(2, buckets.length); + Assert.assertTrue(buckets[0].getPath().getName().matches("bucket_0000[01]")); + Assert.assertTrue(buckets[1].getPath().getName().matches("bucket_0000[01]")); + Assert.assertEquals(104L, buckets[0].getLen()); + Assert.assertEquals(104L, buckets[1].getLen()); } else { LOG.debug("This is not the delta file you are looking for " + stat[i].getPath().getName()); } @@ -534,6 +564,17 @@ public void minorNoBaseLotsOfDeltas() throws Exception { public void majorNoBaseLotsOfDeltas() throws Exception { compactNoBaseLotsOfDeltas(CompactionType.MAJOR); } + + /** + * These tests are starting to be a hack. The files written by addDeltaFile() are not proper + * ACID files, and the {@link CompactorTest.MockRawReader} performs no merging of delta files and + * fakes isDelete() as a shortcut. As a result, the files created on disk are not representative of + * what they would look like in a real system. + * Making {@link org.apache.hadoop.hive.ql.txn.compactor.CompactorTest.MockRawReader} do proper + * delete event handling would mean duplicating either OrcRawRecordMerger or VectorizedOrcAcidRowBatchReader.
+ * @param type + * @throws Exception + */ private void compactNoBaseLotsOfDeltas(CompactionType type) throws Exception { conf.setIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA, 2); Table t = newTable("default", "mapwb", true); @@ -570,45 +611,55 @@ private void compactNoBaseLotsOfDeltas(CompactionType type) throws Exception { FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(p.getSd().getLocation())); - Assert.assertEquals(9, stat.length); + /* delete_delta_21_23 and delete_delta_25_33, which are created as a result of compacting, account for the extra entries */ + int numFilesExpected = 11 + (type == CompactionType.MINOR ? 1 : 0); + Assert.assertEquals(numFilesExpected, stat.length); // Find the new delta file and make sure it has the right contents - BitSet matchesFound = new BitSet(9); - for (int i = 0; i < stat.length; i++) { + BitSet matchesFound = new BitSet(numFilesExpected); + for (int i = 0, j = 0; i < stat.length; i++) { + if(stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(21,23))) { + matchesFound.set(j++); + } + if(stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(25,33))) { + matchesFound.set(j++); + } if(stat[i].getPath().getName().equals(makeDeltaDirName(21,21))) { - matchesFound.set(0); + matchesFound.set(j++); } else if(stat[i].getPath().getName().equals(makeDeltaDirName(23, 23))) { - matchesFound.set(1); + matchesFound.set(j++); } else if(stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(25, 29))) { - matchesFound.set(2); + matchesFound.set(j++); } else if(stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(31, 32))) { - matchesFound.set(3); + matchesFound.set(j++); } else if(stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(31, 33))) { - matchesFound.set(4); + matchesFound.set(j++); } else if(stat[i].getPath().getName().equals(makeDeltaDirName(35, 35))) { - matchesFound.set(5); + matchesFound.set(j++); } else if(stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(21,23))) { - matchesFound.set(6); + matchesFound.set(j++); } else if(stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(25,33))) { - matchesFound.set(7); + matchesFound.set(j++); } switch (type) { - //yes, both do set(8) case MINOR: if(stat[i].getPath().getName().equals(makeDeltaDirNameCompacted(21,35))) { - matchesFound.set(8); + matchesFound.set(j++); + } + if (stat[i].getPath().getName().equals(makeDeleteDeltaDirNameCompacted(21, 35))) { + matchesFound.set(j++); } break; case MAJOR: if(stat[i].getPath().getName().equals(AcidUtils.baseDir(35))) { - matchesFound.set(8); + matchesFound.set(j++); } break; default: @@ -930,7 +981,7 @@ public void majorWithAborted() throws Exception { } @Override boolean useHive130DeltaDirName() { - return false; + return true; } @Test
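The useHive130DeltaDirName() override flipped to true above selects between the two delta-naming formats these tests juggle: Hive 1.3.0+ appends a statement id to statement-level deltas, while compacted directories keep the older two-part name. A minimal sketch of the distinction, illustrative only, using the same AcidUtils overloads the tests reach through makeDeltaDirName(), makeDeltaDirNameCompacted(), and makeDeleteDeltaDirNameCompacted():

import org.apache.hadoop.hive.ql.io.AcidUtils;

public class DeltaNameFormatSketch {
  public static void main(String[] args) {
    // Hive 1.3.0+ statement-level delta name (what useHive130DeltaDirName()
    // == true makes the test helpers produce):
    System.out.println(AcidUtils.deltaSubdir(21, 22, 0));     // delta_0000021_0000022_0000
    // Pre-1.3.0 / compacted name, without the statement id:
    System.out.println(AcidUtils.deltaSubdir(21, 22));        // delta_0000021_0000022
    // Under split-update, minor compaction of txns 21..27 is expected to emit
    // this delta/delete_delta pair, matching the minorWithAborted assertions:
    System.out.println(AcidUtils.deltaSubdir(21, 27));        // delta_0000021_0000027
    System.out.println(AcidUtils.deleteDeltaSubdir(21, 27));  // delete_delta_0000021_0000027
  }
}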