diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 711dfbdc1f..6b05f4667a 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -531,7 +531,7 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "The smaller it is the more load there will be on the jobtracker, the higher it is the less granular the caught will be."), DYNAMICPARTITIONING("hive.exec.dynamic.partition", true, "Whether or not to allow dynamic partitions in DML/DDL."), - DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict", + DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "nonstrict", "In strict mode, the user must specify at least one static partition\n" + "in case the user accidentally overwrites all partitions.\n" + "In nonstrict mode all partitions are allowed to be dynamic."), @@ -1845,7 +1845,7 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal "filter operators."), // Concurrency - HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false, + HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", true, "Whether Hive supports concurrency control or not. \n" + "A ZooKeeper instance must be up and running when using zookeeper Hive lock manager "), HIVE_LOCK_MANAGER("hive.lock.manager", "org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager", ""), @@ -1894,7 +1894,7 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal // Transactions HIVE_TXN_MANAGER("hive.txn.manager", - "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager", + "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager", "Set to org.apache.hadoop.hive.ql.lockmgr.DbTxnManager as part of turning on Hive\n" + "transactions, which also requires appropriate settings for hive.compactor.initiator.on,\n" + "hive.compactor.worker.threads, hive.support.concurrency (true),\n" + @@ -2985,6 +2985,9 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal HIVE_CREATE_TABLES_AS_INSERT_ONLY("hive.create.as.insert.only", false, "Whether the eligible tables should be created as ACID insert-only by default. Does \n" + "not apply to external tables, the ones using storage handlers, etc."), + HIVE_CREATE_TABLES_AS_ACID("hive.create.as.acid", true, + "Whether the eligible tables should be created as full ACID by default. 
Does \n" + + "not apply to external tables, the ones using storage handlers, etc."), // role names are case-insensitive USERS_IN_ADMIN_ROLE("hive.users.in.admin.role", "", false, "Comma separated list of users who are in admin role for bootstrapping.\n" + diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java index ae56ff7c81..29a006cd5e 100644 --- hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java @@ -217,6 +217,7 @@ public void createTable() throws Exception { tableParams.put(hive_metastoreConstants.IS_IMMUTABLE,"true"); } StatsSetupConst.setBasicStatsState(tableParams, StatsSetupConst.TRUE); + tableParams.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "false"); tbl.setParameters(tableParams); client.createTable(tbl); diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatLoaderTest.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatLoaderTest.java index 59d2efb156..124a5bcbf7 100644 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatLoaderTest.java +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatLoaderTest.java @@ -112,6 +112,8 @@ static void createTable(String tablename, String schema, String partitionedBy, D createTable = createTable + "partitioned by (" + partitionedBy + ") "; } createTable = createTable + "stored as " +storageFormat; + //HCat doesn't support transactional tables + createTable += " TBLPROPERTIES ('transactional'='false')"; executeStatementOnDriver(createTable, driver); } diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java index 4f7cf2b5e5..4e4638ae07 100644 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java @@ -417,14 +417,9 @@ static void dumpFile(String fileName) throws Exception { @Test public void testPartColsInData() throws IOException, CommandNeedRetryException { + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted","a int", "b string", driver, storageFormat); - driver.run("drop table junit_unparted"); - String createTable = - "create table junit_unparted(a int) partitioned by (b string) stored as " + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } int LOOP_SIZE = 11; String[] input = new String[LOOP_SIZE]; for (int i = 0; i < LOOP_SIZE; i++) { @@ -456,15 +451,10 @@ public void testPartColsInData() throws IOException, CommandNeedRetryException { @Test public void testMultiPartColsInData() throws Exception { - driver.run("drop table employee"); - String createTable = - "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " - + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to 
create table."); - } + AbstractHCatLoaderTest.dropTable("employee", driver); + AbstractHCatLoaderTest.createTable("employee", + "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING", + "emp_country STRING , emp_state STRING", driver, storageFormat); String[] inputData = { "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", @@ -512,13 +502,10 @@ public void testMultiPartColsInData() throws Exception { @Test public void testStoreInPartiitonedTbl() throws Exception { - driver.run("drop table junit_unparted"); - String createTable = - "create table junit_unparted(a int) partitioned by (b string) stored as " + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted","a int", "b string", + driver, storageFormat); + int LOOP_SIZE = 11; String[] input = new String[LOOP_SIZE]; for (int i = 0; i < LOOP_SIZE; i++) { @@ -553,14 +540,8 @@ public void testStoreInPartiitonedTbl() throws Exception { @Test public void testNoAlias() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_parted"); - String createTable = - "create table junit_parted(a int, b string) partitioned by (ds string) stored as " - + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_parted", driver); + AbstractHCatLoaderTest.createTable("junit_parted","a int, b string", "ds string", driver, storageFormat); PigServer server = new PigServer(ExecType.LOCAL); boolean errCaught = false; try { @@ -603,19 +584,13 @@ public void testNoAlias() throws IOException, CommandNeedRetryException { @Test public void testStoreMultiTables() throws IOException, CommandNeedRetryException { + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted","a int, b string", null, + driver, storageFormat); - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - driver.run("drop table junit_unparted2"); - createTable = "create table junit_unparted2(a int, b string) stored as RCFILE"; - retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_unparted2", driver); + AbstractHCatLoaderTest.createTable("junit_unparted2","a int, b string", null, + driver, "RCFILE"); int LOOP_SIZE = 3; String[] input = new String[LOOP_SIZE * LOOP_SIZE]; @@ -660,13 +635,9 @@ public void testStoreMultiTables() throws IOException, CommandNeedRetryException @Test public void testStoreWithNoSchema() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted","a int, b string", null, + driver, 
storageFormat); int LOOP_SIZE = 3; String[] input = new String[LOOP_SIZE * LOOP_SIZE]; @@ -700,13 +671,9 @@ public void testStoreWithNoSchema() throws IOException, CommandNeedRetryExceptio @Test public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted","a int, b string", null, + driver, storageFormat); int LOOP_SIZE = 3; String[] input = new String[LOOP_SIZE * LOOP_SIZE]; @@ -741,12 +708,8 @@ public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryExcept @Test public void testEmptyStore() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted","a int, b string", null, driver, storageFormat); int LOOP_SIZE = 3; String[] input = new String[LOOP_SIZE * LOOP_SIZE]; @@ -777,15 +740,11 @@ public void testEmptyStore() throws IOException, CommandNeedRetryException { @Test public void testBagNStruct() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = - "create table junit_unparted(b string,a struct<a1:int>, arr_of_struct array<string>, " - + "arr_of_struct2 array<struct<s1:string,s2:string>>, arr_of_struct3 array<struct<s3:string>>) stored as " - + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted", + "b string,a struct<a1:int>, arr_of_struct array<string>, " + + "arr_of_struct2 array<struct<s1:string,s2:string>>, arr_of_struct3 array<struct<s3:string>>", + null, driver, storageFormat); String[] inputData = new String[] { "zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", @@ -823,15 +782,10 @@ public void testBagNStruct() throws IOException, CommandNeedRetryException { @Test public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = - "create table junit_unparted(a int, b float, c double, d bigint, e string, h boolean, f binary, g binary) stored as " - + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted", + "a int, b float, c double, d bigint, e string, h boolean, f binary, g binary", null, + driver, storageFormat); int i = 0; String[] input = new String[3]; @@ -887,13 +841,9 @@ public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryEx @Test public void testStoreFuncSimple() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as " + storageFormat; - int retCode = 
driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("junit_unparted", driver); + AbstractHCatLoaderTest.createTable("junit_unparted","a int, b string", null, + driver, storageFormat); int LOOP_SIZE = 3; String[] inputData = new String[LOOP_SIZE * LOOP_SIZE]; @@ -930,16 +880,10 @@ public void testStoreFuncSimple() throws IOException, CommandNeedRetryException @Test public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = - "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " - + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("employee", driver); + AbstractHCatLoaderTest.createTable("employee", + "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING", + "emp_country STRING , emp_state STRING", driver, storageFormat); String[] inputData = { "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", @@ -970,16 +914,10 @@ public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOExc @Test public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = - "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " - + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } + AbstractHCatLoaderTest.dropTable("employee", driver); + AbstractHCatLoaderTest.createTable("employee", + "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING", + "emp_country STRING , emp_state STRING", driver, storageFormat); String[] inputData = { "111237\tKrishna\t01/01/1990\tM\tIN\tTN", "111238\tKalpana\t01/01/2000\tF\tIN\tKA", @@ -1009,16 +947,11 @@ public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOExceptio @Test public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { + AbstractHCatLoaderTest.dropTable("employee", driver); + AbstractHCatLoaderTest.createTable("employee", + "emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING", + "emp_country STRING , emp_state STRING", driver, storageFormat); - driver.run("drop table if exists employee"); - String createTable = - "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " - + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS " + storageFormat; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } String[] inputData = {}; HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); @@ -1040,15 +973,10 @@ public void testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOEx @Test public void testPartitionPublish() throws IOException, CommandNeedRetryException { + AbstractHCatLoaderTest.dropTable("ptn_fail", driver); + 
AbstractHCatLoaderTest.createTable("ptn_fail","a int, c string", "b string", + driver, storageFormat); - driver.run("drop table ptn_fail"); - String createTable = - "create table ptn_fail(a int, c string) partitioned by (b string) stored as " - + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } int LOOP_SIZE = 11; String[] input = new String[LOOP_SIZE]; @@ -1065,7 +993,7 @@ public void testPartitionPublish() throws IOException, CommandNeedRetryException server.executeBatch(); String query = "show partitions ptn_fail"; - retCode = driver.run(query).getResponseCode(); + int retCode = driver.run(query).getResponseCode(); if (retCode != 0) { throw new IOException("Error " + retCode + " running query " + query); diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java index 4a6c6a3a6f..95a8164dfe 100644 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java @@ -112,15 +112,7 @@ private void dropTable(String tablename) throws IOException, CommandNeedRetryExc } private void createTable(String tablename, String schema, String partitionedBy, String storageFormat) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - if (storageFormat != null){ - createTable = createTable + "stored as " +storageFormat; - } - driverRun(createTable); + AbstractHCatLoaderTest.createTable(tablename, schema, partitionedBy, driver, storageFormat); } private void driverRun(String cmd) throws IOException, CommandNeedRetryException { diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java index ea9cdda31c..5c9bae48e5 100644 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java @@ -97,18 +97,7 @@ private void dropTable(String tablename) throws IOException, CommandNeedRetryExc } private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " + storageFormat; - LOG.info("Creating table:\n {}", createTable); - CommandProcessorResponse result = driver.run(createTable); - int retCode = result.getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); - } + AbstractHCatLoaderTest.createTable(tablename, schema, partitionedBy, driver, storageFormat); } private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java index 40ea923858..e2c9b2e33b 100644 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java @@ -83,16 +83,7 @@ private void dropTable(String tablename) throws IOException, CommandNeedRetryExc } private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " + storageFormat; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); - } + AbstractHCatLoaderTest.createTable(tablename, schema, partitionedBy, driver, storageFormat); } private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { diff --git hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java index db3109e069..3c56cbc70c 100644 --- hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java +++ hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java @@ -20,8 +20,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.metastore.TransactionalValidationListener; import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.cli.CliSessionState; @@ -336,15 +338,17 @@ private void checkEndPoint(HiveEndPoint endPoint, IMetaStoreClient msClient) LOG.warn("Unable to check the endPoint: " + endPoint, e); throw new InvalidTable(endPoint.database, endPoint.table, e); } - // 1 - check if TBLPROPERTIES ('transactional'='true') is set on table Map params = t.getParameters(); if (params != null) { String transactionalProp = params.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); - if (transactionalProp == null || !transactionalProp.equalsIgnoreCase("true")) { + boolean isAcid = TransactionalValidationListener.DEFAULT_TRANSACTIONAL_PROPERTY + .equalsIgnoreCase(params.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES)); + if (transactionalProp == null || !transactionalProp.equalsIgnoreCase("true") || !isAcid) { LOG.error("'transactional' property is not set on Table " + endPoint); throw new InvalidTable(endPoint.database, endPoint.table, "\'transactional\' property" + - " is not set on Table"); } + " is not set on Table"); + } } // 
2 - check if partitionvals are legitimate diff --git hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java index 49aad392d8..f2a1ca7043 100644 --- hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java +++ hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java @@ -511,7 +511,7 @@ public void testTableValidation() throws Exception { runDDL(driver, "use testBucketing3"); runDDL(driver, "create table " + tbl1 + " ( key1 string, data string ) clustered by ( key1 ) into " - + bucketCount + " buckets stored as orc location " + tableLoc) ; + + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='false')") ; runDDL(driver, "create table " + tbl2 + " ( key1 string, data string ) clustered by ( key1 ) into " + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='false')") ; diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java index f344c47443..0aa1d4e16a 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java @@ -2999,7 +2999,7 @@ public void testTransactionalValidation() throws Throwable { Table t = createTable(dbName, tblName, owner, params, null, sd, 0); Assert.assertTrue("Expected exception", false); } catch (MetaException e) { - Assert.assertEquals("'transactional' property of TBLPROPERTIES may only have value 'true'", e.getMessage()); + Assert.assertEquals("'transactional' property of TBLPROPERTIES may only have value 'true': acidDb.acidTable", e.getMessage()); } // Fail - "transactional" property is set to an invalid value @@ -3009,7 +3009,7 @@ public void testTransactionalValidation() throws Throwable { Table t = createTable(dbName, tblName, owner, params, null, sd, 0); Assert.assertTrue("Expected exception", false); } catch (MetaException e) { - Assert.assertEquals("'transactional' property of TBLPROPERTIES may only have value 'true'", e.getMessage()); + Assert.assertEquals("'transactional' property of TBLPROPERTIES may only have value 'true': acidDb.acidTable", e.getMessage()); } // Fail - "transactional" is set to true, but the table is not bucketed @@ -3019,7 +3019,7 @@ public void testTransactionalValidation() throws Throwable { Table t = createTable(dbName, tblName, owner, params, null, sd, 0); Assert.assertTrue("Expected exception", false); } catch (MetaException e) { - Assert.assertEquals("The table must be stored using an ACID compliant format (such as ORC)", e.getMessage()); + Assert.assertEquals("The table must be stored using an ACID compliant format (such as ORC): acidDb.acidTable", e.getMessage()); } // Fail - "transactional" is set to true, and the table is bucketed, but doesn't use ORC @@ -3032,7 +3032,7 @@ public void testTransactionalValidation() throws Throwable { Table t = createTable(dbName, tblName, owner, params, null, sd, 0); Assert.assertTrue("Expected exception", false); } catch (MetaException e) { - Assert.assertEquals("The table must be stored using an ACID compliant format (such as ORC)", e.getMessage()); + Assert.assertEquals("The table must be stored using an ACID compliant format (such as ORC): acidDb.acidTable", e.getMessage()); } // Succeed - 
"transactional" is set to true, and the table is bucketed, and uses ORC @@ -3052,12 +3052,11 @@ public void testTransactionalValidation() throws Throwable { try { params.clear(); params.put("transactional", "false"); - t = new Table(); t.setParameters(params); client.alter_table(dbName, tblName, t); Assert.assertTrue("Expected exception", false); } catch (MetaException e) { - Assert.assertEquals("TBLPROPERTIES with 'transactional'='true' cannot be unset", e.getMessage()); + Assert.assertEquals("TBLPROPERTIES with 'transactional'='true' cannot be unset: aciddb.acidtable", e.getMessage()); } // Fail - trying to set "transactional" to "true" but doesn't satisfy bucketing and Input/OutputFormat requirement @@ -3072,7 +3071,7 @@ public void testTransactionalValidation() throws Throwable { client.alter_table(dbName, tblName, t); Assert.assertTrue("Expected exception", false); } catch (MetaException e) { - Assert.assertEquals("The table must be stored using an ACID compliant format (such as ORC)", e.getMessage()); + Assert.assertEquals("The table must be stored using an ACID compliant format (such as ORC): aciddb.acidtable1", e.getMessage()); } // Succeed - trying to set "transactional" to "true", and satisfies bucketing and Input/OutputFormat requirement diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index d3df015288..dce4cec051 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -1303,6 +1303,9 @@ public void releaseLocksAndCommitOrRollback(boolean commit, HiveTxnManager txnMa // If we've opened a transaction we need to commit or rollback rather than explicitly // releasing the locks. conf.unset(ValidTxnList.VALID_TXNS_KEY); + if(!checkConcurrency()) { + return; + } if (txnMgr.isTxnOpen()) { if (commit) { if(conf.getBoolVar(ConfVars.HIVE_IN_TEST) && conf.getBoolVar(ConfVars.HIVETESTMODEROLLBACKTXN)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index c1dbd24018..fba9f84155 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -119,12 +119,7 @@ public int execute(DriverContext driverContext) { private Table getTable(Hive db) throws SemanticException, HiveException { - Table tbl = work.getTable(); - // FIXME for ctas this is still needed because location is not set sometimes - if (tbl.getSd().getLocation() == null) { - tbl = db.getTable(work.getFullTableName()); - } - return tbl; + return db.getTable(work.getFullTableName()); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index d571bd0b48..990e0cb962 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -415,7 +415,9 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti * If there are deletes and reading original file, we must produce synthetic ROW_IDs in order * to see if any deletes apply */ - if(needSyntheticRowIds(true, !deleteEventRegistry.isEmpty(), rowIdProjected)) { + boolean needSyntheticRowId = + needSyntheticRowIds(true, !deleteEventRegistry.isEmpty(), rowIdProjected); + if(needSyntheticRowId) { assert syntheticProps != null && syntheticProps.rowIdOffset 
>= 0 : "" + syntheticProps; assert syntheticProps != null && syntheticProps.bucketProperty >= 0 : "" + syntheticProps; if(innerReader == null) { @@ -459,8 +461,19 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti // txnid:0 which is always committed so there is no need to check wrt invalid transactions //But originals written by Load Data for example can be in base_x or delta_x_x so we must //check if 'x' is committed or not evn if ROW_ID is not needed in the Operator pipeline. - findRecordsWithInvalidTransactionIds(innerRecordIdColumnVector, - vectorizedRowBatchBase.size, selectedBitSet); + if (needSyntheticRowId) { + findRecordsWithInvalidTransactionIds(innerRecordIdColumnVector, + vectorizedRowBatchBase.size, selectedBitSet); + } else { + /*since ROW_IDs are not needed we didn't create the ColumnVectors to hold them but we + * still have to check if the data being read is committed as far as current + * reader (transactions) is concerned. Since here we are reading 'original' schema file, + * all rows in it have been created by the same txn, namely 'syntheticProps.syntheticTxnId' + */ + if (!validTxnList.isTxnValid(syntheticProps.syntheticTxnId)) { + selectedBitSet.clear(0, vectorizedRowBatchBase.size); + } + } } } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java index 48ac22d3e4..f00ef01267 100644 --- ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java +++ ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java @@ -185,7 +185,9 @@ public void run() { void setHiveConf(HiveConf conf) { super.setHiveConf(conf); if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY)) { - throw new RuntimeException(ErrorMsg.DBTXNMGR_REQUIRES_CONCURRENCY.getMsg()); + //todo: hack for now - many (esp hcat) tests explicitly set concurrency to false so then + //since DbTxnManager is now default, this throws... + //throw new RuntimeException(ErrorMsg.DBTXNMGR_REQUIRES_CONCURRENCY.getMsg()); } } @@ -887,7 +889,7 @@ public long getCurrentTxnId() { } @Override public int getWriteIdAndIncrement() { - assert isTxnOpen(); + assert isTxnOpen() : "No txn context found."; return writeId++; } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 50bdce89a4..b2c671cdaa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -3376,16 +3376,30 @@ private static Path mvFile(HiveConf conf, FileSystem sourceFs, Path sourcePath, final String fullname = sourcePath.getName(); final String name = FilenameUtils.getBaseName(sourcePath.getName()); final String type = FilenameUtils.getExtension(sourcePath.getName()); + final boolean isAcidTarget = destDirPath.getName().startsWith(AcidUtils.BASE_PREFIX) || destDirPath.getName().startsWith(AcidUtils.DELTA_PREFIX); + Path destFilePath; + if(isAcidTarget && !AcidUtils.originalBucketFilter.accept(sourcePath)) { + //if here we are doing a load Data into acid table - todo: make this more explicit + //Acid tables can only deal with files matching AcidUtils.originalBucketFilter. + //so here we rename the input file and further logic will add a copy_N suffix in case of + //collisions. 
(This works since Load Data doesn't support bucketed tables for now) + destFilePath = new Path(destDirPath, "000000_0"); + } + else { + destFilePath = new Path(destDirPath, fullname); + } - Path destFilePath = new Path(destDirPath, fullname); - - /* + /* * The below loop may perform bad when the destination file already exists and it has too many _copy_ * files as well. A desired approach was to call listFiles() and get a complete list of files from * the destination, and check whether the file exists or not on that list. However, millions of files * could live on the destination directory, and on concurrent situations, this can cause OOM problems. * * I'll leave the below loop for now until a better approach is found. + * + * This is problematic: caller of mvFile() may use a thread pool to move files in parallel in + * which case there is a race condition between exists() and rename() from different threads. + * I suppose in case of collisions the FileSystem will throw and the command will fail. */ for (int counter = 1; destFs.exists(destFilePath); counter++) { if (isOverwrite) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index cc956da575..8b5c5e4540 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -160,7 +160,7 @@ private URI initializeFromURI(String fromPath, boolean isLocal) throws IOExcepti "source contains directory: " + oneSrc.getPath().toString())); } if(AcidUtils.isFullAcidTable(table)) { - if(!AcidUtils.originalBucketFilter.accept(oneSrc.getPath())) { + if(false && !AcidUtils.originalBucketFilter.accept(oneSrc.getPath())) { //acid files (e.g. bucket_0000) have ROW_ID embedded in them and so can't be simply //copied to a table so only allow non-acid files for now throw new SemanticException(ErrorMsg.ACID_LOAD_DATA_INVALID_FILE_NAME, diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 28e3621d32..7d27b01e81 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -108,6 +108,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; @@ -12212,14 +12213,6 @@ private void validate(Task task, boolean reworkMapredWor validate(childTask, reworkMapredWork); } } - - /** - * Get the row resolver given an operator. - */ - public RowResolver getRowResolver(Operator opt) { - return opParseCtx.get(opt).getRowResolver(); - } - /** * Add default properties for table property. If a default parameter exists * in the tblProp, the value in tblProp will be kept. 
@@ -12229,7 +12222,8 @@ public RowResolver getRowResolver(Operator opt) { * @return Modified table property map */ private Map<String, String> addDefaultProperties( - Map<String, String> tblProp, boolean isExt, StorageFormat storageFormat) { + Map<String, String> tblProp, boolean isExt, StorageFormat storageFormat, + String qualifiedTableName, List<Order> sortCols) { Map<String, String> retValue; if (tblProp == null) { retValue = new HashMap<String, String>(); @@ -12248,16 +12242,45 @@ public RowResolver getRowResolver(Operator opt) { } } } - if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY) + boolean makeInsertOnly = HiveConf.getBoolVar(conf, ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY); + boolean makeAcid = HiveConf.getBoolVar(conf, ConfVars.HIVE_CREATE_TABLES_AS_ACID); + if ((makeInsertOnly || makeAcid) && !isExt && StringUtils.isBlank(storageFormat.getStorageHandler()) - && !retValue.containsKey(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL)) { - retValue.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); + && !AcidUtils.isTablePropertyTransactional(retValue)) { String oldProps = retValue.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES); if (oldProps != null) { LOG.warn("Non-transactional table has transactional properties; overwriting " + oldProps); } - retValue.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, - TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY); + if(makeInsertOnly) { + retValue.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); + retValue.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, + TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY); + } + if(makeAcid) { + /*for CTAS, TransactionalValidationListener runs too late to make the table Acid, so the + * initial write ends up running as non-acid...*/ + try { + Class inputFormatClass = storageFormat.getInputFormat() == null ? null : + Class.forName(storageFormat.getInputFormat()); + Class outputFormatClass = storageFormat.getOutputFormat() == null ? 
null : + Class.forName(storageFormat.getOutputFormat()); + if (inputFormatClass == null || outputFormatClass == null || + !AcidInputFormat.class.isAssignableFrom(inputFormatClass) || + !AcidOutputFormat.class.isAssignableFrom(outputFormatClass)) { + return retValue; + } + } catch (ClassNotFoundException e) { + LOG.warn("Could not verify InputFormat=" + storageFormat.getInputFormat() + " or OutputFormat=" + + storageFormat.getOutputFormat() + " for " + qualifiedTableName); + return retValue; + } + if(sortCols != null && !sortCols.isEmpty()) { + return retValue; + } + retValue.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); + retValue.put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, + TransactionalValidationListener.DEFAULT_TRANSACTIONAL_PROPERTY); + } } return retValue; } @@ -12482,7 +12505,7 @@ ASTNode analyzeCreateTable( switch (command_type) { case CREATE_TABLE: // REGULAR CREATE TABLE DDL - tblProps = addDefaultProperties(tblProps, isExt, storageFormat); + tblProps = addDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols); CreateTableDesc crtTblDesc = new CreateTableDesc(dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, @@ -12503,7 +12526,7 @@ ASTNode analyzeCreateTable( break; case CTLT: // create table like - tblProps = addDefaultProperties(tblProps, isExt, storageFormat); + tblProps = addDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols); if (isTemporary) { Table likeTable = getTable(likeTableName, false); @@ -12580,7 +12603,7 @@ ASTNode analyzeCreateTable( } } - tblProps = addDefaultProperties(tblProps, isExt, storageFormat); + tblProps = addDefaultProperties(tblProps, isExt, storageFormat, dbDotTab, sortCols); tableDesc = new CreateTableDesc(qualifiedTabName[0], dbDotTab, isExt, isTemporary, cols, partCols, bucketCols, sortCols, numBuckets, rowFormatParams.fieldDelim, rowFormatParams.fieldEscape, rowFormatParams.collItemDelim, rowFormatParams.mapKeyDelim, diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java index b98c74a889..823acc846e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java @@ -18,25 +18,16 @@ package org.apache.hadoop.hive.ql; import org.apache.commons.io.FileUtils; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.io.NullWritable; import org.junit.Assert; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.junit.rules.TestName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; -import java.io.IOException; import java.util.List; /** @@ -101,7 +92,7 @@ private void loadDataUpdate(boolean isVectorized) throws Exception { runStatementOnDriver( "create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); //Tstage is just a simple way to generate test data - runStatementOnDriver("create table Tstage (a int, b int) stored as orc"); + runStatementOnDriver("create table Tstage (a int, b int) stored as orc 
tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); //this creates an ORC data file with correct schema under table root runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); @@ -169,7 +160,7 @@ private void loadData(boolean isVectorized) throws Exception { runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); runStatementOnDriver("insert into T values(0,2),(0,4)"); //Tstage is just a simple way to generate test data - runStatementOnDriver("create table Tstage (a int, b int) stored as orc"); + runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); //this creates an ORC data file with correct schema under table root runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); @@ -239,10 +230,10 @@ private void loadData(boolean isVectorized) throws Exception { private void loadDataNonAcid2AcidConversion(boolean isVectorized) throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); - runStatementOnDriver("create table T (a int, b int) stored as orc"); + runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='false')"); //per acid write to test nonAcid2acid conversion mixed with load data runStatementOnDriver("insert into T values(0,2),(0,4)"); - runStatementOnDriver("create table Tstage (a int, b int) stored as orc"); + runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); //make 2 more inserts so that we have 000000_0_copy_1, 000000_0_copy_2 files in export //export works at file level so if you have copy_N in the table dir, you'll have those in output @@ -311,7 +302,7 @@ public void loadDataPartitioned() throws Exception { runStatementOnDriver("drop table if exists T"); runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) partitioned by (p int) stored as orc tblproperties('transactional'='true')"); - runStatementOnDriver("create table Tstage (a int, b int) stored as orc"); + runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(0,2),(0,4)"); runStatementOnDriver("export table Tstage to '" + getWarehouseDir() +"/1'"); @@ -365,7 +356,7 @@ public void testValidations() throws Exception { runStatementOnDriver("create table T (a int, b int) clustered by (a) into 2 buckets stored as orc tblproperties('transactional'='true')"); File createdFile= folder.newFile("myfile.txt"); FileUtils.writeStringToFile(createdFile, "hello world"); - runStatementOnDriver("create table Tstage (a int, b int) stored as orc"); + runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); //this creates an ORC data file with correct schema under table root runStatementOnDriver("insert into Tstage values(1,2),(3,4)"); CommandProcessorResponse cpr = runStatementOnDriverNegative("load data local inpath '" + getWarehouseDir() + "' into table T"); @@ -393,7 +384,7 @@ private void testMultiStatement(boolean isVectorized) throws Exception { runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b 
int) stored as orc tblproperties('transactional'='true')"); //Tstage is just a simple way to generate test data - runStatementOnDriver("create table Tstage (a int, b int) stored as orc"); + runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(5,5),(6,6)"); //this creates an ORC data file with correct schema under table root runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); @@ -434,7 +425,7 @@ public void testAbort() throws Exception { runStatementOnDriver("drop table if exists Tstage"); runStatementOnDriver("create table T (a int, b int) stored as orc tblproperties('transactional'='true')"); //Tstage is just a simple way to generate test data - runStatementOnDriver("create table Tstage (a int, b int) stored as orc"); + runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into Tstage values(5,5),(6,6)"); //this creates an ORC data file with correct schema under table root runStatementOnDriver("export table Tstage to '" + getWarehouseDir() + "/1'"); @@ -464,4 +455,26 @@ private void checkResult(String[][] expectedResult, String query, boolean isVect checkExpected(rs, expectedResult, msg + (isVectorized ? " vect" : ""), LOG, !isVectorized); assertVectorized(isVectorized, query); } + @Test + public void testAnyFileName() throws Exception { + boolean isVectorized = false; + runStatementOnDriver("drop table if exists T"); + runStatementOnDriver("drop table if exists Tstage"); + runStatementOnDriver("create table T ( ctinyint TINYINT,\n" + + " csmallint SMALLINT,\n" + + " cint INT,\n" + + " cbigint BIGINT,\n" + + " cfloat FLOAT,\n" + + " cdouble DOUBLE,\n" + + " cstring1 STRING,\n" + + " cstring2 STRING,\n" + + " ctimestamp1 TIMESTAMP,\n" + + " ctimestamp2 TIMESTAMP,\n" + + " cboolean1 BOOLEAN,\n" + + " cboolean2 BOOLEAN) stored as orc tblproperties('transactional'='true')"); + //ql/target/tmp/org.apache.hadoop.hive.ql.TestTxnNoBuckets-1512791382683/warehouse + runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/../../../../../data/files/alltypesorc' into table T"); + List<String> rs = runStatementOnDriver("select count(*) from T"); + Assert.assertEquals("12288", rs.get(0)); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java index 780b97cb51..452923a203 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnNoBuckets.java @@ -67,7 +67,7 @@ public void testNoBuckets() throws Exception { int[][] sourceVals1 = {{0,0,0},{3,3,3}}; int[][] sourceVals2 = {{1,1,1},{2,2,2}}; runStatementOnDriver("drop table if exists tmp"); - runStatementOnDriver("create table tmp (c1 integer, c2 integer, c3 integer) stored as orc"); + runStatementOnDriver("create table tmp (c1 integer, c2 integer, c3 integer) stored as orc tblproperties('transactional'='false')"); runStatementOnDriver("insert into tmp " + makeValuesClause(sourceVals1)); runStatementOnDriver("insert into tmp " + makeValuesClause(sourceVals2)); runStatementOnDriver("drop table if exists nobuckets"); @@ -187,7 +187,7 @@ public void testCTAS() throws Exception { runStatementOnDriver("insert into " + Table.ACIDTBL + makeValuesClause(values)); runStatementOnDriver("create table myctas2 stored as ORC TBLPROPERTIES ('transactional" + - "'='true', 
'transactional_properties'='default') as select a, b from " + Table.ACIDTBL); + "'='true', 'transactional_properties'='default') as select a, b from " + Table.ACIDTBL);//todo: try this with acid default - it seems making the table acid in the listener is too late rs = runStatementOnDriver("select ROW__ID, a, b, INPUT__FILE__NAME from myctas2 order by ROW__ID"); String expected2[][] = { {"{\"transactionid\":17,\"bucketid\":536870912,\"rowid\":0}\t3\t4", "warehouse/myctas2/delta_0000017_0000017_0000/bucket_00000"}, @@ -531,7 +531,7 @@ public void testNonAcidToAcidVectorzied() throws Exception { //this enables vectorization of ROW__ID hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED, true);//HIVE-12631 runStatementOnDriver("drop table if exists T"); - runStatementOnDriver("create table T(a int, b int) stored as orc"); + runStatementOnDriver("create table T(a int, b int) stored as orc tblproperties('transactional'='false')"); int[][] values = {{1,2},{2,4},{5,6},{6,8},{9,10}}; runStatementOnDriver("insert into T(a, b) " + makeValuesClause(values)); //, 'transactional_properties'='default' @@ -696,5 +696,20 @@ public void testCompactStatsGather() throws Exception { map = hms.getPartitionColumnStatistics("default","T", partNames, colNames); Assert.assertEquals("", 5, map.get(partNames.get(0)).get(0).getStatsData().getLongStats().getHighValue()); } + @Test + public void testDefault() throws Exception { + runStatementOnDriver("drop table if exists T"); + runStatementOnDriver("create table T (a int, b int) stored as orc"); + runStatementOnDriver("insert into T values(1,2),(3,4)"); + String query = "select ROW__ID, a, b, INPUT__FILE__NAME from T order by a, b"; + List<String> rs = runStatementOnDriver(query); + String[][] expected = { + //this proves data is written in Acid layout so T was made Acid + {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":0}\t1\t2", "t/delta_0000015_0000015_0000/bucket_00000"}, + {"{\"transactionid\":15,\"bucketid\":536870912,\"rowid\":1}\t3\t4", "t/delta_0000015_0000015_0000/bucket_00000"} }; + checkExpected(rs, expected, "insert data"); + + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index 32a24898ee..30bbb3f149 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -25,6 +25,7 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; @@ -71,7 +72,7 @@ public String toString() { public void setUp() throws Exception { setUpInternal(); } - protected void setUpInternal() throws Exception { + void setUpInternal() throws Exception { hiveConf = new HiveConf(this.getClass()); hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); @@ -100,7 +101,7 @@ protected void setUpInternal() throws Exception { runStatementOnDriver("create table " + Table.NONACIDORCTBL + "(a int, b int) clustered by (a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); runStatementOnDriver("create table " + Table.NONACIDORCTBL2 + "(a int, b int) clustered by 
(a) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='false')"); runStatementOnDriver("create temporary table " + Table.ACIDTBL2 + "(a int, b int, c int) clustered by (c) into " + BUCKET_COUNT + " buckets stored as orc TBLPROPERTIES ('transactional'='true')"); - runStatementOnDriver("create table " + Table.NONACIDNONBUCKET + "(a int, b int) stored as orc"); + runStatementOnDriver("create table " + Table.NONACIDNONBUCKET + "(a int, b int) stored as orc TBLPROPERTIES ('transactional'='false')"); } protected void dropTables() throws Exception { for(TxnCommandsBaseForTests.Table t : TxnCommandsBaseForTests.Table.values()) { diff --git ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java index 406bdea96a..e84f63d798 100644 --- ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java +++ ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager.java @@ -388,6 +388,11 @@ public void testDDLNoLock() throws Exception { @Test public void concurrencyFalse() throws Exception { HiveConf badConf = new HiveConf(); + if(badConf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY)) { + //TxnManagerFactory is a singleton, so if the default is true, it has already been + //created and won't throw + return; + } badConf.setVar(HiveConf.ConfVars.HIVE_TXN_MANAGER, "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager"); badConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); diff --git ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java index d309e3d06f..28131a04e9 100644 --- ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java +++ ql/src/test/org/apache/hadoop/hive/ql/lockmgr/TestDbTxnManager2.java @@ -623,7 +623,7 @@ public void checkExpectedLocks() throws Exception { CommandProcessorResponse cpr = null; cpr = driver.run("create table acidPart(a int, b int) partitioned by (p string) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')"); checkCmdOnDriver(cpr); - cpr = driver.run("create table nonAcidPart(a int, b int) partitioned by (p string) stored as orc"); + cpr = driver.run("create table nonAcidPart(a int, b int) partitioned by (p string) stored as orc TBLPROPERTIES ('transactional'='false')"); checkCmdOnDriver(cpr); cpr = driver.compileAndRespond("insert into nonAcidPart partition(p) values(1,2,3)"); diff --git ql/src/test/queries/clientnegative/lockneg_try_drop_locked_db.q ql/src/test/queries/clientnegative/lockneg_try_drop_locked_db.q index 8cbe31083b..a734b149be 100644 --- ql/src/test/queries/clientnegative/lockneg_try_drop_locked_db.q +++ ql/src/test/queries/clientnegative/lockneg_try_drop_locked_db.q @@ -1,3 +1,6 @@ +set hive.support.concurrency=false; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager; + set hive.lock.numretries=0; create database lockneg9; diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java index da1031300a..663d7b9874 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.metastore; import java.io.IOException; 
+import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -34,6 +35,7 @@ import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.events.PreAlterTableEvent; import org.apache.hadoop.hive.metastore.events.PreCreateTableEvent; import org.apache.hadoop.hive.metastore.events.PreEventContext; @@ -137,7 +139,8 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw if (!conformToAcid(newTable)) { // INSERT_ONLY tables don't have to conform to ACID requirement like ORC or bucketing if (transactionalPropertiesValue == null || !"insert_only".equalsIgnoreCase(transactionalPropertiesValue)) { - throw new MetaException("The table must be stored using an ACID compliant format (such as ORC)"); + throw new MetaException("The table must be stored using an ACID compliant format (such as ORC): " + + Warehouse.getQualifiedName(newTable)); } } @@ -160,7 +163,8 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw if (!hasValidTransactionalValue && !MetaStoreUtils.isInsertOnlyTableParam(oldTable.getParameters())) { // if here, there is attempt to set transactional to something other than 'true' // and NOT the same value it was before - throw new MetaException("TBLPROPERTIES with 'transactional'='true' cannot be unset"); + throw new MetaException("TBLPROPERTIES with 'transactional'='true' cannot be unset: " + + Warehouse.getQualifiedName(newTable)); } if (isTransactionalPropertiesPresent) { @@ -178,13 +182,42 @@ private void handleAlterTableTransactionalProp(PreAlterTableEvent context) throw || !oldTransactionalPropertiesValue.equalsIgnoreCase(transactionalPropertiesValue)) && !MetaStoreUtils.isInsertOnlyTableParam(oldTable.getParameters())) { throw new MetaException("TBLPROPERTIES with 'transactional_properties' cannot be " - + "altered after the table is created: " + Warehouse.getQualifiedName(newTable)); } } } } /** + * Want to make a newly created table Acid (unless it explicitly has a transactional=true param), + * if the table can support it. + */ + private void makeAcid(Table newTable) throws MetaException { + Configuration conf = MetastoreConf.newMetastoreConf(); + if("full".equalsIgnoreCase(MetastoreConf.getAsString(conf, MetastoreConf.ConfVars.ACID_DEFAULT))) { + if(!conformToAcid(newTable)) { + LOG.info("Could not make " + Warehouse.getQualifiedName(newTable) + " acid: wrong IO format"); + return; + } + if(!TableType.MANAGED_TABLE.toString().equalsIgnoreCase(newTable.getTableType())) { + //todo: should this check be in conformToAcid()? + LOG.info("Could not make " + Warehouse.getQualifiedName(newTable) + " acid: it's " + newTable.getTableType()); + return; + } + if(newTable.getSd().getSortColsSize() > 0) { + LOG.info("Could not make " + Warehouse.getQualifiedName(newTable) + " acid: it's sorted"); + return; + } + //check if orc and not sorted + Map<String, String> parameters = newTable.getParameters(); + if (parameters == null || parameters.isEmpty()) { + parameters = new HashMap<>(); + } + parameters.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); + newTable.setParameters(parameters); + } + } + /** * Normalize case and make sure: * 1. 'true' is the only value to be set for 'transactional' (if set at all) * 2. 
@@ -193,6 +226,7 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr
     Table newTable = context.getTable();
     Map<String, String> parameters = newTable.getParameters();
     if (parameters == null || parameters.isEmpty()) {
+      makeAcid(newTable);
       return;
     }
     String transactional = null;
@@ -212,13 +246,15 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr
     }
 
     if (transactional == null) {
+      makeAcid(newTable);
       return;
     }
 
     if ("false".equalsIgnoreCase(transactional)) {
       // just drop transactional=false.  For backward compatibility in case someone has scripts
       // with transactional=false
-      LOG.info("'transactional'='false' is no longer a valid property and will be ignored");
+      LOG.info("'transactional'='false' is no longer a valid property and will be ignored: " +
+          Warehouse.getQualifiedName(newTable));
       return;
     }
 
@@ -226,13 +262,15 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr
     if (!conformToAcid(newTable)) {
       // INSERT_ONLY tables don't have to conform to ACID requirement like ORC or bucketing
       if (transactionalProperties == null || !"insert_only".equalsIgnoreCase(transactionalProperties)) {
-        throw new MetaException("The table must be stored using an ACID compliant format (such as ORC)");
+        throw new MetaException("The table must be stored using an ACID compliant format (such as ORC): "
+            + Warehouse.getQualifiedName(newTable));
       }
     }
 
     if (newTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) {
       throw new MetaException(newTable.getDbName() + "." + newTable.getTableName() +
-          " cannot be declared transactional because it's an external table");
+          " cannot be declared transactional because it's an external table: " +
+          Warehouse.getQualifiedName(newTable));
     }
 
     // normalize prop name
@@ -245,7 +283,8 @@ private void handleCreateTableTransactionalProp(PreCreateTableEvent context) thr
     }
 
     // transactional is found, but the value is not in expected range
-    throw new MetaException("'transactional' property of TBLPROPERTIES may only have value 'true'");
+    throw new MetaException("'transactional' property of TBLPROPERTIES may only have value 'true': "
+        + Warehouse.getQualifiedName(newTable));
   }
 
   /**
@@ -260,13 +299,15 @@ private void normazlieTransactionalPropertyDefault(Table table) {
   }
 
   /**
    * Check that InputFormatClass/OutputFormatClass should implement
-   * AcidInputFormat/AcidOutputFormat
+   * AcidInputFormat/AcidOutputFormat  todo: make this log what format it found
    */
-  private boolean conformToAcid(Table table) throws MetaException {
+  public static boolean conformToAcid(Table table) throws MetaException {
     StorageDescriptor sd = table.getSd();
     try {
-      Class inputFormatClass = Class.forName(sd.getInputFormat());
-      Class outputFormatClass = Class.forName(sd.getOutputFormat());
+      Class inputFormatClass = sd.getInputFormat() == null ? null :
+          Class.forName(sd.getInputFormat());
+      Class outputFormatClass = sd.getOutputFormat() == null ? null :
+          Class.forName(sd.getOutputFormat());
       if (inputFormatClass == null || outputFormatClass == null ||
           !Class.forName("org.apache.hadoop.hive.ql.io.AcidInputFormat").isAssignableFrom(inputFormatClass) ||
@@ -274,7 +315,9 @@ private boolean conformToAcid(Table table) throws MetaException {
         return false;
       }
     } catch (ClassNotFoundException e) {
-      throw new MetaException("Invalid input/output format for table");
+      LOG.warn("Could not verify InputFormat=" + sd.getInputFormat() + " or OutputFormat=" +
+          sd.getOutputFormat() + " for " + Warehouse.getQualifiedName(table));
+      return false;
     }
 
     return true;
@@ -299,8 +342,8 @@ private void initializeTransactionalProperties(Table table) throws MetaException
         parameters.remove(key);
         String validationError = validateTransactionalProperties(tableTransactionalProperties);
         if (validationError != null) {
-          throw new MetaException("Invalid transactional properties specified for the "
-              + "table with the error " + validationError);
+          throw new MetaException("Invalid transactional properties specified for "
+              + Warehouse.getQualifiedName(table) + " with the error " + validationError);
         }
         break;
       }
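A behavioral note on the conformToAcid() hunks above: an unresolvable format class used to abort the DDL with MetaException("Invalid input/output format for table"); it now logs a WARN naming the formats and the table and returns false, so the makeAcid() default path can skip such tables instead of failing the create. A rough sketch under the same assumptions as the previous example, with a made-up format class:

  import org.apache.hadoop.hive.metastore.TransactionalValidationListener;
  import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
  import org.apache.hadoop.hive.metastore.api.Table;

  public class UnresolvableFormatDemo {
    public static void main(String[] args) throws Exception {
      Table t = new Table();
      t.setDbName("default");
      t.setTableName("odd_format");
      StorageDescriptor sd = new StorageDescriptor();
      sd.setInputFormat("com.example.NoSuchInputFormat");   // deliberately not on the classpath
      sd.setOutputFormat("com.example.NoSuchOutputFormat");
      t.setSd(sd);
      // pre-patch: MetaException("Invalid input/output format for table")
      // post-patch: a WARN naming both formats and the table, then false
      System.out.println(TransactionalValidationListener.conformToAcid(t));  // prints false
    }
  }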
diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index b46cc38a22..325318712a 100644
--- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -215,6 +215,12 @@ public static ConfVars getMetaConf(String name) {
 
   public enum ConfVars {
     // alpha order, PLEASE!
+    ACID_DEFAULT("metastore.acid.default", "metastore.acid.default", "full",
+        new Validator.StringSet("none", //default not acid, i.e. leave the tbl as is
+            "all",  //if possible make full acid, if not MM, if still not possible should be External tbl
+            "full", //if possible make full acid, else do nothing
+            "mm"    //if possible make table MM, else should be External
+        ), "For testing.  Causes the system to make suitable tables acid/mm automatically."),
     ADDED_JARS("metastore.added.jars.path", "hive.added.jars.path", "",
         "This an internal parameter."),
     AGGREGATE_STATS_CACHE_CLEAN_UNTIL("metastore.aggregate.stats.cache.clean.until",
@@ -826,7 +832,7 @@ public static ConfVars getMetaConf(String name) {
     // The metastore shouldn't care what txn manager Hive is running, but in various tests it
     // needs to set these values.  We should do the work to detangle this.
     HIVE_TXN_MANAGER("hive.txn.manager", "hive.txn.manager",
-        "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager",
+        "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
         "Set to org.apache.hadoop.hive.ql.lockmgr.DbTxnManager as part of turning on Hive\n" +
             "transactions, which also requires appropriate settings for hive.compactor.initiator.on,\n" +
             "hive.compactor.worker.threads, hive.support.concurrency (true),\n" +
@@ -835,7 +841,7 @@ public static ConfVars getMetaConf(String name) {
             "no transactions."),
     // Metastore always support concurrency, but certain ACID tests depend on this being set.  We
     // need to do the work to detangle this
-    HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", "hive.support.concurrency", false,
+    HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", "hive.support.concurrency", true,
        "Whether Hive supports concurrency control or not. \n" +
        "A ZooKeeper instance must be up and running when using zookeeper Hive lock manager "),
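The new metastore.acid.default property is what makeAcid() consults; of the four validator values, only "full" appears to be acted on in this patch ("all" and "mm" are accepted by the StringSet but not yet handled). A small probe of the knob, using only the two MetastoreConf calls that appear verbatim in makeAcid(); the class name is hypothetical:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hive.metastore.conf.MetastoreConf;

  public class AcidDefaultProbe {
    public static void main(String[] args) {
      Configuration conf = MetastoreConf.newMetastoreConf();
      String acidDefault = MetastoreConf.getAsString(conf, MetastoreConf.ConfVars.ACID_DEFAULT);
      // "full" (the default set by this patch) => eligible managed, unsorted ORC tables
      // are created with transactional=true
      System.out.println("metastore.acid.default = " + acidDefault);
    }
  }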