diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b5e2d86e62..0135400f12 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -646,6 +646,11 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal @Deprecated METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse", "location of default database for the warehouse"), + + HIVE_METASTORE_WAREHOUSE_EXTERNAL("hive.metastore.warehouse.external.dir", null, + "Default location for external tables created in the warehouse. " + + "If not set or null, then the normal warehouse location will be used as the default location."), + /** * @deprecated Use MetastoreConf.THRIFT_URIS */ diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java index 235bd11107..5ab4f91486 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java @@ -157,15 +157,15 @@ public void testRecyclePartTable() throws Exception { Partition part3 = createPartition(dbName, tblName, columns, values, serdeInfo); client.add_partition(part3); - Path part1Path = new Path(warehouse.getDefaultPartitionPath(db, tblName, ImmutableMap.of("dt", "20160101")), "part"); + Path part1Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160101")), "part"); createFile(part1Path, "p1"); String path1Chksum = ReplChangeManager.checksumFor(part1Path, fs); - Path part2Path = new Path(warehouse.getDefaultPartitionPath(db, tblName, ImmutableMap.of("dt", "20160102")), "part"); + Path part2Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160102")), "part"); createFile(part2Path, "p2"); String path2Chksum = ReplChangeManager.checksumFor(part2Path, fs); - Path part3Path = new Path(warehouse.getDefaultPartitionPath(db, tblName, ImmutableMap.of("dt", "20160103")), "part"); + Path part3Path = new Path(warehouse.getDefaultPartitionPath(db, tbl, ImmutableMap.of("dt", "20160103")), "part"); createFile(part3Path, "p3"); String path3Chksum = ReplChangeManager.checksumFor(part3Path, fs); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestWarehouseExternalDir.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestWarehouseExternalDir.java new file mode 100644 index 0000000000..e87daf8983 --- /dev/null +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestWarehouseExternalDir.java @@ -0,0 +1,193 @@ +/* + * Copyright 2014 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql; + +import java.net.URI; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.MetaStoreTestUtils; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hive.jdbc.miniHS2.MiniHS2; +import org.apache.hive.jdbc.miniHS2.MiniHS2.MiniClusterType; + +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class TestWarehouseExternalDir { + private static final Logger LOG = LoggerFactory.getLogger(TestWarehouseExternalDir.class); + + static MiniHS2 miniHS2; + static Hive db; + static Connection conn; + + static String whRootExternal = "/wh_ext"; + static Path whRootExternalPath; + static Path whRootManagedPath; + static FileSystem fs; + + // JUnit instantiates the test class once per test method, so the one-time MiniHS2 + // setup must live in @BeforeClass rather than in the constructor. + @BeforeClass + public static void beforeTest() throws Exception { + HiveConf conf = new HiveConf(); + + // Specify the external warehouse root + conf.setVar(ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL, whRootExternal); + + // Settings borrowed from TestJdbcWithMiniHS2 + conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); + conf.setBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED, false); + conf.setBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER, false); + + MiniHS2.Builder builder = new MiniHS2.Builder() + .withConf(conf) + .cleanupLocalDirOnStartup(true) + .withMiniMR() + .withRemoteMetastore(); + miniHS2 = builder.build(); + + Map<String, String> confOverlay = new HashMap<>(); + miniHS2.start(confOverlay); + + HiveConf dbConf = miniHS2.getHiveConf(); + db = Hive.get(dbConf); + + fs = miniHS2.getDfs().getFileSystem(); + whRootExternalPath = fs.makeQualified(new Path(whRootExternal)); + whRootManagedPath = fs.makeQualified(new Path(dbConf.getVar(ConfVars.METASTOREWAREHOUSE))); + + LOG.info("fs: {}", miniHS2.getDfs().getFileSystem().getUri()); + LOG.info("warehouse location: {}", whRootManagedPath); + LOG.info("whRootExternalPath: {}", whRootExternalPath); + + conn = getConnection(); + try (Statement stmt = conn.createStatement()) { + stmt.execute("create database if not exists twed_db1"); + } + } + + @AfterClass + public static void afterTest() throws Exception { + if (db != null) { + db.closeCurrent(); + db = null; + } + + if (conn != null) { + // TODO: delete tables/databases? + try (Statement stmt = conn.createStatement()) { + stmt.execute("drop database if exists twed_db1 cascade"); + } + conn.close(); + conn = null; + } + + if (miniHS2 != null) { + miniHS2.stop(); + miniHS2.cleanup(); + MiniHS2.cleanupLocalDir(); + miniHS2 = null; + } + } + + @Before + public void setUp() throws Exception { + } + + @After + public void tearDown() throws Exception { + } + + private static Connection getConnection() throws Exception { + return getConnection(miniHS2.getJdbcURL(), System.getProperty("user.name"), "bar"); + } + + private static Connection 
getConnection(String dbName) throws Exception { + return getConnection(miniHS2.getJdbcURL(dbName), System.getProperty("user.name"), "bar"); + } + + private static Connection getConnection(String jdbcURL, String user, String pwd) + throws SQLException { + Connection conn = DriverManager.getConnection(jdbcURL, user, pwd); + assertNotNull(conn); + return conn; + } + + static void checkTableLocation(Table table, Path expectedPath) throws Exception { + LOG.info("Table {}: location {}", table.getTableName(), table.getDataLocation()); + assertEquals(table.getTableName(), expectedPath, table.getDataLocation()); + assertTrue(miniHS2.getDfs().getFileSystem().exists(table.getDataLocation())); + } + + @Test + public void testManagedPaths() throws Exception { + try (Statement stmt = conn.createStatement()) { + // Confirm default managed table paths + stmt.execute("create table default.twed_1(c1 string)"); + Table tab = db.getTable("default", "twed_1"); + checkTableLocation(tab, new Path(whRootManagedPath, "twed_1")); + + stmt.execute("create table twed_db1.tab1(c1 string, c2 string)"); + tab = db.getTable("twed_db1", "tab1"); + checkTableLocation(tab, new Path(new Path(whRootManagedPath, "twed_db1.db"), "tab1")); + } + } + + @Test + public void testExternalDefaultPaths() throws Exception { + try (Statement stmt = conn.createStatement()) { + stmt.execute("create external table default.twed_ext1(c1 string)"); + Table tab = db.getTable("default", "twed_ext1"); + checkTableLocation(tab, new Path(whRootExternalPath, "twed_ext1")); + + stmt.execute("create external table twed_db1.twed_ext2(c1 string)"); + tab = db.getTable("twed_db1", "twed_ext2"); + checkTableLocation(tab, new Path(new Path(whRootExternalPath, "twed_db1.db"), "twed_ext2")); + + stmt.execute("create external table default.twed_ext3 like default.twed_ext1"); + tab = db.getTable("default", "twed_ext3"); + checkTableLocation(tab, new Path(whRootExternalPath, "twed_ext3")); + + stmt.execute("create external table twed_db1.twed_ext4 like default.twed_ext1"); + tab = db.getTable("twed_db1", "twed_ext4"); + checkTableLocation(tab, new Path(new Path(whRootExternalPath, "twed_db1.db"), "twed_ext4")); + } + } +} diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index b584c72650..518734c891 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -410,6 +410,7 @@ minillap.query.files=acid_bucket_pruning.q,\ tez_union_dynamic_partition.q,\ tez_union_dynamic_partition_2.q,\ unionDistinct_1.q,\ + whroot_external1.q,\ load_fs2.q,\ llap_stats.q,\ multi_count_distinct_null.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index e06949928d..75cd18000d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -5270,17 +5270,13 @@ public String getName() { public static void makeLocationQualified(String databaseName, StorageDescriptor sd, String name, HiveConf conf) throws HiveException { Path path = null; - if (!sd.isSetLocation()) - { - // Location is not set, leave it as-is if this is not a default DB - if (databaseName.equalsIgnoreCase(Warehouse.DEFAULT_DATABASE_NAME)) - { - // Default database name path is always ignored, use METASTOREWAREHOUSE and object name - // instead - path = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREWAREHOUSE), 
org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(name.toLowerCase())); - } - } - else + // If the table's location is currently unset, it is left unset, allowing the metastore to + // fill in the table's location. + // Note that the previous logic special-cased the default database and attempted to + // generate a location itself. This seems incorrect and unnecessary, since the metastore + // can fill in the default table location for the default DB just as it can for + // non-default DBs. + if (sd.isSetLocation()) { path = new Path(sd.getLocation()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java index 870f70a420..f5b8e86841 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java @@ -100,12 +100,11 @@ public LoadPartitions(Context context, ReplLogger replLogger, TableContext table private String location() throws MetaException, HiveException { Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName()); if (!tableContext.waitOnPrecursor()) { - return context.warehouse.getDefaultTablePath(parentDb, tableDesc.getTableName()).toString(); + return context.warehouse.getDefaultTablePath( + parentDb, tableDesc.getTableName(), tableDesc.isExternal()).toString(); } else { - Path tablePath = new Path( - context.warehouse.getDefaultDatabasePath(tableDesc.getDatabaseName()), - MetaStoreUtils.encodeTableName(tableDesc.getTableName().toLowerCase()) - ); + Path tablePath = context.warehouse.getDefaultTablePath( + tableDesc.getDatabaseName(), tableDesc.getTableName(), tableDesc.isExternal()); return context.warehouse.getDnsPath(tablePath).toString(); } } @@ -262,7 +261,8 @@ private Path locationOnReplicaWarehouse(Table table, AddPartitionDesc.OnePartiti if (table.getDataLocation() == null) { Database parentDb = context.hiveDb.getDatabase(tableDesc.getDatabaseName()); return new Path( - context.warehouse.getDefaultTablePath(parentDb, tableDesc.getTableName()), child); + context.warehouse.getDefaultTablePath(parentDb, tableDesc.getTableName(), tableDesc.isExternal()), + child); } else { return new Path(table.getDataLocation().toString(), child); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java index f2b7fa42ad..6d093fb81e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java @@ -213,12 +213,11 @@ private void newTableTasks(ImportTableDesc tblDesc) throws Exception { private String location(ImportTableDesc tblDesc, Database parentDb) throws MetaException, SemanticException { if (!tableContext.waitOnPrecursor()) { - return context.warehouse.getDefaultTablePath(parentDb, tblDesc.getTableName()).toString(); + return context.warehouse.getDefaultTablePath( + parentDb, tblDesc.getTableName(), tblDesc.isExternal()).toString(); } else { - Path tablePath = new Path( - context.warehouse.getDefaultDatabasePath(tblDesc.getDatabaseName()), - MetaStoreUtils.encodeTableName(tblDesc.getTableName().toLowerCase()) - ); + Path tablePath = context.warehouse.getDefaultTablePath( + 
tblDesc.getDatabaseName(), tblDesc.getTableName(), tblDesc.isExternal()); return context.warehouse.getDnsPath(tablePath).toString(); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index cc7f0d5ca0..d34de61842 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -557,7 +557,7 @@ private static void fixLocationInPartSpec( } else { Database parentDb = x.getHive().getDatabase(tblDesc.getDatabaseName()); tgtPath = new Path( - wh.getDefaultTablePath( parentDb, tblDesc.getTableName()), + wh.getDefaultTablePath( parentDb, tblDesc.getTableName(), tblDesc.isExternal()), Warehouse.makePartPath(partSpec.getPartSpec())); } } else { @@ -881,7 +881,7 @@ private static void createRegularImportTasks( if (tblDesc.getLocation() != null) { tablePath = new Path(tblDesc.getLocation()); } else { - tablePath = wh.getDefaultTablePath(parentDb, tblDesc.getTableName()); + tablePath = wh.getDefaultTablePath(parentDb, tblDesc.getTableName(), tblDesc.isExternal()); } FileSystem tgtFs = FileSystem.get(tablePath.toUri(), x.getConf()); checkTargetLocationEmpty(tgtFs, tablePath, replicationSpec,x.getLOG()); @@ -972,13 +972,10 @@ private static void createReplImportTasks( if (tblDesc.getLocation() == null) { if (!waitOnPrecursor){ - tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName()).toString()); + tblDesc.setLocation(wh.getDefaultTablePath(parentDb, tblDesc.getTableName(), tblDesc.isExternal()).toString()); } else { tblDesc.setLocation( - wh.getDnsPath(new Path( - wh.getDefaultDatabasePath(tblDesc.getDatabaseName()), - org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.encodeTableName(tblDesc.getTableName().toLowerCase()) - ) + wh.getDnsPath(wh.getDefaultTablePath(tblDesc.getDatabaseName(), tblDesc.getTableName(), tblDesc.isExternal()) ).toString()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 2e055aba4b..c2a07308a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7824,7 +7824,8 @@ private void handleLineage(LoadTableDesc ltd, Operator output) String tName = Utilities.getDbTableName(tableDesc.getTableName())[1]; try { Warehouse wh = new Warehouse(conf); - tlocation = wh.getDefaultTablePath(db.getDatabase(tableDesc.getDatabaseName()), tName); + tlocation = wh.getDefaultTablePath(db.getDatabase(tableDesc.getDatabaseName()), + tName, tableDesc.isExternal()); } catch (MetaException|HiveException e) { throw new SemanticException(e); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 95e1c31419..49709e596e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -434,8 +434,10 @@ private void setLoadFileLocation( private Path getDefaultCtasLocation(final ParseContext pCtx) throws SemanticException { try { String protoName = null; + boolean isExternal = false; if (pCtx.getQueryProperties().isCTAS()) { protoName = pCtx.getCreateTable().getTableName(); + isExternal = pCtx.getCreateTable().isExternal(); } else if (pCtx.getQueryProperties().isMaterializedView()) { protoName = 
pCtx.getCreateViewDesc().getViewName(); } @@ -444,7 +446,7 @@ private Path getDefaultCtasLocation(final ParseContext pCtx) throws SemanticExce throw new SemanticException("ERROR: The database " + names[0] + " does not exist."); } Warehouse wh = new Warehouse(conf); - return wh.getDefaultTablePath(db.getDatabase(names[0]), names[1]); + return wh.getDefaultTablePath(db.getDatabase(names[0]), names[1], isExternal); } catch (HiveException e) { throw new SemanticException(e); } catch (MetaException e) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java index a2861c5338..604e4d27c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java @@ -470,7 +470,7 @@ boolean shouldModifyPartitionLocation(Database dbObj, Table tableObj, Partition throws IOException, MetaException { String tableName = tableObj.getTableName(); String partLocation = partObj.getSd().getLocation(); - Path oldDefaultPartLocation = oldWh.getDefaultPartitionPath(dbObj, tableName, partSpec); + Path oldDefaultPartLocation = oldWh.getDefaultPartitionPath(dbObj, tableObj, partSpec); return arePathsEqual(conf, partLocation, oldDefaultPartLocation.toString()); } diff --git a/ql/src/test/queries/clientpositive/whroot_external1.q b/ql/src/test/queries/clientpositive/whroot_external1.q new file mode 100644 index 0000000000..e44c9b8f41 --- /dev/null +++ b/ql/src/test/queries/clientpositive/whroot_external1.q @@ -0,0 +1,107 @@ +--! qt:dataset:src + +dfs -rmr -f hdfs:///tmp/whroot_ext; +dfs -mkdir -p hdfs:///tmp/whroot_ext; + +set hive.metastore.warehouse.external.dir=hdfs:///tmp/whroot_ext; + +create table wre1_managed1 (c1 string, c2 string); +show create table wre1_managed1; + +insert into table wre1_managed1 select * from src where key = '0'; +select count(*) from wre1_managed1; + + +-- external table with default location +create external table wre1_ext1 (c1 string, c2 string); +show create table wre1_ext1; + +insert into table wre1_ext1 select * from src where key < 5; +select count(*) from wre1_ext1; + +insert overwrite table wre1_ext1 select * from src where key < 10; +select count(*) from wre1_ext1; + +load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext1; +select count(*) from wre1_ext1; + +-- external table with specified location should still work +dfs -rmr -f hdfs:///tmp/wre1_ext2; +dfs -mkdir -p hdfs:///tmp/wre1_ext2; +create external table wre1_ext2 (c1 string, c2 string) location 'hdfs:///tmp/wre1_ext2'; +show create table wre1_ext2; + +insert into table wre1_ext2 select * from src where key < 5; +select count(*) from wre1_ext2; + +insert overwrite table wre1_ext2 select * from src where key < 10; +select count(*) from wre1_ext2; + +load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext2; +select count(*) from wre1_ext2; + +-- Try with non-default db +create database wre1_db; + +-- external table with default location +create external table wre1_db.wre1_ext3 (c1 string, c2 string); +show create table wre1_db.wre1_ext3; + +insert into table wre1_db.wre1_ext3 select * from src where key < 5; +select count(*) from wre1_db.wre1_ext3; + +insert overwrite table wre1_db.wre1_ext3 select * from src where key < 10; +select count(*) from wre1_db.wre1_ext3; + +load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_db.wre1_ext3; +select 
count(*) from wre1_db.wre1_ext3; + +-- external table with specified location should still work +dfs -rmr -f hdfs:///tmp/wre1_ext4; +dfs -mkdir -p hdfs:///tmp/wre1_ext4; +create external table wre1_db.wre1_ext4 (c1 string, c2 string) location 'hdfs:///tmp/wre1_ext4'; +show create table wre1_db.wre1_ext4; + +insert into table wre1_db.wre1_ext4 select * from src where key < 5; +select count(*) from wre1_db.wre1_ext4; + +insert overwrite table wre1_db.wre1_ext4 select * from src where key < 10; +select count(*) from wre1_db.wre1_ext4; + +load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_db.wre1_ext4; +select count(*) from wre1_db.wre1_ext4; + +-- create table like +create external table wre1_ext5 like wre1_ext2; +show create table wre1_ext5; + +insert into table wre1_ext5 select * from src where key < 5; +select count(*) from wre1_ext5; + +insert overwrite table wre1_ext5 select * from src where key < 10; +select count(*) from wre1_ext5; + +load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext5; +select count(*) from wre1_ext5; + + +create external table wre1_db.wre1_ext6 like wre1_ext2; +show create table wre1_db.wre1_ext6; + +insert into table wre1_db.wre1_ext6 select * from src where key < 5; +select count(*) from wre1_db.wre1_ext6; + +insert overwrite table wre1_db.wre1_ext6 select * from src where key < 10; +select count(*) from wre1_db.wre1_ext6; + +load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_db.wre1_ext6; +select count(*) from wre1_db.wre1_ext6; + +drop table wre1_managed1; +drop table wre1_ext1; +drop table wre1_ext2; +drop database wre1_db cascade; + +dfs -rmr -f hdfs:///tmp/wre1_ext2; +dfs -rmr -f hdfs:///tmp/wre1_ext4; +dfs -rmr -f hdfs:///tmp/whroot_ext; diff --git a/ql/src/test/results/clientpositive/llap/whroot_external1.q.out b/ql/src/test/results/clientpositive/llap/whroot_external1.q.out new file mode 100644 index 0000000000..cac158c926 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/whroot_external1.q.out @@ -0,0 +1,592 @@ +PREHOOK: query: create table wre1_managed1 (c1 string, c2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@wre1_managed1 +POSTHOOK: query: create table wre1_managed1 (c1 string, c2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@wre1_managed1 +PREHOOK: query: show create table wre1_managed1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@wre1_managed1 +POSTHOOK: query: show create table wre1_managed1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@wre1_managed1 +CREATE TABLE `wre1_managed1`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', +#### A masked pattern was here #### +PREHOOK: query: insert into table wre1_managed1 select * from src where key = '0' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@wre1_managed1 +POSTHOOK: query: insert into table wre1_managed1 select * from src where key = '0' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@wre1_managed1 +POSTHOOK: Lineage: wre1_managed1.c1 SIMPLE [] +POSTHOOK: Lineage: wre1_managed1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +PREHOOK: query: select count(*) from wre1_managed1 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_managed1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_managed1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_managed1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +3 +PREHOOK: query: create external table wre1_ext1 (c1 string, c2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@wre1_ext1 +POSTHOOK: query: create external table wre1_ext1 (c1 string, c2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@wre1_ext1 +PREHOOK: query: show create table wre1_ext1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@wre1_ext1 +POSTHOOK: query: show create table wre1_ext1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@wre1_ext1 +CREATE EXTERNAL TABLE `wre1_ext1`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', +#### A masked pattern was here #### +PREHOOK: query: insert into table wre1_ext1 select * from src where key < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@wre1_ext1 +POSTHOOK: query: insert into table wre1_ext1 select * from src where key < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@wre1_ext1 +POSTHOOK: Lineage: wre1_ext1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_ext1 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_ext1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_ext1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 +PREHOOK: query: insert overwrite table wre1_ext1 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@wre1_ext1 +POSTHOOK: query: insert overwrite table wre1_ext1 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@wre1_ext1 +POSTHOOK: Lineage: wre1_ext1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_ext1 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_ext1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_ext1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +10 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@wre1_ext1 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@wre1_ext1 +PREHOOK: query: select count(*) from wre1_ext1 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_ext1 +PREHOOK: Output: hdfs://### HDFS 
PATH ### +POSTHOOK: query: select count(*) from wre1_ext1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: create external table wre1_ext2 (c1 string, c2 string) location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@wre1_ext2 +POSTHOOK: query: create external table wre1_ext2 (c1 string, c2 string) location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@wre1_ext2 +PREHOOK: query: show create table wre1_ext2 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@wre1_ext2 +POSTHOOK: query: show create table wre1_ext2 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@wre1_ext2 +CREATE EXTERNAL TABLE `wre1_ext2`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', +#### A masked pattern was here #### +PREHOOK: query: insert into table wre1_ext2 select * from src where key < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@wre1_ext2 +POSTHOOK: query: insert into table wre1_ext2 select * from src where key < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@wre1_ext2 +POSTHOOK: Lineage: wre1_ext2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_ext2 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_ext2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_ext2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 +PREHOOK: query: insert overwrite table wre1_ext2 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@wre1_ext2 +POSTHOOK: query: insert overwrite table wre1_ext2 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@wre1_ext2 +POSTHOOK: Lineage: wre1_ext2.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_ext2 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_ext2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_ext2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +10 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext2 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@wre1_ext2 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext2 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@wre1_ext2 +PREHOOK: query: select count(*) from wre1_ext2 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_ext2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: 
query: select count(*) from wre1_ext2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: create database wre1_db +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:wre1_db +POSTHOOK: query: create database wre1_db +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:wre1_db +PREHOOK: query: create external table wre1_db.wre1_ext3 (c1 string, c2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:wre1_db +PREHOOK: Output: wre1_db@wre1_ext3 +POSTHOOK: query: create external table wre1_db.wre1_ext3 (c1 string, c2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:wre1_db +POSTHOOK: Output: wre1_db@wre1_ext3 +PREHOOK: query: show create table wre1_db.wre1_ext3 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: wre1_db@wre1_ext3 +POSTHOOK: query: show create table wre1_db.wre1_ext3 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: wre1_db@wre1_ext3 +CREATE EXTERNAL TABLE `wre1_db.wre1_ext3`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', +#### A masked pattern was here #### +PREHOOK: query: insert into table wre1_db.wre1_ext3 select * from src where key < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: wre1_db@wre1_ext3 +POSTHOOK: query: insert into table wre1_db.wre1_ext3 select * from src where key < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: wre1_db@wre1_ext3 +POSTHOOK: Lineage: wre1_ext3.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext3.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_db.wre1_ext3 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_db.wre1_ext3 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 +PREHOOK: query: insert overwrite table wre1_db.wre1_ext3 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: wre1_db@wre1_ext3 +POSTHOOK: query: insert overwrite table wre1_db.wre1_ext3 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: wre1_db@wre1_ext3 +POSTHOOK: Lineage: wre1_ext3.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext3.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_db.wre1_ext3 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_db.wre1_ext3 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +10 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_db.wre1_ext3 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: wre1_db@wre1_ext3 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_db.wre1_ext3 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
wre1_db@wre1_ext3 +PREHOOK: query: select count(*) from wre1_db.wre1_ext3 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext3 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_db.wre1_ext3 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext3 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: create external table wre1_db.wre1_ext4 (c1 string, c2 string) location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:wre1_db +PREHOOK: Output: wre1_db@wre1_ext4 +POSTHOOK: query: create external table wre1_db.wre1_ext4 (c1 string, c2 string) location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:wre1_db +POSTHOOK: Output: wre1_db@wre1_ext4 +PREHOOK: query: show create table wre1_db.wre1_ext4 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: wre1_db@wre1_ext4 +POSTHOOK: query: show create table wre1_db.wre1_ext4 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: wre1_db@wre1_ext4 +CREATE EXTERNAL TABLE `wre1_db.wre1_ext4`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', +#### A masked pattern was here #### +PREHOOK: query: insert into table wre1_db.wre1_ext4 select * from src where key < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: wre1_db@wre1_ext4 +POSTHOOK: query: insert into table wre1_db.wre1_ext4 select * from src where key < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: wre1_db@wre1_ext4 +POSTHOOK: Lineage: wre1_ext4.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext4.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_db.wre1_ext4 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_db.wre1_ext4 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 +PREHOOK: query: insert overwrite table wre1_db.wre1_ext4 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: wre1_db@wre1_ext4 +POSTHOOK: query: insert overwrite table wre1_db.wre1_ext4 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: wre1_db@wre1_ext4 +POSTHOOK: Lineage: wre1_ext4.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext4.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_db.wre1_ext4 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_db.wre1_ext4 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +10 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_db.wre1_ext4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: wre1_db@wre1_ext4 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' 
overwrite into table wre1_db.wre1_ext4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: wre1_db@wre1_ext4 +PREHOOK: query: select count(*) from wre1_db.wre1_ext4 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext4 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_db.wre1_ext4 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext4 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: create external table wre1_ext5 like wre1_ext2 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@wre1_ext5 +POSTHOOK: query: create external table wre1_ext5 like wre1_ext2 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@wre1_ext5 +PREHOOK: query: show create table wre1_ext5 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@wre1_ext5 +POSTHOOK: query: show create table wre1_ext5 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@wre1_ext5 +CREATE EXTERNAL TABLE `wre1_ext5`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( +#### A masked pattern was here #### +PREHOOK: query: insert into table wre1_ext5 select * from src where key < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@wre1_ext5 +POSTHOOK: query: insert into table wre1_ext5 select * from src where key < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@wre1_ext5 +POSTHOOK: Lineage: wre1_ext5.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext5.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_ext5 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_ext5 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_ext5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext5 +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 +PREHOOK: query: insert overwrite table wre1_ext5 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@wre1_ext5 +POSTHOOK: query: insert overwrite table wre1_ext5 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@wre1_ext5 +POSTHOOK: Lineage: wre1_ext5.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext5.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_ext5 +PREHOOK: type: QUERY +PREHOOK: Input: default@wre1_ext5 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_ext5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext5 +POSTHOOK: Output: hdfs://### HDFS PATH ### +10 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext5 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@wre1_ext5 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_ext5 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@wre1_ext5 +PREHOOK: query: select count(*) from wre1_ext5 +PREHOOK: type: 
QUERY +PREHOOK: Input: default@wre1_ext5 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_ext5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@wre1_ext5 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: create external table wre1_db.wre1_ext6 like wre1_ext2 +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:wre1_db +PREHOOK: Output: wre1_db@wre1_ext6 +POSTHOOK: query: create external table wre1_db.wre1_ext6 like wre1_ext2 +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:wre1_db +POSTHOOK: Output: wre1_db@wre1_ext6 +PREHOOK: query: show create table wre1_db.wre1_ext6 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: wre1_db@wre1_ext6 +POSTHOOK: query: show create table wre1_db.wre1_ext6 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: wre1_db@wre1_ext6 +CREATE EXTERNAL TABLE `wre1_db.wre1_ext6`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( +#### A masked pattern was here #### +PREHOOK: query: insert into table wre1_db.wre1_ext6 select * from src where key < 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: wre1_db@wre1_ext6 +POSTHOOK: query: insert into table wre1_db.wre1_ext6 select * from src where key < 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: wre1_db@wre1_ext6 +POSTHOOK: Lineage: wre1_ext6.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext6.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_db.wre1_ext6 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext6 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_db.wre1_ext6 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext6 +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 +PREHOOK: query: insert overwrite table wre1_db.wre1_ext6 select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: wre1_db@wre1_ext6 +POSTHOOK: query: insert overwrite table wre1_db.wre1_ext6 select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: wre1_db@wre1_ext6 +POSTHOOK: Lineage: wre1_ext6.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: wre1_ext6.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from wre1_db.wre1_ext6 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext6 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from wre1_db.wre1_ext6 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext6 +POSTHOOK: Output: hdfs://### HDFS PATH ### +10 +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_db.wre1_ext6 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: wre1_db@wre1_ext6 +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' overwrite into table wre1_db.wre1_ext6 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: wre1_db@wre1_ext6 +PREHOOK: query: select count(*) from wre1_db.wre1_ext6 +PREHOOK: type: QUERY +PREHOOK: Input: wre1_db@wre1_ext6 +PREHOOK: Output: hdfs://### HDFS PATH ### 
+POSTHOOK: query: select count(*) from wre1_db.wre1_ext6 +POSTHOOK: type: QUERY +POSTHOOK: Input: wre1_db@wre1_ext6 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: drop table wre1_managed1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@wre1_managed1 +PREHOOK: Output: default@wre1_managed1 +POSTHOOK: query: drop table wre1_managed1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@wre1_managed1 +POSTHOOK: Output: default@wre1_managed1 +PREHOOK: query: drop table wre1_ext1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@wre1_ext1 +PREHOOK: Output: default@wre1_ext1 +POSTHOOK: query: drop table wre1_ext1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@wre1_ext1 +POSTHOOK: Output: default@wre1_ext1 +PREHOOK: query: drop table wre1_ext2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@wre1_ext2 +PREHOOK: Output: default@wre1_ext2 +POSTHOOK: query: drop table wre1_ext2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@wre1_ext2 +POSTHOOK: Output: default@wre1_ext2 +PREHOOK: query: drop database wre1_db cascade +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:wre1_db +PREHOOK: Output: database:wre1_db +PREHOOK: Output: wre1_db@wre1_ext3 +PREHOOK: Output: wre1_db@wre1_ext4 +PREHOOK: Output: wre1_db@wre1_ext6 +POSTHOOK: query: drop database wre1_db cascade +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:wre1_db +POSTHOOK: Output: database:wre1_db +POSTHOOK: Output: wre1_db@wre1_ext3 +POSTHOOK: Output: wre1_db@wre1_ext4 +POSTHOOK: Output: wre1_db@wre1_ext6 +#### A masked pattern was here #### diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index b9f5fb874d..7eaadf9110 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1826,7 +1826,7 @@ private void create_table_core(final RawStore ms, final Table tbl, if (!TableType.VIRTUAL_VIEW.toString().equals(tbl.getTableType())) { if (tbl.getSd().getLocation() == null || tbl.getSd().getLocation().isEmpty()) { - tblPath = wh.getDefaultTablePath(db, tbl.getTableName()); + tblPath = wh.getDefaultTablePath(db, tbl); } else { if (!isExternal(tbl) && !MetaStoreUtils.isNonNativeTable(tbl)) { LOG.warn("Location: " + tbl.getSd().getLocation() diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java index 56da1151cc..76069bb8cf 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/TransactionalValidationListener.java @@ -440,7 +440,7 @@ private void validateTableStructure(IHMSHandler hmsHandler, Table table) String catName = table.isSetCatName() ? 
table.getCatName() : MetaStoreUtils.getDefaultCatalog(getConf()); tablePath = wh.getDefaultTablePath(hmsHandler.getMS().getDatabase( - catName, table.getDbName()), table.getTableName()); + catName, table.getDbName()), table); } else { tablePath = wh.getDnsPath(new Path(table.getSd().getLocation())); } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java index e31935ebf5..44d06d1ce1 100755 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java @@ -68,8 +68,10 @@ private static final String CAT_DB_TABLE_SEPARATOR = "."; private Path whRoot; + private Path whRootExternal; private final Configuration conf; private final String whRootString; + private final String whRootExternalString; public static final Logger LOG = LoggerFactory.getLogger("hive.metastore.warehouse"); @@ -84,6 +86,7 @@ public Warehouse(Configuration conf) throws MetaException { throw new MetaException(ConfVars.WAREHOUSE.getVarname() + " is not set in the config or blank"); } + whRootExternalString = MetastoreConf.getVar(conf, ConfVars.WAREHOUSE_EXTERNAL); fsHandler = getMetaStoreFsHandler(conf); cm = ReplChangeManager.getInstance(conf); storageAuthCheck = MetastoreConf.getBoolVar(conf, ConfVars.AUTHORIZATION_STORAGE_AUTH_CHECKS); @@ -158,6 +161,18 @@ public Path getWhRoot() throws MetaException { return whRoot; } + public Path getWhRootExternal() throws MetaException { + if (whRootExternal != null) { + return whRootExternal; + } + if (StringUtils.isBlank(whRootExternalString)) { + whRootExternal = getWhRoot(); + } else { + whRootExternal = getDnsPath(new Path(whRootExternalString)); + } + return whRootExternal; + } + /** * Build the database path based on catalog name and database name. This should only be used * when a database is being created or altered. 
If you just want to find out the path a @@ -217,6 +232,13 @@ public Path getDefaultDatabasePath(String dbName) throws MetaException { return new Path(getWhRoot(), dbName.toLowerCase() + DATABASE_WAREHOUSE_SUFFIX); } + public Path getDefaultExternalDatabasePath(String dbName) throws MetaException { + if (dbName.equalsIgnoreCase(DEFAULT_DATABASE_NAME)) { + return getWhRootExternal(); + } + return new Path(getWhRootExternal(), dbName.toLowerCase() + DATABASE_WAREHOUSE_SUFFIX); + } + /** * Returns the default location of the table path using the parent database's location * @param db Database where the table is created @@ -224,10 +246,37 @@ public Path getDefaultDatabasePath(String dbName) throws MetaException { * @return * @throws MetaException */ + @Deprecated public Path getDefaultTablePath(Database db, String tableName) throws MetaException { - return getDnsPath(new Path(getDatabasePath(db), - MetaStoreUtils.encodeTableName(tableName.toLowerCase()))); + return getDefaultTablePath(db, tableName, false); + } + + public Path getDefaultTablePath(Database db, String tableName, boolean isExternal) throws MetaException { + Path dbPath = null; + if (isExternal) { + dbPath = getDefaultExternalDatabasePath(db.getName()); + } else { + dbPath = getDatabasePath(db); + } + return getDnsPath( + new Path(dbPath, MetaStoreUtils.encodeTableName(tableName.toLowerCase()))); + } + + // A few situations where we need the default table path, without a DB object + public Path getDefaultTablePath(String dbName, String tableName, boolean isExternal) throws MetaException { + Path dbPath = null; + if (isExternal) { + dbPath = getDefaultExternalDatabasePath(dbName); + } else { + dbPath = getDefaultDatabasePath(dbName); + } + return getDnsPath( + new Path(dbPath, MetaStoreUtils.encodeTableName(tableName.toLowerCase()))); + } + + public Path getDefaultTablePath(Database db, Table table) throws MetaException { + return getDefaultTablePath(db, table.getTableName(), MetaStoreUtils.isExternalTable(table)); } public static String getQualifiedName(Table table) { @@ -551,9 +600,9 @@ public static boolean makeSpecFromName(Map<String, String> partSpec, Path currPa * @return * @throws MetaException */ - public Path getDefaultPartitionPath(Database db, String tableName, + public Path getDefaultPartitionPath(Database db, Table table, Map<String, String> pm) throws MetaException { - return getPartitionPath(getDefaultTablePath(db, tableName), pm); + return getPartitionPath(getDefaultTablePath(db, table), pm); } /** @@ -597,7 +646,7 @@ public Path getPartitionPath(Database db, Table table, List<String> vals) if (table.getSd().getLocation() != null) { return getPartitionPath(getDnsPath(new Path(table.getSd().getLocation())), pm); } else { - return getDefaultPartitionPath(db, table.getTableName(), pm); + return getDefaultPartitionPath(db, table, pm); } } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index ab03adbf59..54d8830347 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -862,6 +862,10 @@ public static ConfVars getMetaConf(String name) { "validates existing schema against code. 
turn this on if you want to verify existing schema"), WAREHOUSE("metastore.warehouse.dir", "hive.metastore.warehouse.dir", "/user/hive/warehouse", "location of default database for the warehouse"), + WAREHOUSE_EXTERNAL("metastore.warehouse.external.dir", + "hive.metastore.warehouse.external.dir", "", + "Default location for external tables created in the warehouse. " + + "If not set or null, then the normal warehouse location will be used as the default location."), WRITE_SET_REAPER_INTERVAL("metastore.writeset.reaper.interval", "hive.writeset.reaper.interval", 60, TimeUnit.SECONDS, "Frequency of WriteSet reaper runs"),
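
For reviewers: below is a minimal sketch (not part of the patch) of how the patched Warehouse API resolves default table locations. The class name and the two warehouse roots are hypothetical, and it assumes the patched standalone-metastore classes are on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;

// Hypothetical demo class; illustrates the resolution rules added to Warehouse above.
public class ExternalWarehousePathSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("metastore.warehouse.dir", "/user/hive/warehouse");
    // Hypothetical external root; mirrors whRootExternal in the new test.
    conf.set("metastore.warehouse.external.dir", "/wh_ext");

    Warehouse wh = new Warehouse(conf);

    // Managed default: <metastore.warehouse.dir>/twed_db1.db/tab1
    Path managed = wh.getDefaultTablePath("twed_db1", "tab1", false);

    // External default: <metastore.warehouse.external.dir>/twed_db1.db/tab1
    Path external = wh.getDefaultTablePath("twed_db1", "tab1", true);

    // The default database maps to the external root itself: /wh_ext/twed_ext1
    Path defaultDb = wh.getDefaultTablePath("default", "twed_ext1", true);

    System.out.println(managed + "\n" + external + "\n" + defaultDb);
  }
}

If metastore.warehouse.external.dir is unset or blank, getWhRootExternal() falls back to getWhRoot(), so all three calls above would resolve under the managed warehouse root instead.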