diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaTool.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaTool.java
index 81b7ff0eb4..404b5565fb 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaTool.java
@@ -19,15 +19,22 @@ package org.apache.hadoop.hive.metastore.tools.metatool;
 
 import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
-
+import org.json.JSONObject;
+import org.json.JSONArray;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -35,12 +42,23 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.txn.TxnDbUtil;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.apache.hadoop.hive.metastore.txn.TxnUtils;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;
+import org.junit.Assert;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertEquals;
 import org.junit.Before;
 import org.junit.After;
 import org.junit.Test;
@@ -57,7 +77,12 @@
 
   private HiveMetaStoreClient client;
   private OutputStream os;
-
+  protected Driver d;
+  protected TxnStore txnHandler;
+  private static HiveConf hiveConf;
+  private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
+      File.separator + TestHiveMetaTool.class.getCanonicalName() + "-" + System.currentTimeMillis()
+  ).getPath().replaceAll("\\\\", "/");
 
   @Before
   public void setUp() throws Exception {
@@ -66,19 +91,60 @@ public void setUp() throws Exception {
       os = new ByteArrayOutputStream();
       System.setOut(new PrintStream(os));
 
-      HiveConf hiveConf = new HiveConf(HiveMetaTool.class);
+      hiveConf = new HiveConf(HiveMetaTool.class);
       client = new HiveMetaStoreClient(hiveConf);
       createDatabase();
       createTable();
       client.close();
+      Path workDir = new Path(System.getProperty("test.tmp.dir",
+          "target" + File.separator + "test" + File.separator + "tmp"));
+      hiveConf.set("mapred.local.dir", workDir + File.separator + this.getClass().getSimpleName() +
+          File.separator + "mapred" + File.separator + "local");
+      hiveConf.set("mapred.system.dir", workDir + File.separator + this.getClass().getSimpleName() +
+          File.separator + "mapred" + File.separator + "system");
+      hiveConf.set("mapreduce.jobtracker.staging.root.dir", workDir + File.separator + this.getClass().getSimpleName() +
+          File.separator + "mapred" + File.separator + "staging");
+      hiveConf.set("mapred.temp.dir", workDir + File.separator + this.getClass().getSimpleName() +
+          File.separator + "mapred" + File.separator + "temp");
+      hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
+      hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
+      hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, getWarehouseDir());
+      hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName());
+      hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
+          "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
+      hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true);
+      HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.MERGE_SPLIT_UPDATE, true);
+      hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
+      hiveConf.setBoolean("mapred.input.dir.recursive", true);
+      TxnDbUtil.setConfValues(hiveConf);
+      txnHandler = TxnUtils.getTxnStore(hiveConf);
+      TxnDbUtil.prepDb(hiveConf);
+      File f = new File(getWarehouseDir());
+      if (f.exists()) {
+        FileUtil.fullyDelete(f);
+      }
+      if (!(new File(getWarehouseDir()).mkdirs())) {
+        throw new RuntimeException("Could not create " + getWarehouseDir());
+      }
+      SessionState ss = SessionState.start(hiveConf);
+      ss.applyAuthorizationPolicy();
+      d = new Driver(new QueryState.Builder().withHiveConf(hiveConf).nonIsolated().build());
+      d.setMaxRows(10000);
     } catch (Exception e) {
       System.err.println("Unable to setup the hive metatool test");
       System.err.println(StringUtils.stringifyException(e));
       throw new Exception(e);
     }
   }
 
+  protected String getWarehouseDir() {
+    return getTestDataDir() + "/warehouse";
+  }
+
+  private String getTestDataDir() {
+    return TEST_DATA_DIR;
+  }
+
   private void createDatabase() throws Exception {
     if (client.getAllDatabases().contains(DB_NAME)) {
@@ -142,17 +208,262 @@ public void testUpdateFSRootLocation() throws Exception {
     checkAvroSchemaURLProps(AVRO_URI);
   }
 
+  /*
+   * Tests the -listExtTblLocs option on various input combinations.
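+   * For reference, the tool writes one JSON file mapping each chosen covering directory
+   * to the entities under it; e.g. for Case 1 below the expected shape is (paths illustrative):
+   *   { "file:/.../ext" : ["default.ext1", "default.ext2", "default.ext3"] }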
+   */
+  @Test
+  public void testListExtTblLocs() throws Exception {
+    String extTblLocation = getTestDataDir() + "/ext";
+    String outLocation = getTestDataDir() + "/extTblOutput/";
+
+    //Case 1: multiple unpartitioned external tables, expected o/p: 1 location
+    runStatementOnDriver("create external table ext1(a int) location '" + extTblLocation + "/t1'");
+    runStatementOnDriver("create external table ext2(a int) location '" + extTblLocation + "/t2'");
+    runStatementOnDriver("create external table ext3(a int) location '" + extTblLocation + "/t3'");
+    JSONObject outJS = getListExtTblLocs("default", outLocation);
+    Set<String> outLocationSet = outJS.keySet();
+    Assert.assertTrue(outLocationSet.contains(getAbsolutePath(extTblLocation)));
+    Assert.assertEquals(1, outLocationSet.size());
+    runStatementOnDriver("drop table ext1");
+    runStatementOnDriver("drop table ext2");
+    runStatementOnDriver("drop table ext3");
+
+    //Case 2: one table containing all partitions in the table location, expected o/p: 1 location with "*"
+    runStatementOnDriver("create external table ext(a int) partitioned by (b int) location '" + extTblLocation + "'");
+    runStatementOnDriver("alter table ext add partition(b = 1)");
+    runStatementOnDriver("alter table ext add partition(b = 2)");
+    runStatementOnDriver("alter table ext add partition(b = 3)");
+    outJS = getListExtTblLocs("default", outLocation);
+    String expectedOutLoc = getAbsolutePath(extTblLocation);
+    outLocationSet = outJS.keySet();
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc));
+    Assert.assertEquals(1, outLocationSet.size());
+    JSONArray outArr = outJS.getJSONArray(expectedOutLoc);
+    Assert.assertEquals(1, outArr.length());
+    Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext.*"));
+
+    //Case 3 : table contains no partitions, 3 partitions outside the table location.
+    // inputs: ../ext/t1 - table location, partition locations:
+    //         ../ext/b1
+    //         ../ext/b2
+    //         ../ext/b3
+    // expected output : ../ext containing 4 elements
+    runStatementOnDriver("drop table ext");
+    runStatementOnDriver("create external table ext(a int) partitioned by (b int) " +
+        "location '" + getTestDataDir() + "/ext/t1'");
+    runStatementOnDriver("alter table ext add partition(b = 1) location '" + getTestDataDir() + "/ext/b1'");
+    runStatementOnDriver("alter table ext add partition(b = 2) location '" + getTestDataDir() + "/ext/b2'");
+    runStatementOnDriver("alter table ext add partition(b = 3) location '" + getTestDataDir() + "/ext/b3'");
+    outJS = getListExtTblLocs("default", outLocation);
+    expectedOutLoc = getAbsolutePath(extTblLocation);
+    outLocationSet = outJS.keySet();
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc));
+    Assert.assertEquals(1, outLocationSet.size());
+    outArr = outJS.getJSONArray(expectedOutLoc);
+    Assert.assertEquals(4, outArr.length());
+    Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext p(0/3)"));
+    Assert.assertTrue(outArr.getString(1).equalsIgnoreCase("default.ext.b=1"));
+    Assert.assertTrue(outArr.getString(2).equalsIgnoreCase("default.ext.b=2"));
+    Assert.assertTrue(outArr.getString(3).equalsIgnoreCase("default.ext.b=3"));
+
+    //Case 4 : partitions at multiple depths
+    // inputs: ../ext/b0       - contains tbl-loc (t1)
+    //         ../ext/p=0      - contains 1 partition
+    //         ../ext/b1/b2/b3 - contains 3 partitions of the table (p1, p2, p3)
+    // expected output : [../ext/b1/b2/b3 containing 3 elements, t1, p0]
+    runStatementOnDriver("drop table ext");
+    runStatementOnDriver("create external table ext(a int) partitioned by (p int) " +
+        "location '" + getTestDataDir() + "/ext/b0'");
+    runStatementOnDriver("alter table ext add partition(p = 0) location '" + getTestDataDir() + "/ext'");
+    runStatementOnDriver("alter table ext add partition(p = 1) location '" + getTestDataDir() + "/ext/b1/b2/b3'");
+    runStatementOnDriver("alter table ext add partition(p = 2) location '" + getTestDataDir() + "/ext/b1/b2/b3'");
+    runStatementOnDriver("alter table ext add partition(p = 3) location '" + getTestDataDir() + "/ext/b1/b2/b3'");
+    outJS = getListExtTblLocs("default", outLocation);
+    String expectedOutLoc1 = getAbsolutePath(extTblLocation + "/b0");
+    String expectedOutLoc2 = getAbsolutePath(extTblLocation + "/p=0");
+    String expectedOutLoc3 = getAbsolutePath(extTblLocation + "/b1/b2/b3");
+    outLocationSet = outJS.keySet();
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc1));
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc2));
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc3));
+    Assert.assertEquals(3, outLocationSet.size());
+    outArr = outJS.getJSONArray(expectedOutLoc1);
+    Assert.assertEquals(1, outArr.length());
+    Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext p(0/4)"));
+    outArr = outJS.getJSONArray(expectedOutLoc2);
+    Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext.p=0"));
+    outArr = outJS.getJSONArray(expectedOutLoc3);
+    Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext.p=1"));
+    Assert.assertTrue(outArr.getString(1).equalsIgnoreCase("default.ext.p=2"));
+    Assert.assertTrue(outArr.getString(2).equalsIgnoreCase("default.ext.p=3"));
+
+    // Case 5 : root location contains many leaves
+    // inputs: ../ext/b0 - contains tbl-location and 2 partitions (p0, p1), 3 elements total
+    //         ../ext/b1 - contains 2 partitions of the table (p2, p3)
+    //         ../ext    - contains 6 partitions (p4..p9)
+    // expected output : we take the root (../ext) itself because it covers more than half of the locations,
+    //                   i.e. [../ext containing 11 elements]
+    runStatementOnDriver("drop table ext");
+    runStatementOnDriver("create external table ext(a int) partitioned by (p int) " +
+        "location '" + getTestDataDir() + "/ext/b0'");
+    runStatementOnDriver("alter table ext add partition(p = 0) location '" + getTestDataDir() + "/ext/b0'");
+    runStatementOnDriver("alter table ext add partition(p = 1) location '" + getTestDataDir() + "/ext/b0'");
+    runStatementOnDriver("alter table ext add partition(p = 2) location '" + getTestDataDir() + "/ext/b1'");
+    runStatementOnDriver("alter table ext add partition(p = 3) location '" + getTestDataDir() + "/ext/b1'");
+    runStatementOnDriver("alter table ext add partition(p = 4) location '" + getTestDataDir() + "/ext'");
+    runStatementOnDriver("alter table ext add partition(p = 5) location '" + getTestDataDir() + "/ext'");
+    runStatementOnDriver("alter table ext add partition(p = 6) location '" + getTestDataDir() + "/ext'");
+    runStatementOnDriver("alter table ext add partition(p = 7) location '" + getTestDataDir() + "/ext'");
+    runStatementOnDriver("alter table ext add partition(p = 8) location '" + getTestDataDir() + "/ext'");
+    runStatementOnDriver("alter table ext add partition(p = 9) location '" + getTestDataDir() + "/ext'");
+    outJS = getListExtTblLocs("default", outLocation);
+    expectedOutLoc1 = getAbsolutePath(extTblLocation);
+    outLocationSet = outJS.keySet();
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc1));
+    Assert.assertEquals(1, outLocationSet.size());
+    outArr = outJS.getJSONArray(expectedOutLoc1);
+    Assert.assertEquals(11, outArr.length());
+    Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext p(0/10)"));
+    Assert.assertTrue(outArr.getString(1).equalsIgnoreCase("default.ext.p=0"));
+    Assert.assertTrue(outArr.getString(2).equalsIgnoreCase("default.ext.p=1"));
+    Assert.assertTrue(outArr.getString(3).equalsIgnoreCase("default.ext.p=2"));
+    Assert.assertTrue(outArr.getString(4).equalsIgnoreCase("default.ext.p=3"));
+    Assert.assertTrue(outArr.getString(5).equalsIgnoreCase("default.ext.p=4"));
+    Assert.assertTrue(outArr.getString(6).equalsIgnoreCase("default.ext.p=5"));
+    Assert.assertTrue(outArr.getString(7).equalsIgnoreCase("default.ext.p=6"));
+    Assert.assertTrue(outArr.getString(8).equalsIgnoreCase("default.ext.p=7"));
+    Assert.assertTrue(outArr.getString(9).equalsIgnoreCase("default.ext.p=8"));
+    Assert.assertTrue(outArr.getString(10).equalsIgnoreCase("default.ext.p=9"));
+
+    // Case 6 : check the count of partitions contained in the table location
+    // inputs: ../ext/b0       - table1 location containing 3/5 partitions
+    //         ../ext/b1       - table2 location containing 2/4 partitions
+    //         ../ext/b2/b3/b4 - 2 partitions of table1, 2 partitions of table2, table location of table3
+    //         ../ext          - partitions of table3, fewer in number than all of the above combined
+    // expected output : [../ext/b0, ../ext/b1, ../ext/b2/b3/b4, table3 partitions individually]
+    runStatementOnDriver("drop table ext");
+    runStatementOnDriver("create external table ext(a int) partitioned by (p int) " +
+        "location '" + getTestDataDir() + "/ext/b0'");
+    runStatementOnDriver("create external table ext2(a int) partitioned by (p int, p1 int) " +
+        "location '" + getTestDataDir() + "/ext/b1'");
+    runStatementOnDriver("create external table ext3(a int) partitioned by (p int) " +
+        "location '" + getTestDataDir() + "/ext/b2/b3/b4'");
runStatementOnDriver("alter table ext add partition(p = 0)" ); + runStatementOnDriver("alter table ext add partition(p = 1)" ); + runStatementOnDriver("alter table ext add partition(p = 2)" ); + runStatementOnDriver("alter table ext2 add partition(p = 0, p1 = 0)"); + runStatementOnDriver("alter table ext2 add partition(p = 0, p1 = 1)"); + runStatementOnDriver("alter table ext3 add partition(p = 0) location '" + + getTestDataDir() + "/ext'" ); + runStatementOnDriver("alter table ext3 add partition(p = 1) location '" + + getTestDataDir() + "/ext'" ); + runStatementOnDriver("alter table ext3 add partition(p = 2) location '" + + getTestDataDir() + "/ext'" ); + runStatementOnDriver("alter table ext add partition(p = 3) location '" + + getTestDataDir() + "/ext/b2/b3/b4'" ); + runStatementOnDriver("alter table ext add partition(p = 4) location '" + + getTestDataDir() + "/ext/b2/b3/b4'" ); + runStatementOnDriver("alter table ext2 add partition(p = 0, p1 = 2) location '" + + getTestDataDir() + "/ext/b2/b3/b4'"); + runStatementOnDriver("alter table ext2 add partition(p = 0, p1 = 3) location '" + + getTestDataDir() + "/ext/b2/b3/b4'"); + + + outJS = getListExtTblLocs("default", outLocation); + expectedOutLoc1 = getAbsolutePath(extTblLocation + "/b0"); + expectedOutLoc2 = getAbsolutePath(extTblLocation + "/b1"); + expectedOutLoc3 = getAbsolutePath(extTblLocation + "/b2/b3/b4"); + String expectedOutLoc4 = getAbsolutePath(extTblLocation + "/p=0"); + String expectedOutLoc5 = getAbsolutePath(extTblLocation + "/p=1"); + String expectedOutLoc6 = getAbsolutePath(extTblLocation + "/p=2"); + + outLocationSet = outJS.keySet(); + Assert.assertEquals(outLocationSet.size(), 6); + Assert.assertTrue(outLocationSet.contains(expectedOutLoc1)); + outArr = outJS.getJSONArray(expectedOutLoc1); + Assert.assertEquals(outArr.length(), 1); + Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext p(3/5)")); + Assert.assertTrue(outLocationSet.contains(expectedOutLoc2)); + outArr = outJS.getJSONArray(expectedOutLoc2); + Assert.assertEquals(outArr.length(), 1); + Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext2 p(2/4)")); + Assert.assertTrue(outLocationSet.contains(expectedOutLoc3)); + outArr = outJS.getJSONArray(expectedOutLoc3); + Assert.assertEquals(outArr.length(), 5); + Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext.p=3")); + Assert.assertTrue(outArr.getString(1).equalsIgnoreCase("default.ext.p=4")); + Assert.assertTrue(outArr.getString(2).equalsIgnoreCase("default.ext2.p=0/p1=2")); + Assert.assertTrue(outArr.getString(3).equalsIgnoreCase("default.ext2.p=0/p1=3")); + Assert.assertTrue(outArr.getString(4).equalsIgnoreCase("default.ext3 p(0/3)")); + Assert.assertTrue(outLocationSet.contains(expectedOutLoc4)); + outArr = outJS.getJSONArray(expectedOutLoc4); + Assert.assertEquals(outArr.length(), 1); + Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext3.p=0")); + Assert.assertTrue(outLocationSet.contains(expectedOutLoc5)); + outArr = outJS.getJSONArray(expectedOutLoc5); + Assert.assertEquals(outArr.length(), 1); + Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext3.p=1")); + Assert.assertTrue(outLocationSet.contains(expectedOutLoc6)); + outArr = outJS.getJSONArray(expectedOutLoc6); + Assert.assertEquals(outArr.length(), 1); + Assert.assertTrue(outArr.getString(0).equalsIgnoreCase("default.ext3.p=2")); + } + + private String getAbsolutePath(String extTblLocation) { + return "file:" + extTblLocation; + } + + private JSONObject 
+  private JSONObject getListExtTblLocs(String dbName, String outLocation) throws IOException {
+    File f = new File(outLocation);
+    if (f.exists()) {
+      FileUtil.fullyDelete(f);
+    }
+    if (!(new File(outLocation).mkdirs())) {
+      throw new RuntimeException("Could not create " + outLocation);
+    }
+    HiveMetaTool.main(new String[] {"-listExtTblLocs", dbName, outLocation});
+    for (File outFile : f.listFiles()) {
+      String contents = new String(Files.readAllBytes(Paths.get(outFile.getAbsolutePath())));
+      return new JSONObject(contents);
+    }
+    return null;
+  }
+
   private void checkAvroSchemaURLProps(String expectedUri) throws TException {
     Table table = client.getTable(DB_NAME, TABLE_NAME);
     assertEquals(expectedUri, table.getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName()));
     assertEquals(expectedUri, table.getSd().getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName()));
   }
 
+  protected List<String> runStatementOnDriver(String stmt) throws Exception {
+    try {
+      d.run(stmt);
+    } catch (CommandProcessorException e) {
+      throw new RuntimeException(stmt + " failed: " + e);
+    }
+    List<String> rs = new ArrayList<>();
+    d.getResults(rs);
+    return rs;
+  }
+
   @After
   public void tearDown() throws Exception {
     try {
       client.dropTable(DB_NAME, TABLE_NAME);
       client.dropDatabase(DB_NAME);
+      try {
+        if (d != null) {
+          d.close();
+          d.destroy();
+          d = null;
+        }
+      } finally {
+        TxnDbUtil.cleanDb(hiveConf);
+        FileUtils.deleteDirectory(new File(getTestDataDir()));
+      }
 
       client.close();
     } catch (Throwable e) {
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaTool.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaTool.java
index 760d78df24..e7de8784a9 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaTool.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaTool.java
@@ -50,6 +50,8 @@ public static void main(String[] args) {
       task = new MetaToolTaskExecuteJDOQLQuery();
     } else if (cl.isUpdateLocation()) {
       task = new MetaToolTaskUpdateLocation();
+    } else if (cl.isListExtTblLocs()) {
+      task = new MetaToolTaskListExtTblLocs();
     } else {
       throw new IllegalArgumentException("No task was specified!");
     }
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaToolCommandLine.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaToolCommandLine.java
index 1223f0d623..5b26aec623 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaToolCommandLine.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaToolCommandLine.java
@@ -57,6 +57,16 @@
         "serde-prop-key/table-prop-key that is specified and updates its value if found.")
     .create("updateLocation");
 
+  @SuppressWarnings("static-access")
+  private static final Option LIST_EXT_TBL_LOCS = OptionBuilder
+      .withArgName("dbName> <output-loc")
+      .hasArgs(2)
+      .withDescription("Lists the locations of the external tables in the databases matching the given pattern, " +
+          "writing the result as a JSON file under the given output location.")
+      .create("listExtTblLocs");
+
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskListExtTblLocs.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskListExtTblLocs.java
new file mode 100644
--- /dev/null
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskListExtTblLocs.java
+package org.apache.hadoop.hive.metastore.tools.metatool;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.TreeSet;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.ObjectStore;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.thrift.TException;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+
+public class MetaToolTaskListExtTblLocs extends MetaToolTask {
+
+  private static final Logger LOG = LoggerFactory.getLogger(MetaToolTaskListExtTblLocs.class);
+
+  private final Map<String, HashSet<String>> coverageList = new HashMap<>();
+  private final HashMap<String, DataLocation> inputLocations = new HashMap<>();
+
+  @Override
+  void execute() {
+    String[] loc = getCl().getListExtTblLocsParams();
+    try {
+      generateExternalTableInfo(loc[0], loc[1]);
+    } catch (IOException | TException | JSONException e) {
+      LOG.error("Listing external table locations failed: ", e);
+    }
+  }
+
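+  /*
+   * Overall flow: collect every external table location and every partition location outside
+   * its table's location, drop locations nested under other collected ones, then pick a small
+   * set of covering directories via a tree walk (see createOutputList).
+   */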
+  private void generateExternalTableInfo(String dbPattern, String outputDir) throws TException, IOException,
+      JSONException {
+    ObjectStore objectStore = getObjectStore();
+    Configuration conf = msConf != null ? msConf : objectStore.getConf();
+    String defaultCatalog = MetaStoreUtils.getDefaultCatalog(conf);
+    List<String> databases = objectStore.getDatabases(defaultCatalog, dbPattern);
+    System.out.println("Number of databases found for given pattern: " + databases.size());
+    TreeSet<String> locations = new TreeSet<>();
+    for (String db : databases) {
+      List<String> tables = objectStore.getAllTables(defaultCatalog, db);
+      for (String tblName : tables) {
+        Table t = objectStore.getTable(defaultCatalog, db, tblName);
+        if (TableType.EXTERNAL_TABLE.name().equalsIgnoreCase(t.getTableType())) {
+          String tblLocation = t.getSd().getLocation();
+          DataLocation dataLocation = new DataLocation(db, tblName, 0, 0, null);
+          inputLocations.put(tblLocation, dataLocation);
+          if (!isCovered(locations, new Path(tblLocation))) {
+            locations.add(tblLocation);
+          }
+          //retrieving partition locations outside the table location
+          Map<String, String> partitionLocations = objectStore.getPartitionLocations(defaultCatalog, db, tblName,
+              tblLocation, -1);
+          dataLocation.setTotalPartitions(partitionLocations.size());
+          for (String partitionName : partitionLocations.keySet()) {
+            String partLocation = partitionLocations.get(partitionName);
+            //a null value means the partition is in the table location, so we do not add it to the input
+            if (partLocation == null) {
+              dataLocation.incrementNumPartsInTblLoc();
+            } else {
+              partLocation = partLocation + Path.SEPARATOR +
+                  Warehouse.makePartName(Warehouse.makeSpecFromName(partitionName), false);
+              inputLocations.put(partLocation, new DataLocation(db, tblName, 0, 0, partitionName));
+              if (!isCovered(locations, new Path(partLocation))) {
+                locations.add(partLocation);
+              }
+            }
+          }
+        }
+      }
+    }
+    if (!locations.isEmpty()) {
+      removeNestedStructure(locations);
+      createOutputList(locations, outputDir, dbPattern);
+    } else {
+      System.out.println("No external tables found to process.");
+    }
+  }
+
+  private boolean isPathWithinSubtree(Path path, Path subtree) {
+    int subtreeDepth = subtree.depth();
+    while (path != null) {
+      if (subtreeDepth > path.depth()) {
+        return false;
+      }
+      if (subtree.equals(path)) {
+        return true;
+      }
+      path = path.getParent();
+    }
+    return false;
+  }
+
+  /*
+   * Determines whether an existing location covers the given location and records the coverage in the output.
+   */
+  private boolean isCovered(TreeSet<String> locations, Path path) {
+    Path originalPath = new Path(path.toString());
+    while (path != null) {
+      if (locations.contains(path.toString())) {
+        addCoverage(path, originalPath, true);
+        return true;
+      }
+      path = path.getParent();
+    }
+    return false;
+  }
+
+  /*
+   * Covers a child node using a parent:
+   * removes the child and marks all nodes covered by the child as being covered by the parent.
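+   * e.g. if the coverage so far is {/a/b -> {/a/b/c}} and /a is chosen as the parent of /a/b,
+   * it becomes {/a -> {/a/b, /a/b/c}} (illustrative paths).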
+   */
+  private void addCoverage(Path parentPath, Path childPath, boolean addChild) {
+    String childLoc = childPath.toString();
+    String parentLoc = parentPath.toString();
+    HashSet<String> pathsUnderChild = coverageList.get(childLoc);
+    coverageList.remove(childLoc);
+    if (coverageList.get(parentLoc) == null) {
+      coverageList.put(parentLoc, new HashSet<>());
+    }
+    HashSet<String> pathsUnderParent = coverageList.get(parentLoc);
+    if (addChild) {
+      pathsUnderParent.add(childPath.toString());
+    }
+    if (pathsUnderChild != null) {
+      pathsUnderParent.addAll(pathsUnderChild);
+    }
+  }
+
+  /*
+   * Transforms a collection so that no element is an ancestor of another.
+   */
+  private void removeNestedStructure(TreeSet<String> locations) {
+    List<String> locationList = new ArrayList<>();
+    locationList.addAll(locations);
+    for (int i = 0; i < locationList.size(); i++) {
+      String currLoc = locationList.get(i);
+      Path currPath = new Path(currLoc);
+      for (int j = i + 1; j < locationList.size(); j++) {
+        String nextLoc = locationList.get(j);
+        Path nextPath = new Path(nextLoc);
+        if (isPathWithinSubtree(nextPath, currPath)) {
+          addCoverage(currPath, nextPath, true);
+          locations.remove(nextLoc);
+          i = j;
+        } else {
+          i = j - 1;
+          break;
+        }
+      }
+    }
+  }
+
+  /*
+   * Writes the output to the given location.
+   * We construct a tree out of the external table locations and use it to determine suitable
+   * directories covering all of them.
+   */
+  private void createOutputList(TreeSet<String> locations, String outputDir, String dbPattern) throws IOException,
+      JSONException {
+    ExternalTableGraphNode rootNode = constructTree(locations);
+    //Traverse the tree in breadth-first manner and decide which nodes to include.
+    //Every node either covers all leaves in its subtree itself
+    //or delegates this duty to its child nodes.
+    Queue<ExternalTableGraphNode> queue = new LinkedList<>();
+    queue.add(rootNode);
+    while (!queue.isEmpty()) {
+      ExternalTableGraphNode current = queue.remove();
+      if (current.isLeaf()) {
+        // a leaf needs to be added to the solution, i.e. marked as being covered.
+        // This was done during graph construction, so we continue.
+        continue;
+      }
+      int nonTrivialCoverage = 0;
+      List<ExternalTableGraphNode> childNodes = current.getChildNodes();
+      for (ExternalTableGraphNode child : childNodes) {
+        if (child.getNumLeavesCovered() > 1) {
+          nonTrivialCoverage += child.getNumLeavesCovered();
+        }
+      }
+      int numLeavesCovered = current.getNumLeavesCovered();
+      if ((nonTrivialCoverage >= (numLeavesCovered + 1) / 2) || numLeavesCovered == 1) {
+        queue.addAll(childNodes);
+      } else {
+        addToSolution(current);
+      }
+    }
+    String outFileName = "externalTableLocations_" + dbPattern + "_" + System.currentTimeMillis() + ".txt";
+    System.out.println("Writing output to " + outFileName);
+    FileWriter fw = new FileWriter(outputDir + "/" + outFileName);
+    PrintWriter pw = new PrintWriter(fw);
+    JSONObject jsonObject = new JSONObject();
+    for (String outputLocation : coverageList.keySet()) {
+      HashSet<String> coveredLocations = coverageList.get(outputLocation);
+      JSONArray outputEntities = listOutputEntities(coveredLocations);
+      jsonObject.put(outputLocation, outputEntities);
+    }
+    pw.println(jsonObject.toString(4).replace("\\", ""));
+    pw.close();
+  }
+
+  /*
+   * Returns the list of entities (tables or partition names) covered by a location.
+   * A table name followed by "*" indicates that all partitions are inside the table location;
+   * otherwise we record the number of partitions covered by the table location.
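+   * Examples: "db.tbl.*" (all partitions under the table location), "db.tbl p(3/5)"
+   * (3 of 5 partitions under it), "db.tbl.p=1" (a partition outside the table location).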
+   */
+  private JSONArray listOutputEntities(HashSet<String> locations) {
+    List<String> listEntities = new ArrayList<>();
+    for (String loc : locations) {
+      DataLocation data = inputLocations.get(loc);
+      String out = data.getDbName() + "." + data.getTblName();
+      String partName = data.getPartName();
+      if (partName == null) {
+        int numPartInTblLoc = data.getNumPartitionsInTblLoc();
+        int totPartitions = data.getTotalPartitions();
+        if (totPartitions > 0 && numPartInTblLoc == totPartitions) {
+          out = out + ".*";
+        } else if (totPartitions > 0) {
+          out = out + " p(" + numPartInTblLoc + "/" + totPartitions + ")";
+        }
+      } else {
+        out = out + "." + partName;
+      }
+      listEntities.add(out);
+    }
+    Collections.sort(listEntities);
+    return new JSONArray(listEntities);
+  }
+
+  private ExternalTableGraphNode constructTree(TreeSet<String> locations) {
+    ExternalTableGraphNode rootNode = null;
+    HashMap<String, ExternalTableGraphNode> locationGraph = new HashMap<>();
+    // Every location is represented by a leaf in the tree.
+    // We traverse the input locations and construct the tree.
+    for (String leaf : locations) {
+      ExternalTableGraphNode currNode = new ExternalTableGraphNode(leaf, new ArrayList<>(), true);
+      locationGraph.put(leaf, currNode);
+      if (coverageList.get(leaf) == null) {
+        coverageList.put(leaf, new HashSet<>());
+      }
+      //mark the leaf as being covered by itself
+      HashSet<String> currCoverage = coverageList.get(leaf);
+      currCoverage.add(leaf);
+      currNode.setNumLeavesCovered(currCoverage.size());
+      Path parent = new Path(leaf).getParent();
+      ExternalTableGraphNode parNode;
+      //traverse upward to the root in order to construct the graph
+      while (parent != null) {
+        String parentLoc = parent.toString();
+        if (!locationGraph.containsKey(parentLoc)) {
+          //if the parent doesn't exist in the graph, create it
+          parNode = new ExternalTableGraphNode(parentLoc, new ArrayList<>(), false);
+          locationGraph.put(parentLoc, parNode);
+        } else {
+          parNode = locationGraph.get(parentLoc);
+        }
+        if (currNode.getParent() == null) {
+          parNode.addChild(currNode);
+          currNode.setParent(parNode);
+        } else {
+          break;
+        }
+        currNode = parNode;
+        parent = parent.getParent();
+      }
+      if (parent == null && rootNode == null) {
+        rootNode = currNode;
+        rootNode.setParent(rootNode);
+      }
+    }
+    rootNode.updateNumLeavesCovered();
+    return rootNode;
+  }
+
+  private void addToSolution(ExternalTableGraphNode node) {
+    //since this node is in the solution, all its children should be covered using this node.
+    if (!node.isLeaf()) {
+      addCoverageRecursive(node);
+    }
+  }
+
+  private void addCoverageRecursive(ExternalTableGraphNode node) {
+    for (ExternalTableGraphNode child : node.getChildNodes()) {
+      if (child.isLeaf()) {
+        addCoverage(new Path(node.getLocation()), new Path(child.getLocation()), true);
+      } else {
+        addCoverageRecursive(child);
+        addCoverage(new Path(node.getLocation()), new Path(child.getLocation()), false);
+      }
+    }
+  }
+
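+  // Test hook: when set (e.g. by unit tests), this configuration is used instead of the ObjectStore's.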
+  @VisibleForTesting
+  static Configuration msConf = null;
+
+  /*
+   * Describes one external table data location.
+   * A location is either a table location (partName is null) or a partition location
+   * outside the table location. For a table location we additionally store how many
+   * partitions the table has and how many of them are in the table location itself.
+   */
+  private class DataLocation {
+    private String dbName;
+    private String tblName;
+    private int numPartitionsInTblLoc;
+    private String partName;
+    private int totalPartitions;
+
+    private DataLocation(String dbName, String tblName, int totalPartitions, int numPartitionsInTblLoc,
+        String partName) {
+      this.dbName = dbName;
+      this.tblName = tblName;
+      this.totalPartitions = totalPartitions;
+      this.numPartitionsInTblLoc = numPartitionsInTblLoc;
+      this.partName = partName;
+    }
+
+    private void incrementNumPartsInTblLoc() {
+      this.numPartitionsInTblLoc++;
+    }
+
+    private String getPartName() {
+      return this.partName;
+    }
+
+    private String getDbName() {
+      return this.dbName;
+    }
+
+    private String getTblName() {
+      return this.tblName;
+    }
+
+    private int getNumPartitionsInTblLoc() {
+      return this.numPartitionsInTblLoc;
+    }
+
+    private int getTotalPartitions() {
+      return this.totalPartitions;
+    }
+
+    private void setTotalPartitions(int totalPartitions) {
+      this.totalPartitions = totalPartitions;
+    }
+  }
+
+  private class ExternalTableGraphNode {
+    private String location;
+    private List<ExternalTableGraphNode> childNodes;
+    private ExternalTableGraphNode parent;
+    private boolean isLeaf;
+    private int numLeavesCovered;
+
+    private ExternalTableGraphNode(String location, List<ExternalTableGraphNode> childNodes, boolean isLeaf) {
+      this.location = location;
+      this.childNodes = childNodes;
+      this.isLeaf = isLeaf;
+      this.parent = null;
+    }
+
+    private void addChild(ExternalTableGraphNode child) {
+      this.childNodes.add(child);
+    }
+
+    private List<ExternalTableGraphNode> getChildNodes() {
+      return this.childNodes;
+    }
+
+    private boolean isLeaf() {
+      return this.isLeaf;
+    }
+
+    private void setNumLeavesCovered(int numLeavesCovered) {
+      this.numLeavesCovered = numLeavesCovered;
+    }
+
+    private int getNumLeavesCovered() {
+      return this.numLeavesCovered;
+    }
+
+    private String getLocation() {
+      return this.location;
+    }
+
+    private void setParent(ExternalTableGraphNode node) {
+      this.parent = node;
+    }
+
+    private ExternalTableGraphNode getParent() {
+      return this.parent;
+    }
+
+    private void updateNumLeavesCovered() {
+      if (this.isLeaf) {
+        return;
+      }
+      this.numLeavesCovered = 0;
+      for (ExternalTableGraphNode currChild : childNodes) {
+        currChild.updateNumLeavesCovered();
+        this.numLeavesCovered += currChild.getNumLeavesCovered();
+      }
+    }
+  }
+}
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaToolCommandLine.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaToolCommandLine.java
index 9563bd63aa..62f0283711 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaToolCommandLine.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaToolCommandLine.java
@@ -44,6 +44,8 @@ public void testParseListFSRoot() throws ParseException {
     assertNull(cl.getJDOQLQuery());
     assertFalse(cl.isUpdateLocation());
     assertNull(cl.getUpddateLocationParams());
+    assertFalse(cl.isListExtTblLocs());
+    assertNull(cl.getListExtTblLocsParams());
     assertFalse(cl.isDryRun());
     assertNull(cl.getSerdePropKey());
     assertNull(cl.getTablePropKey());
@@ -57,6 +59,8 @@ public void testParseExecuteJDOQL() throws ParseException {
     assertEquals("select a from b", cl.getJDOQLQuery());
     assertFalse(cl.isUpdateLocation());
     assertNull(cl.getUpddateLocationParams());
+    assertFalse(cl.isListExtTblLocs());
+    assertNull(cl.getListExtTblLocsParams());
     assertFalse(cl.isDryRun());
     assertNull(cl.getSerdePropKey());
     assertNull(cl.getTablePropKey());
@@ -73,6 +77,8 @@ public void testParseUpdateLocation() throws ParseException {
     assertTrue(cl.isUpdateLocation());
     assertEquals("hdfs://new.loc", cl.getUpddateLocationParams()[0]);
     assertEquals("hdfs://old.loc", cl.getUpddateLocationParams()[1]);
+    assertFalse(cl.isListExtTblLocs());
+    assertNull(cl.getListExtTblLocsParams());
     assertTrue(cl.isDryRun());
     assertEquals("abc", cl.getSerdePropKey());
     assertEquals("def", cl.getTablePropKey());
@@ -81,7 +87,7 @@
   @Test
   public void testNoTask() throws ParseException {
     exception.expect(IllegalArgumentException.class);
-    exception.expectMessage("exectly one of -listFSRoot, -executeJDOQL, -updateLocation must be set");
+    exception.expectMessage("exectly one of -listFSRoot, -executeJDOQL, -updateLocation, -listExtTblLocs must be set");
 
     new HiveMetaToolCommandLine(new String[] {});
   }
@@ -89,7 +95,7 @@
   @Test
   public void testMultipleTask() throws ParseException {
     exception.expect(IllegalArgumentException.class);
-    exception.expectMessage("exectly one of -listFSRoot, -executeJDOQL, -updateLocation must be set");
+    exception.expectMessage("exectly one of -listFSRoot, -executeJDOQL, -updateLocation, -listExtTblLocs must be set");
 
     new HiveMetaToolCommandLine(new String[] {"-listFSRoot", "-executeJDOQL", "select a from b"});
   }
@@ -102,6 +108,14 @@ public void testUpdateLocationOneArgument() throws ParseException {
     new HiveMetaToolCommandLine(new String[] {"-updateLocation", "hdfs://abc.de"});
   }
 
+  @Test
+  public void testListExtTblLocsOneArgument() throws ParseException {
+    exception.expect(IllegalArgumentException.class);
+    exception.expectMessage("HiveMetaTool:listExtTblLocs takes in 2 arguments but was passed 1 arguments");
+
+    new HiveMetaToolCommandLine(new String[] {"-listExtTblLocs", "db1"});
+  }
+
   @Test
   public void testDryRunNotAllowed() throws ParseException {
     exception.expect(IllegalArgumentException.class);