diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java new file mode 100644 index 0000000..d3b8615 --- /dev/null +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool.java @@ -0,0 +1,584 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.FunctionType; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.HiveObjectRef; +import org.apache.hadoop.hive.metastore.api.HiveObjectType; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.PrivilegeBag; +import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class TestHBaseSchemaTool extends HBaseIntegrationTests { + private static final Log LOG = LogFactory.getLog(TestHBaseSchemaTool.class.getName()); + private String lsep = System.getProperty("line.separator"); + + @BeforeClass + public static void startup() throws Exception { + HBaseIntegrationTests.startMiniCluster(); + } + + @AfterClass + public static void shutdown() throws Exception { + HBaseIntegrationTests.shutdownMiniCluster(); + } + + @Before + public void setup() throws IOException { + setupHBaseStore(); + } + + @Test + public void listTables() throws Exception { + 
ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(true, null, null, null, conf, out, err); + Assert.assertEquals(StringUtils.join(HBaseReadWrite.tableNames, lsep) + lsep, + outStr.toString()); + } + + @Test + public void bogusTable() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, "nosuch", null, null, conf, out, err); + Assert.assertEquals("Unknown table: nosuch" + lsep, errStr.toString()); + } + + @Test + public void noSuchDb() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.DB_TABLE, "nosuch", null, conf, out, err); + Assert.assertEquals("No such database: nosuch" + lsep, outStr.toString()); + } + + @Test + public void noMatchingDb() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.DB_TABLE, null, "nomatch", conf, out, err); + Assert.assertEquals("No matching database: nomatch" + lsep, outStr.toString()); + } + + @Test + public void noSuchRole() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.ROLE_TABLE, "nosuch", null, conf, out, err); + Assert.assertEquals("No such role: nosuch" + lsep, outStr.toString()); + } + + @Test + public void noMatchingRole() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.ROLE_TABLE, null, "nomatch", conf, out, err); + Assert.assertEquals("No matching role: nomatch" + lsep, outStr.toString()); + } + + @Test + public void noSuchUser() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.USER_TO_ROLE_TABLE, "nosuch", null, conf, out, err); + Assert.assertEquals("No such user: nosuch" + lsep, outStr.toString()); + } + + @Test + public void noMatchingUser() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.USER_TO_ROLE_TABLE, null, "nomatch", conf, out, err); + Assert.assertEquals("No matching user: nomatch" + lsep, outStr.toString()); + } + + @Test + public void 
noSuchFunction() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.FUNC_TABLE, "nosuch", null, conf, out, err); + Assert.assertEquals("No such function: nosuch" + lsep, outStr.toString()); + } + + @Test + public void noMatchingFunction() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.FUNC_TABLE, null, "nomatch", conf, out, + err); + Assert.assertEquals("No matching function: nomatch" + lsep, outStr.toString()); + } + + @Test + public void noSuchTable() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.TABLE_TABLE, "nosuch", null, conf, out, err); + Assert.assertEquals("No such table: nosuch" + lsep, outStr.toString()); + } + + @Test + public void noMatchingTable() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.TABLE_TABLE, null, "nomatch", conf, out, err); + Assert.assertEquals("No matching table: nomatch" + lsep, outStr.toString()); + } + + @Test + public void noSuchPart() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.PART_TABLE, "nosuch", null, conf, out, err); + Assert.assertEquals("No such partition: nosuch" + lsep, outStr.toString()); + } + + @Test + public void noSuchPartValidFormat() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + // Test with something that looks like a valid entry + new HBaseSchemaTool().go(false, HBaseReadWrite.PART_TABLE, "default.nosuch.nosuch", null, conf, + out, err); + Assert.assertEquals("No such partition: default.nosuch.nosuch" + lsep, outStr.toString()); + } + + + @Test + public void noMatchingPart() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new HBaseSchemaTool().go(false, HBaseReadWrite.PART_TABLE, null, "nomatch", conf, out, err); + Assert.assertEquals("No matching partition: nomatch" + lsep, outStr.toString()); + } + + @Test + public void noMatchingPartValidFormat() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + new 
HBaseSchemaTool().go(false, HBaseReadWrite.PART_TABLE, null, "nomatch.a.b", conf, out, err); + Assert.assertEquals("No matching partition: nomatch.a.b" + lsep, outStr.toString()); + } + + @Test + public void noSuchStorageDescriptor() throws Exception { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + // Strangely enough things don't come back quite the same when going through the Base64 + // encode/decode. + new HBaseSchemaTool().go(false, HBaseReadWrite.SD_TABLE, "nosuch", null, conf, out, err); + Assert.assertEquals("No such storage descriptor: nosucg" + lsep, outStr.toString()); + } + + @Test + public void oneMondoTest() throws Exception { + // This is a pain to do in one big test, but we have to control the order so that we have tests + // without dbs, etc. + HBaseSchemaTool tool = new HBaseSchemaTool(); + + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + // This needs to be up front before we create any tables or partitions + tool.go(false, HBaseReadWrite.SD_TABLE, null, "whatever", conf, out, err); + Assert.assertEquals("No storage descriptors" + lsep, outStr.toString()); + + // This one needs to be up front too + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.SEQUENCES_TABLE, null, "whatever", conf, out, err); + Assert.assertEquals("No sequences" + lsep, outStr.toString()); + + // Create some databases + String[] dbNames = new String[3]; + for (int i = 0; i < dbNames.length; i++) { + dbNames[i] = "db" + i; + Database db = new Database(dbNames[i], "no description", "file:///tmp", emptyParameters); + store.createDatabase(db); + } + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.DB_TABLE, "db0", null, conf, out, err); + Assert.assertEquals("{\"name\":\"db0\",\"description\":\"no description\"," + + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep, outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.DB_TABLE, null, ".*", conf, out, err); + Assert.assertEquals("{\"name\":\"db0\",\"description\":\"no description\"," + + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep + + "{\"name\":\"db1\",\"description\":\"no description\"," + + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep + + "{\"name\":\"db2\",\"description\":\"no description\"," + + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep, + outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.DB_TABLE, null, "db[12]", conf, out, err); + Assert.assertEquals("{\"name\":\"db1\",\"description\":\"no description\"," + + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep + + "{\"name\":\"db2\",\"description\":\"no description\"," + + "\"locationUri\":\"file:///tmp\",\"parameters\":{}}" + lsep, + outStr.toString()); + + String[] roleNames = new String[2]; + for (int i = 0; i < roleNames.length; i++) { + roleNames[i] = "role" + i; + store.addRole(roleNames[i], "me"); + } + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.ROLE_TABLE, null, "role.", 
conf, out, err); + Assert.assertEquals("{\"roleName\":\"role0\",\"createTime\":now,\"ownerName\":\"me\"}" + + lsep + "{\"roleName\":\"role1\",\"createTime\":now,\"ownerName\":\"me\"}" + lsep, + outStr.toString().replaceAll("createTime\":[0-9]+", "createTime\":now")); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.ROLE_TABLE, "role1", null, conf, out, err); + Assert.assertEquals("{\"roleName\":\"role1\",\"createTime\":now,\"ownerName\":\"me\"}" + lsep, + outStr.toString().replaceAll("createTime\":[0-9]+", "createTime\":now")); + + Role role1 = store.getRole("role1"); + store.grantRole(role1, "fred", PrincipalType.USER, "me", PrincipalType.USER, false); + store.grantRole(role1, "joanne", PrincipalType.USER, "me", PrincipalType.USER, false); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.USER_TO_ROLE_TABLE, null, ".*", conf, out, err); + Assert.assertEquals("fred: role1" + lsep + "joanne: role1" + lsep, outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.USER_TO_ROLE_TABLE, "joanne", null, conf, out, err); + Assert.assertEquals("role1" + lsep, outStr.toString()); + + String[] funcNames = new String[3]; + for (int i = 0; i < funcNames.length; i++) { + funcNames[i] = "func" + i; + Function function = new Function(funcNames[i], "db1", "Function", "me", PrincipalType.USER, 0, + FunctionType.JAVA, null); + store.createFunction(function); + } + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.FUNC_TABLE, "db1.func0", null, conf, out, err); + Assert.assertEquals("{\"functionName\":\"func0\",\"dbName\":\"db1\"," + + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0," + + "\"functionType\":1}" + lsep, outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.FUNC_TABLE, null, ".*", conf, out, err); + Assert.assertEquals("{\"functionName\":\"func0\",\"dbName\":\"db1\"," + + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0," + + "\"functionType\":1}" + lsep + + "{\"functionName\":\"func1\",\"dbName\":\"db1\"," + + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0," + + "\"functionType\":1}" + lsep + + "{\"functionName\":\"func2\",\"dbName\":\"db1\"," + + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0," + + "\"functionType\":1}" + lsep, outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.FUNC_TABLE, null, "db1.func[12]", conf, out, err); + Assert.assertEquals("{\"functionName\":\"func1\",\"dbName\":\"db1\"," + + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0," + + "\"functionType\":1}" + lsep + + "{\"functionName\":\"func2\",\"dbName\":\"db1\"," + + "\"className\":\"Function\",\"ownerName\":\"me\",\"ownerType\":1,\"createTime\":0," + + "\"functionType\":1}" + lsep, outStr.toString()); + + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.GLOBAL_PRIVS_TABLE, null, null, conf, out, err); + Assert.assertEquals("No global privileges" + lsep, outStr.toString()); + + List<HiveObjectPrivilege> privileges = new ArrayList<>(); + HiveObjectRef hiveObjRef = new HiveObjectRef(HiveObjectType.GLOBAL, "db0", "tab0", null, + null); + 
PrivilegeGrantInfo grantInfo = + new PrivilegeGrantInfo("read", 0, "me", PrincipalType.USER, false); + HiveObjectPrivilege hop = new HiveObjectPrivilege(hiveObjRef, "user", PrincipalType.USER, + grantInfo); + privileges.add(hop); + + grantInfo = new PrivilegeGrantInfo("create", 0, "me", PrincipalType.USER, true); + hop = new HiveObjectPrivilege(hiveObjRef, "user", PrincipalType.USER, grantInfo); + privileges.add(hop); + + PrivilegeBag pBag = new PrivilegeBag(privileges); + store.grantPrivileges(pBag); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.GLOBAL_PRIVS_TABLE, null, null, conf, out, err); + Assert.assertEquals( + "{\"userPrivileges\":{\"user\":[{\"privilege\":\"read\",\"createTime\":0," + + "\"grantor\":\"me\",\"grantorType\":1,\"grantOption\":0},{\"privilege\":\"create\"," + + "\"createTime\":0,\"grantor\":\"me\",\"grantorType\":1,\"grantOption\":1}]}}" + lsep, + outStr.toString()); + + + String[] tableNames = new String[3]; + for (int i = 0; i < tableNames.length; i++) { + tableNames[i] = "tab" + i; + StorageDescriptor sd = new StorageDescriptor(Arrays.asList(new FieldSchema("col1", "int", + ""), new FieldSchema("col2", "varchar(32)", "")), + "/tmp", null, null, false, 0, null, null, null, Collections.<String, String>emptyMap()); + Table tab = new Table(tableNames[i], dbNames[0], "me", 0, 0, 0, sd, + Arrays.asList(new FieldSchema("pcol1", "string", ""), + new FieldSchema("pcol2", "string", "")), + Collections.<String, String>emptyMap(), null, null, null); + store.createTable(tab); + } + + ColumnStatisticsDesc tableStatsDesc = new ColumnStatisticsDesc(false, "db0", "tab0"); + ColumnStatisticsData tcsd = new ColumnStatisticsData(); + LongColumnStatsData tlcsd = new LongColumnStatsData(1, 2); + tlcsd.setLowValue(-95); + tlcsd.setHighValue(95); + tcsd.setLongStats(tlcsd); + ColumnStatisticsData tcsd2 = new ColumnStatisticsData(); + tcsd2.setStringStats(new StringColumnStatsData(97, 18.78, 29, 397)); + List<ColumnStatisticsObj> tcsos = Arrays.asList( + new ColumnStatisticsObj("col1", "int", tcsd), + new ColumnStatisticsObj("col2", "varchar(32)", tcsd2)); + ColumnStatistics tStatObj = new ColumnStatistics(tableStatsDesc, tcsos); + store.updateTableColumnStatistics(tStatObj); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.TABLE_TABLE, "db0.tab1", null, conf, out, err); + Assert.assertEquals("{\"tableName\":\"tab1\",\"dbName\":\"db0\",\"owner\":\"me\"," + + "\"createTime\":0,\"lastAccessTime\":0,\"retention\":0," + + "\"partitionKeys\":[{\"name\":\"pcol1\",\"type\":\"string\",\"comment\":\"\"}," + + "{\"name\":\"pcol2\",\"type\":\"string\",\"comment\":\"\"}],\"parameters\":{}," + + "\"tableType\":\"\"} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats:" + lsep, + outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.TABLE_TABLE, null, "db0.*", conf, out, err); + Assert.assertEquals("{\"tableName\":\"tab0\",\"dbName\":\"db0\",\"owner\":\"me\"," + + "\"createTime\":0,\"lastAccessTime\":0,\"retention\":0," + + "\"partitionKeys\":[{\"name\":\"pcol1\",\"type\":\"string\",\"comment\":\"\"}," + + "{\"name\":\"pcol2\",\"type\":\"string\",\"comment\":\"\"}],\"parameters\":{}," + + "\"tableType\":\"\"} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats: column " + + "col1: {\"colName\":\"col1\",\"colType\":\"int\"," + + "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," + + "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," + 
+ "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78," + + "\"numNulls\":29,\"numDVs\":397}}}" + lsep + + "{\"tableName\":\"tab1\",\"dbName\":\"db0\",\"owner\":\"me\",\"createTime\":0," + + "\"lastAccessTime\":0,\"retention\":0,\"partitionKeys\":[{\"name\":\"pcol1\"," + + "\"type\":\"string\",\"comment\":\"\"},{\"name\":\"pcol2\",\"type\":\"string\"," + + "\"comment\":\"\"}],\"parameters\":{},\"tableType\":\"\"} sdHash: " + + "qQTgZAi5VzgpozzFGmIVTQ stats:" + lsep + + "{\"tableName\":\"tab2\",\"dbName\":\"db0\",\"owner\":\"me\",\"createTime\":0," + + "\"lastAccessTime\":0,\"retention\":0,\"partitionKeys\":[{\"name\":\"pcol1\"," + + "\"type\":\"string\",\"comment\":\"\"},{\"name\":\"pcol2\",\"type\":\"string\"," + + "\"comment\":\"\"}],\"parameters\":{},\"tableType\":\"\"} sdHash: " + + "qQTgZAi5VzgpozzFGmIVTQ stats:" + lsep, outStr.toString()); + + List> partVals = Arrays.asList(Arrays.asList("a", "b"), Arrays.asList("c", "d")); + for (List pv : partVals) { + StorageDescriptor sd = new StorageDescriptor(Arrays.asList(new FieldSchema("col1", "int", + ""), new FieldSchema("col2", "varchar(32)", "")), + "/tmp", null, null, false, 0, null, null, null, Collections.emptyMap()); + Partition p = new Partition(pv, "db0", "tab1", 0, 0, sd, Collections.emptyMap()); + store.addPartition(p); + } + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.PART_TABLE, "db0.tab1.a.b", null, conf, out, err); + Assert.assertEquals("{\"values\":[\"a\",\"b\"],\"dbName\":\"db0\",\"tableName\":\"tab1\"," + + "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{}} sdHash: " + + "qQTgZAi5VzgpozzFGmIVTQ stats:" + lsep, outStr.toString()); + + ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, "db0", "tab1"); + statsDesc.setPartName("pcol1=c/pcol2=d"); + ColumnStatisticsData csd1 = new ColumnStatisticsData(); + LongColumnStatsData lcsd = new LongColumnStatsData(1, 2); + lcsd.setLowValue(-95); + lcsd.setHighValue(95); + csd1.setLongStats(lcsd); + ColumnStatisticsData csd2 = new ColumnStatisticsData(); + csd2.setStringStats(new StringColumnStatsData(97, 18.78, 29, 397)); + List csos = Arrays.asList( + new ColumnStatisticsObj("col1", "int", csd1), + new ColumnStatisticsObj("col2", "varchar(32)", csd2)); + ColumnStatistics statsObj = new ColumnStatistics(statsDesc, csos); + store.updatePartitionColumnStatistics(statsObj, partVals.get(1)); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.PART_TABLE, "db0.tab1.c.d", null, conf, out, err); + Assert.assertEquals("{\"values\":[\"c\",\"d\"],\"dbName\":\"db0\",\"tableName\":\"tab1\"," + + "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{}} sdHash: qQTgZAi5VzgpozzFGmIVTQ " + + "stats: column col1: {\"colName\":\"col1\",\"colType\":\"int\"," + + "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," + + "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," + + "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,\"numNulls\":29," + + "\"numDVs\":397}}}" + lsep, outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.PART_TABLE, null, "db0.tab1", conf, out, err); + Assert.assertEquals("{\"values\":[\"a\",\"b\"],\"dbName\":\"db0\",\"tableName\":\"tab1\"," + + "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{}} sdHash: qQTgZAi5VzgpozzFGmIVTQ " + + "stats:" + lsep + + 
"{\"values\":[\"c\",\"d\"],\"dbName\":\"db0\",\"tableName\":\"tab1\",\"createTime\":0," + + "\"lastAccessTime\":0,\"parameters\":{}} sdHash: qQTgZAi5VzgpozzFGmIVTQ stats: column " + + "col1: {\"colName\":\"col1\",\"colType\":\"int\"," + + "\"statsData\":{\"longStats\":{\"lowValue\":-95,\"highValue\":95,\"numNulls\":1," + + "\"numDVs\":2}}} column col2: {\"colName\":\"col2\",\"colType\":\"varchar(32)\"," + + "\"statsData\":{\"stringStats\":{\"maxColLen\":97,\"avgColLen\":18.78,\"numNulls\":29," + + "\"numDVs\":397}}}" + lsep, outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.PART_TABLE, null, "db0.tab1.a", conf, out, err); + Assert.assertEquals("{\"values\":[\"a\",\"b\"],\"dbName\":\"db0\",\"tableName\":\"tab1\"," + + "\"createTime\":0,\"lastAccessTime\":0,\"parameters\":{}} sdHash: qQTgZAi5VzgpozzFGmIVTQ " + + "stats:" + lsep, outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.SD_TABLE, "qQTgZAi5VzgpozzFGmIVTQ", null, conf, out, err); + Assert.assertEquals("{\"cols\":[{\"name\":\"col1\",\"type\":\"int\",\"comment\":\"\"}," + + "{\"name\":\"col2\",\"type\":\"varchar(32)\",\"comment\":\"\"}],\"compressed\":0," + + "\"numBuckets\":0,\"bucketCols\":[],\"sortCols\":[],\"storedAsSubDirectories\":0}" + lsep, + outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.SD_TABLE, null, "whatever", conf, out, err); + Assert.assertEquals("qQTgZAi5VzgpozzFGmIVTQ: {\"cols\":[{\"name\":\"col1\",\"type\":\"int\"," + + "\"comment\":\"\"}," + + "{\"name\":\"col2\",\"type\":\"varchar(32)\",\"comment\":\"\"}],\"compressed\":0," + + "\"numBuckets\":0,\"bucketCols\":[],\"sortCols\":[],\"storedAsSubDirectories\":0}" + lsep, + outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.SECURITY_TABLE, null, "whatever", conf, out, err); + Assert.assertEquals("No security related entries" + lsep, outStr.toString()); + + store.addMasterKey("this be a key"); + store.addToken("tokenid", "delegation token"); + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.SECURITY_TABLE, null, "whatever", conf, out, err); + Assert.assertEquals("Master key 0: this be a key" + lsep + + "Delegation token tokenid: delegation token" + lsep, outStr.toString()); + + outStr = new ByteArrayOutputStream(); + out = new PrintStream(outStr); + tool.go(false, HBaseReadWrite.SEQUENCES_TABLE, null, "whatever", conf, out, err); + Assert.assertEquals("mk: 1" + lsep, outStr.toString()); + } +} diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool2.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool2.java new file mode 100644 index 0000000..e343035 --- /dev/null +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseSchemaTool2.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + +/** + * This is in a separate class because the install test must not set up the metastore first. + */ +public class TestHBaseSchemaTool2 extends HBaseIntegrationTests { + private static final Log LOG = LogFactory.getLog(TestHBaseSchemaTool2.class.getName()); + private String lsep = System.getProperty("line.separator"); + + @BeforeClass + public static void startup() throws Exception { + HBaseIntegrationTests.startMiniCluster(); + } + + @AfterClass + public static void shutdown() throws Exception { + HBaseIntegrationTests.shutdownMiniCluster(); + } + + @Test + public void install() { + ByteArrayOutputStream outStr = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(outStr); + ByteArrayOutputStream errStr = new ByteArrayOutputStream(); + PrintStream err = new PrintStream(errStr); + + HBaseSchemaTool tool = new HBaseSchemaTool(); + tool.install(conf, err); + tool.go(true, null, null, null, conf, out, err); + Assert.assertEquals(StringUtils.join(HBaseReadWrite.tableNames, lsep) + lsep, + outStr.toString()); + } +}
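The two tests above pin down HBaseSchemaTool's public surface: install(conf, err) creates the HBMS_* tables in HBase, and go(listTables, table, key, regex, conf, out, err) either lists the known table names or dumps entries from one table, selected by exact key or by regular expression. A minimal standalone driver consistent with those calls could look like the following sketch; the driver class itself and the bare Configuration are illustrative assumptions, not part of this patch (it sits in the metastore.hbase package because the table-name constants are package-private).

    package org.apache.hadoop.hive.metastore.hbase;

    import java.io.PrintStream;
    import org.apache.hadoop.conf.Configuration;

    public class SchemaToolDriver {
      public static void main(String[] args) throws Exception {
        // In a real deployment the Configuration would carry the HBase connection settings.
        Configuration conf = new Configuration();
        PrintStream out = System.out;
        PrintStream err = System.err;
        HBaseSchemaTool tool = new HBaseSchemaTool();
        // Create the metastore tables (what TestHBaseSchemaTool2.install exercises).
        tool.install(conf, err);
        // List the table names, one per line.
        tool.go(true, null, null, null, conf, out, err);
        // Dump every database entry as JSON, then a single table by its dbname.tablename key.
        tool.go(false, HBaseReadWrite.DB_TABLE, null, ".*", conf, out, err);
        tool.go(false, HBaseReadWrite.TABLE_TABLE, "db0.tab1", null, conf, out, err);
      }
    }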
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java index 781f562..85c3700 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java @@ -57,8 +57,14 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.hbase.PartitionKeyComparator.Operator; import org.apache.hive.common.util.BloomFilter; +import org.apache.thrift.TBase; +import org.apache.thrift.TException; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.protocol.TSimpleJSONProtocol; +import org.apache.thrift.transport.TMemoryBuffer; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -71,6 +77,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.NavigableMap; import java.util.Set; @@ -79,30 +86,29 @@ */ public class HBaseReadWrite { - @VisibleForTesting final static String AGGR_STATS_TABLE = "HBMS_AGGR_STATS"; - @VisibleForTesting final static String DB_TABLE = "HBMS_DBS"; - @VisibleForTesting final static String FUNC_TABLE = "HBMS_FUNCS"; - @VisibleForTesting final static String GLOBAL_PRIVS_TABLE = "HBMS_GLOBAL_PRIVS"; - @VisibleForTesting final static String PART_TABLE = "HBMS_PARTITIONS"; - @VisibleForTesting final static String ROLE_TABLE = "HBMS_ROLES"; - @VisibleForTesting final static String SD_TABLE = "HBMS_SDS"; - @VisibleForTesting final static String SECURITY_TABLE = "HBMS_SECURITY"; - @VisibleForTesting final static String SEQUENCES_TABLE = "HBMS_SEQUENCES"; - @VisibleForTesting final static String TABLE_TABLE = "HBMS_TBLS"; - @VisibleForTesting final static String USER_TO_ROLE_TABLE = "HBMS_USER_TO_ROLE"; - @VisibleForTesting final static String FILE_METADATA_TABLE = "HBMS_FILE_METADATA"; - @VisibleForTesting final static byte[] CATALOG_CF = "c".getBytes(HBaseUtils.ENCODING); - @VisibleForTesting final static byte[] STATS_CF = "s".getBytes(HBaseUtils.ENCODING); - @VisibleForTesting final static String NO_CACHE_CONF = "no.use.cache"; + final static String AGGR_STATS_TABLE = "HBMS_AGGR_STATS"; + final static String DB_TABLE = "HBMS_DBS"; + final static String FUNC_TABLE = "HBMS_FUNCS"; + final static String GLOBAL_PRIVS_TABLE = "HBMS_GLOBAL_PRIVS"; + final static String PART_TABLE = "HBMS_PARTITIONS"; + final static String ROLE_TABLE = "HBMS_ROLES"; + final static String SD_TABLE = "HBMS_SDS"; + final static String SECURITY_TABLE = "HBMS_SECURITY"; + final static String SEQUENCES_TABLE = "HBMS_SEQUENCES"; + final static String TABLE_TABLE = "HBMS_TBLS"; + final static String USER_TO_ROLE_TABLE = "HBMS_USER_TO_ROLE"; + final static String FILE_METADATA_TABLE = "HBMS_FILE_METADATA"; + final static byte[] CATALOG_CF = "c".getBytes(HBaseUtils.ENCODING); + final static byte[] STATS_CF = "s".getBytes(HBaseUtils.ENCODING); + final static String NO_CACHE_CONF = "no.use.cache"; /** * List of tables in HBase */ - public final static String[] tableNames = { AGGR_STATS_TABLE, DB_TABLE, FUNC_TABLE, GLOBAL_PRIVS_TABLE, - PART_TABLE, USER_TO_ROLE_TABLE, ROLE_TABLE, SD_TABLE, - SECURITY_TABLE, SEQUENCES_TABLE, TABLE_TABLE, - FILE_METADATA_TABLE }; - public final static Map<String, List<byte[]>> columnFamilies = - new HashMap<String, List<byte[]>> (tableNames.length); + public final static String[] tableNames = { AGGR_STATS_TABLE, DB_TABLE, FUNC_TABLE, + GLOBAL_PRIVS_TABLE, PART_TABLE, USER_TO_ROLE_TABLE, + ROLE_TABLE, SD_TABLE, SECURITY_TABLE, SEQUENCES_TABLE, + TABLE_TABLE, FILE_METADATA_TABLE }; + public final static Map<String, List<byte[]>> columnFamilies = new HashMap<> (tableNames.length); static { columnFamilies.put(AGGR_STATS_TABLE, Arrays.asList(CATALOG_CF)); @@ -120,18 +126,14 @@ columnFamilies.put(FILE_METADATA_TABLE, Arrays.asList(CATALOG_CF, STATS_CF)); } - /** - * Stores the bloom filter for the aggregated stats, to determine what partitions are in this - * aggregate. 
- */ - final static byte[] MASTER_KEY_SEQUENCE = "mk".getBytes(HBaseUtils.ENCODING); final static byte[] AGGR_STATS_BLOOM_COL = "b".getBytes(HBaseUtils.ENCODING); + private final static byte[] AGGR_STATS_STATS_COL = "s".getBytes(HBaseUtils.ENCODING); + final static byte[] MASTER_KEY_SEQUENCE = "mk".getBytes(HBaseUtils.ENCODING); private final static byte[] CATALOG_COL = "c".getBytes(HBaseUtils.ENCODING); private final static byte[] ROLES_COL = "roles".getBytes(HBaseUtils.ENCODING); private final static byte[] REF_COUNT_COL = "ref".getBytes(HBaseUtils.ENCODING); private final static byte[] DELEGATION_TOKEN_COL = "dt".getBytes(HBaseUtils.ENCODING); private final static byte[] MASTER_KEY_COL = "mk".getBytes(HBaseUtils.ENCODING); - private final static byte[] AGGR_STATS_STATS_COL = "s".getBytes(HBaseUtils.ENCODING); private final static byte[] GLOBAL_PRIVS_KEY = "gp".getBytes(HBaseUtils.ENCODING); private final static byte[] SEQUENCES_KEY = "seq".getBytes(HBaseUtils.ENCODING); private final static int TABLES_TO_CACHE = 10; @@ -205,6 +207,10 @@ static HBaseReadWrite getInstance() { return self.get(); } + public Configuration getConf() { + return conf; + } + private HBaseReadWrite(Configuration configuration) { conf = configuration; HBaseConfiguration.addHbaseResources(conf); @@ -386,6 +392,35 @@ void deleteDb(String name) throws IOException { delete(DB_TABLE, key, null, null); } + /** + * Print out the database. Intended for use by {@link org.apache.hadoop.hive.metastore.hbase.HBaseSchemaTool} + * @param name name of database to print + * @return string printout of database + */ + String printDatabase(String name) throws IOException, TException { + Database db = getDb(name); + if (db == null) return noSuch(name, "database"); + else return dumpThriftObject(db); + } + + /** + * Print out databases. + * @param regex regular expression to use to search for databases + * @return databases as strings, one string per database + * @throws IOException + * @throws TException + */ + List<String> printDatabases(String regex) throws IOException, TException { + List<Database> dbs = scanDatabases(regex); + if (dbs.size() == 0) { + return noMatch(regex, "database"); + } else { + List<String> lines = new ArrayList<>(); + for (Database db : dbs) lines.add(dumpThriftObject(db)); + return lines; + } + } + /********************************************************************************************** * Function related methods *********************************************************************************************/ @@ -453,6 +488,41 @@ void deleteFunction(String dbName, String functionName) throws IOException { delete(FUNC_TABLE, key, null, null); } + /** + * Print out a function + * @param key key to get the function, must include dbname. 
+ * @return string of the function + * @throws IOException + * @throws TException + */ + String printFunction(String key) throws IOException, TException { + byte[] k = HBaseUtils.buildKey(key); + byte[] serialized = read(FUNC_TABLE, k, CATALOG_CF, CATALOG_COL); + if (serialized == null) return noSuch(key, "function"); + Function func = HBaseUtils.deserializeFunction(k, serialized); + return dumpThriftObject(func); + } + + /** + * Print out functions + * @param regex regular expression to use in matching functions + * @return list of strings, one function each + * @throws IOException + * @throws TException + */ + List<String> printFunctions(String regex) throws IOException, TException { + Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); + Iterator<Result> iter = scan(FUNC_TABLE, null, null, CATALOG_CF, CATALOG_COL, filter); + List<String> lines = new ArrayList<>(); + while (iter.hasNext()) { + Result result = iter.next(); + lines.add(dumpThriftObject(HBaseUtils.deserializeFunction(result.getRow(), + result.getValue(CATALOG_CF, CATALOG_COL)))); + } + if (lines.size() == 0) lines = noMatch(regex, "function"); + return lines; + } + /********************************************************************************************** * Global privilege related methods *********************************************************************************************/ @@ -479,6 +549,18 @@ void putGlobalPrivs(PrincipalPrivilegeSet privs) throws IOException { store(GLOBAL_PRIVS_TABLE, key, CATALOG_CF, CATALOG_COL, serialized); } + /** + * Print out the global privileges. + * @return string containing the global privileges + * @throws IOException + * @throws TException + */ + String printGlobalPrivs() throws IOException, TException { + PrincipalPrivilegeSet pps = getGlobalPrivs(); + if (pps == null) return "No global privileges"; + else return dumpThriftObject(pps); + } + /********************************************************************************************** * Partition related methods *********************************************************************************************/ @@ -645,7 +727,8 @@ void replacePartitions(List<Partition> oldParts, List<Partition> newParts, List< } byte[] keyPrefix = HBaseUtils.buildPartitionKey(dbName, tableName, new ArrayList<String>(), new ArrayList<String>(), false); - List<Partition> parts = scanPartitionsWithFilter(dbName, tableName, keyPrefix, HBaseUtils.getEndPrefix(keyPrefix), -1, null); + List<Partition> parts = scanPartitionsWithFilter(dbName, tableName, keyPrefix, + HBaseUtils.getEndPrefix(keyPrefix), -1, null); partCache.put(dbName, tableName, parts, true); return maxPartitions < parts.size() ? 
parts.subList(0, maxPartitions) : parts; } @@ -670,6 +753,206 @@ void replacePartitions(List<Partition> oldParts, List<Partition> newParts, List< */ List<Partition> scanPartitions(String dbName, String tableName, List<String> partVals, int maxPartitions) throws IOException, NoSuchObjectException { + + PartitionScanInfo psi = scanPartitionsInternal(dbName, tableName, partVals, maxPartitions); + List<Partition> parts = scanPartitionsWithFilter(dbName, tableName, psi.keyPrefix, + psi.endKeyPrefix, maxPartitions, psi.filter); + partCache.put(dbName, tableName, parts, false); + return parts; + } + + List<Partition> scanPartitions(String dbName, String tableName, byte[] keyStart, byte[] keyEnd, + Filter filter, int maxPartitions) + throws IOException, NoSuchObjectException { + byte[] startRow = keyStart; + byte[] endRow; + if (keyEnd == null || keyEnd.length == 0) { + // stop when current db+table entries are over + endRow = HBaseUtils.getEndPrefix(startRow); + } else { + endRow = keyEnd; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Scanning partitions with start row <" + new String(startRow) + "> and end row <" + + new String(endRow) + ">"); + } + return scanPartitionsWithFilter(dbName, tableName, startRow, endRow, maxPartitions, filter); + } + + /** + * Delete a partition + * @param dbName database name that table is in + * @param tableName table partition is in + * @param partVals partition values that define this partition, in the same order as the + * partition columns they are values for + * @throws IOException + */ + void deletePartition(String dbName, String tableName, List<String> partTypes, + List<String> partVals) throws IOException { + deletePartition(dbName, tableName, partTypes, partVals, true); + } + + /** + * Print out a partition. + * @param partKey The key for the partition. This must include dbname.tablename._partkeys_ + * where _partkeys_ is a dot separated list of partition values in the proper + * order. + * @return string containing the partition + * @throws IOException + * @throws TException + */ + String printPartition(String partKey) throws IOException, TException { + // First figure out the table and fetch it + String[] partKeyParts = partKey.split(HBaseUtils.KEY_SEPARATOR_STR); + if (partKeyParts.length < 3) return noSuch(partKey, "partition"); + Table table = getTable(partKeyParts[0], partKeyParts[1]); + if (table == null) return noSuch(partKey, "partition"); + + byte[] key = HBaseUtils.buildPartitionKey(partKeyParts[0], partKeyParts[1], + HBaseUtils.getPartitionKeyTypes(table.getPartitionKeys()), + Arrays.asList(Arrays.copyOfRange(partKeyParts, 2, partKeyParts.length))); + @SuppressWarnings("deprecation") + HTableInterface htab = conn.getHBaseTable(PART_TABLE); + Get g = new Get(key); + g.addColumn(CATALOG_CF, CATALOG_COL); + g.addFamily(STATS_CF); + Result result = htab.get(g); + if (result.isEmpty()) return noSuch(partKey, "partition"); + return printOnePartition(result); + } + 
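+ // Worked example (exercised by TestHBaseSchemaTool.oneMondoTest): printPartition("db0.tab1.a.b") + // prints the (pcol1=a, pcol2=b) partition of db0.tab1 as JSON, followed by its sdHash and any + // column statistics stored with it. + 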
+ /** + * Print partitions + * @param partKey a partial partition key. This must match the beginning of the partition key. + * It can be just dbname.tablename, or dbname.table.pval... where pval are the + * partition values in order. They must be in the correct order and they must + * be literal values (no regular expressions) + * @return partitions as strings + * @throws IOException + * @throws TException + */ + List<String> printPartitions(String partKey) throws IOException, TException { + // First figure out the table and fetch it + // Split on dot here rather than the standard separator because this will be passed in as a + // regex, even though we aren't fully supporting regexes. + String[] partKeyParts = partKey.split("\\."); + if (partKeyParts.length < 2) return noMatch(partKey, "partition"); + List<String> partVals = partKeyParts.length == 2 ? Arrays.asList("*") : + Arrays.asList(Arrays.copyOfRange(partKeyParts, 2, partKeyParts.length)); + PartitionScanInfo psi; + try { + psi = + scanPartitionsInternal(partKeyParts[0], partKeyParts[1], partVals, -1); + } catch (NoSuchObjectException e) { + return noMatch(partKey, "partition"); + } + + @SuppressWarnings("deprecation") + HTableInterface htab = conn.getHBaseTable(PART_TABLE); + Scan scan = new Scan(); + scan.addColumn(CATALOG_CF, CATALOG_COL); + scan.addFamily(STATS_CF); + scan.setStartRow(psi.keyPrefix); + scan.setStopRow(psi.endKeyPrefix); + scan.setFilter(psi.filter); + Iterator<Result> iter = htab.getScanner(scan).iterator(); + if (!iter.hasNext()) return noMatch(partKey, "partition"); + List<String> lines = new ArrayList<>(); + while (iter.hasNext()) { + lines.add(printOnePartition(iter.next())); + } + return lines; + } + + private String printOnePartition(Result result) throws IOException, TException { + byte[] key = result.getRow(); + HBaseUtils.StorageDescriptorParts sdParts = + HBaseUtils.deserializePartition(key, result.getValue(CATALOG_CF, CATALOG_COL), this); + StringBuilder builder = new StringBuilder(); + builder.append(dumpThriftObject(sdParts.containingPartition)) + .append(" sdHash: ") + .append(Base64.encodeBase64URLSafeString(sdParts.sdHash)) + .append(" stats:"); + NavigableMap<byte[], byte[]> statsCols = result.getFamilyMap(STATS_CF); + for (Map.Entry<byte[], byte[]> statsCol : statsCols.entrySet()) { + builder.append(" column ") + .append(new String(statsCol.getKey(), HBaseUtils.ENCODING)) + .append(": "); + ColumnStatistics pcs = buildColStats(key, false); + ColumnStatisticsObj cso = HBaseUtils.deserializeStatsForOneColumn(pcs, statsCol.getValue()); + builder.append(dumpThriftObject(cso)); + } + return builder.toString(); + } + + private void deletePartition(String dbName, String tableName, List<String> partTypes, + List<String> partVals, boolean decrementRefCnt) throws IOException { + // Find the partition so I can get the storage descriptor and drop it + partCache.remove(dbName, tableName, partVals); + if (decrementRefCnt) { + Partition p = getPartition(dbName, tableName, partVals, false); + decrementStorageDescriptorRefCount(p.getSd()); + } + byte[] key = HBaseUtils.buildPartitionKey(dbName, tableName, partTypes, partVals); + delete(PART_TABLE, key, null, null); + } + + private Partition getPartition(String dbName, String tableName, List<String> partVals, + boolean populateCache) throws IOException { + Partition cached = partCache.get(dbName, tableName, partVals); + if (cached != null) return cached; + byte[] key = HBaseUtils.buildPartitionKey(dbName, tableName, + HBaseUtils.getPartitionKeyTypes(getTable(dbName, tableName).getPartitionKeys()), partVals); + byte[] serialized = read(PART_TABLE, key, CATALOG_CF, CATALOG_COL); + if (serialized == null) return null; + HBaseUtils.StorageDescriptorParts sdParts = + HBaseUtils.deserializePartition(dbName, tableName, partVals, serialized); + StorageDescriptor sd = 
getStorageDescriptor(sdParts.sdHash); + HBaseUtils.assembleStorageDescriptor(sd, sdParts); + if (populateCache) partCache.put(dbName, tableName, sdParts.containingPartition); + return sdParts.containingPartition; + } + + + private static class PartitionScanInfo { + final String dbName; + final String tableName; + final byte[] keyPrefix; + final byte[] endKeyPrefix; + final int maxPartitions; + final Filter filter; + + PartitionScanInfo(String d, String t, byte[] k, byte[] e, int m, Filter f) { + dbName = d; + tableName = t; + keyPrefix = k; + endKeyPrefix = e; + maxPartitions = m; + filter = f; + } + + @Override + public String toString() { + return new StringBuilder("dbName:") + .append(dbName) + .append(" tableName:") + .append(tableName) + .append(" keyPrefix:") + .append(Base64.encodeBase64URLSafeString(keyPrefix)) + .append(" endKeyPrefix:") + .append(Base64.encodeBase64URLSafeString(endKeyPrefix)) + .append(" maxPartitions:") + .append(maxPartitions) + .append(" filter:") + .append(filter.toString()) + .toString(); + } + } + + private PartitionScanInfo scanPartitionsInternal(String dbName, String tableName, + List<String> partVals, int maxPartitions) + throws IOException, NoSuchObjectException { // First, build as much of the key as we can so that we make the scan as tight as possible. List<String> keyElements = new ArrayList<>(); keyElements.add(dbName); @@ -700,7 +983,7 @@ void replacePartitions(List<Partition> oldParts, List<Partition> newParts, List< } keyPrefix = HBaseUtils.buildPartitionKey(dbName, tableName, HBaseUtils.getPartitionKeyTypes(table.getPartitionKeys().subList(0, keyElements.size()-2)), - keyElements.subList(2, keyElements.size())); + keyElements.subList(2, keyElements.size())); // Now, build a filter out of the remaining keys List<PartitionKeyComparator.Range> ranges = new ArrayList<PartitionKeyComparator.Range>(); @@ -739,76 +1022,13 @@ void replacePartitions(List<Partition> oldParts, List<Partition> newParts, List< filter + ">"); } - List<Partition> parts = scanPartitionsWithFilter(dbName, tableName, keyPrefix, - HBaseUtils.getEndPrefix(keyPrefix), maxPartitions, filter); - partCache.put(dbName, tableName, parts, false); - return parts; - } - - List<Partition> scanPartitions(String dbName, String tableName, byte[] keyStart, byte[] keyEnd, - Filter filter, int maxPartitions) throws IOException, NoSuchObjectException { - byte[] startRow = keyStart; - byte[] endRow; - if (keyEnd == null || keyEnd.length == 0) { - // stop when current db+table entries are over - endRow = HBaseUtils.getEndPrefix(startRow); - } else { - endRow = keyEnd; - } - - if (LOG.isDebugEnabled()) { - LOG.debug("Scanning partitions with start row <" + new String(startRow) + "> and end row <" - + new String(endRow) + ">"); - } - return scanPartitionsWithFilter(dbName, tableName, startRow, endRow, maxPartitions, filter); - } - - - - /** - * Delete a partition - * @param dbName database name that table is in - * @param tableName table partition is in - * @param partVals partition values that define this partition, in the same order as the - * partition columns they are values for - * @throws IOException - */ - void deletePartition(String dbName, String tableName, List<String> partTypes, - List<String> partVals) throws IOException { - deletePartition(dbName, tableName, partTypes, partVals, true); - } - - private void deletePartition(String dbName, String tableName, List<String> partTypes, - List<String> partVals, boolean decrementRefCnt) throws IOException { - // Find the partition so I can get the storage descriptor and drop it - partCache.remove(dbName, tableName, partVals); - if (decrementRefCnt) { - Partition p = getPartition(dbName, tableName, partVals, false); - 
decrementStorageDescriptorRefCount(p.getSd()); - } - byte[] key = HBaseUtils.buildPartitionKey(dbName, tableName, partTypes, partVals); - delete(PART_TABLE, key, null, null); - } - - private Partition getPartition(String dbName, String tableName, List<String> partVals, - boolean populateCache) throws IOException { - Partition cached = partCache.get(dbName, tableName, partVals); - if (cached != null) return cached; - byte[] key = HBaseUtils.buildPartitionKey(dbName, tableName, - HBaseUtils.getPartitionKeyTypes(getTable(dbName, tableName).getPartitionKeys()), partVals); - byte[] serialized = read(PART_TABLE, key, CATALOG_CF, CATALOG_COL); - if (serialized == null) return null; - HBaseUtils.StorageDescriptorParts sdParts = - HBaseUtils.deserializePartition(dbName, tableName, partVals, serialized); - StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); - HBaseUtils.assembleStorageDescriptor(sd, sdParts); - if (populateCache) partCache.put(dbName, tableName, sdParts.containingPartition); - return sdParts.containingPartition; + return new PartitionScanInfo(dbName, tableName, keyPrefix, HBaseUtils.getEndPrefix(keyPrefix), + maxPartitions, filter); } private List<Partition> scanPartitionsWithFilter(String dbName, String tableName, - byte[] startRow, byte [] endRow, int maxResults, Filter filter) - throws IOException { + byte[] startRow, byte [] endRow, int maxResults, + Filter filter) throws IOException { Iterator<Result> iter = scan(PART_TABLE, startRow, endRow, CATALOG_CF, CATALOG_COL, filter); List<FieldSchema> tablePartitions = getTable(dbName, tableName).getPartitionKeys(); @@ -817,7 +1037,7 @@ private Partition getPartition(String dbName, String tableName, List<String> par for (int i = 0; i < numToFetch && iter.hasNext(); i++) { Result result = iter.next(); HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializePartition(dbName, tableName, - tablePartitions, result.getRow(), result.getValue(CATALOG_CF, CATALOG_COL), staticConf); + tablePartitions, result.getRow(), result.getValue(CATALOG_CF, CATALOG_COL), conf); StorageDescriptor sd = getStorageDescriptor(sdParts.sdHash); HBaseUtils.assembleStorageDescriptor(sd, sdParts); parts.add(sdParts.containingPartition); @@ -1158,14 +1378,7 @@ Role getRole(String roleName) throws IOException { * @throws IOException */ List<Role> scanRoles() throws IOException { - Iterator<Result> iter = scan(ROLE_TABLE, CATALOG_CF, CATALOG_COL); - List<Role> roles = new ArrayList<>(); - while (iter.hasNext()) { - Result result = iter.next(); - roles.add(HBaseUtils.deserializeRole(result.getRow(), - result.getValue(CATALOG_CF, CATALOG_COL))); - } - return roles; + return scanRoles(null); } /** @@ -1189,6 +1402,70 @@ void deleteRole(String roleName) throws IOException { roleCache.remove(roleName); } + String printRolesForUser(String userName) throws IOException { + List<String> roles = getUserRoles(userName); + if (roles == null || roles.size() == 0) return noSuch(userName, "user"); + return org.apache.commons.lang.StringUtils.join(roles, ','); + } + + List<String> printRolesForUsers(String regex) throws IOException { + Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); + Iterator<Result> iter = scan(USER_TO_ROLE_TABLE, null, null, CATALOG_CF, CATALOG_COL, filter); + List<String> lines = new ArrayList<>(); + while (iter.hasNext()) { + Result result = iter.next(); + lines.add(new String(result.getRow(), HBaseUtils.ENCODING) + ": " + + org.apache.commons.lang.StringUtils.join( + HBaseUtils.deserializeRoleList(result.getValue(CATALOG_CF, CATALOG_COL)), ',')); + } + if (lines.size() == 0) lines = 
noMatch(regex, "user"); + return lines; + } + + /** + * Print out a role + * @param name name of role to print + * @return string printout of role + */ + String printRole(String name) throws IOException, TException { + Role role = getRole(name); + if (role == null) return noSuch(name, "role"); + else return dumpThriftObject(role); + } + + /** + * Print out roles. + * @param regex regular to use to search for roles + * @return string printout of roles + * @throws IOException + * @throws TException + */ + List printRoles(String regex) throws IOException, TException { + List roles = scanRoles(regex); + if (roles.size() == 0) { + return noMatch(regex, "role"); + } else { + List lines = new ArrayList<>(); + for (Role role : roles) lines.add(dumpThriftObject(role)); + return lines; + } + } + + private List scanRoles(String regex) throws IOException { + Filter filter = null; + if (regex != null) { + filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); + } + Iterator iter = scan(ROLE_TABLE, null, null, CATALOG_CF, CATALOG_COL, filter); + List roles = new ArrayList<>(); + while (iter.hasNext()) { + Result result = iter.next(); + roles.add(HBaseUtils.deserializeRole(result.getRow(), + result.getValue(CATALOG_CF, CATALOG_COL))); + } + return roles; + } + private void buildRoleCache() throws IOException { if (!entireRoleTableInCache) { Iterator roles = scan(ROLE_TABLE, CATALOG_CF, ROLES_COL); @@ -1350,6 +1627,71 @@ void deleteTable(String dbName, String tableName) throws IOException { deleteTable(dbName, tableName, true); } + /** + * Print out a table. + * @param name The name for the table. This must include dbname.tablename + * @return string containing the table + * @throws IOException + * @throws TException + */ + String printTable(String name) throws IOException, TException { + byte[] key = HBaseUtils.buildKey(name); + @SuppressWarnings("deprecation") + HTableInterface htab = conn.getHBaseTable(TABLE_TABLE); + Get g = new Get(key); + g.addColumn(CATALOG_CF, CATALOG_COL); + g.addFamily(STATS_CF); + Result result = htab.get(g); + if (result.isEmpty()) return noSuch(name, "table"); + return printOneTable(result); + } + + /** + * Print tables + * @param regex to use to find the tables. Remember that dbname is in each + * table name. 
+ * @return tables as strings + * @throws IOException + * @throws TException + */ + List<String> printTables(String regex) throws IOException, TException { + Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regex)); + @SuppressWarnings("deprecation") + HTableInterface htab = conn.getHBaseTable(TABLE_TABLE); + Scan scan = new Scan(); + scan.addColumn(CATALOG_CF, CATALOG_COL); + scan.addFamily(STATS_CF); + scan.setFilter(filter); + Iterator<Result> iter = htab.getScanner(scan).iterator(); + if (!iter.hasNext()) return noMatch(regex, "table"); + List<String> lines = new ArrayList<>(); + while (iter.hasNext()) { + lines.add(printOneTable(iter.next())); + } + return lines; + } + + private String printOneTable(Result result) throws IOException, TException { + byte[] key = result.getRow(); + HBaseUtils.StorageDescriptorParts sdParts = + HBaseUtils.deserializeTable(key, result.getValue(CATALOG_CF, CATALOG_COL)); + StringBuilder builder = new StringBuilder(); + builder.append(dumpThriftObject(sdParts.containingTable)) + .append(" sdHash: ") + .append(Base64.encodeBase64URLSafeString(sdParts.sdHash)) + .append(" stats:"); + NavigableMap<byte[], byte[]> statsCols = result.getFamilyMap(STATS_CF); + for (Map.Entry<byte[], byte[]> statsCol : statsCols.entrySet()) { + builder.append(" column ") + .append(new String(statsCol.getKey(), HBaseUtils.ENCODING)) + .append(": "); + ColumnStatistics pcs = buildColStats(key, true); + ColumnStatisticsObj cso = HBaseUtils.deserializeStatsForOneColumn(pcs, statsCol.getValue()); + builder.append(dumpThriftObject(cso)); + } + return builder.toString(); + } + private void deleteTable(String dbName, String tableName, boolean decrementRefCnt) throws IOException { tableCache.remove(new ObjectPair<>(dbName, tableName)); @@ -1466,6 +1808,37 @@ void decrementStorageDescriptorRefCount(StorageDescriptor sd) throws IOException return key; } + /** + * Print out a storage descriptor. + * @param hash hash that is the key of the storage descriptor + * @return string version of the storage descriptor + */ + String printStorageDescriptor(byte[] hash) throws IOException, TException { + byte[] serialized = read(SD_TABLE, hash, CATALOG_CF, CATALOG_COL); + if (serialized == null) return noSuch(Base64.encodeBase64URLSafeString(hash), "storage descriptor"); + return dumpThriftObject(HBaseUtils.deserializeStorageDescriptor(serialized)); + } + + /** + * Print all of the storage descriptors. This doesn't take a regular expression since the key + * is an md5 hash and it's hard to see how a regex on this would be useful. 
+ private void deleteTable(String dbName, String tableName, boolean decrementRefCnt) throws IOException { tableCache.remove(new ObjectPair<>(dbName, tableName)); @@ -1466,6 +1808,37 @@ void decrementStorageDescriptorRefCount(StorageDescriptor sd) throws IOException return key; } + /** + * Print out a storage descriptor. + * @param hash hash that is the key of the storage descriptor + * @return string version of the storage descriptor + */ + String printStorageDescriptor(byte[] hash) throws IOException, TException { + byte[] serialized = read(SD_TABLE, hash, CATALOG_CF, CATALOG_COL); + if (serialized == null) return noSuch(Base64.encodeBase64URLSafeString(hash), "storage descriptor"); + return dumpThriftObject(HBaseUtils.deserializeStorageDescriptor(serialized)); + } + + /** + * Print all of the storage descriptors. This doesn't take a regular expression since the key + * is an md5 hash and it's hard to see how a regex on this would be useful. + * @return list of all storage descriptors as strings + * @throws IOException + * @throws TException + */ + List<String> printStorageDescriptors() throws IOException, TException { + Iterator<Result> results = scan(SD_TABLE, CATALOG_CF, CATALOG_COL); + if (!results.hasNext()) return Arrays.asList("No storage descriptors"); + List<String> lines = new ArrayList<>(); + while (results.hasNext()) { + Result result = results.next(); + lines.add(Base64.encodeBase64URLSafeString(result.getRow()) + ": " + + dumpThriftObject(HBaseUtils.deserializeStorageDescriptor(result.getValue(CATALOG_CF, + CATALOG_COL)))); + } + return lines; + } + private static class ByteArrayWrapper { byte[] wrapped; @@ -1604,25 +1977,9 @@ ColumnStatistics getTableStatistics(String dbName, String tblName, List<String> if (colStats == null) { // We initialize this late so that we don't create extras in the case of // partitions with no stats - colStats = new ColumnStatistics(); + colStats = buildColStats(results[i].getRow(), false); statsList.add(colStats); - ColumnStatisticsDesc csd = new ColumnStatisticsDesc(); - - // We need to figure out which partition these call stats are from. To do that we - // recontruct the key. We have to pull the dbName and tableName out of the key to - // find the partition values. - byte[] key = results[i].getRow(); - List<String> reconstructedPartVals = - HBaseUtils.deserializePartitionKey(getTable(dbName, tblName).getPartitionKeys(), key, - staticConf); - String partName = valToPartMap.get(reconstructedPartVals); - assert partName != null; - csd.setIsTblLevel(false); - csd.setDbName(dbName); - csd.setTableName(tblName); - csd.setPartName(partName); - colStats.setStatsDesc(csd); - } + } ColumnStatisticsObj cso = HBaseUtils.deserializeStatsForOneColumn(colStats, serializedColStats); cso.setColName(colNames.get(j)); @@ -1729,6 +2086,36 @@ private String getStatisticsTable(List<String> partVals) { return partVals == null ? TABLE_TABLE : PART_TABLE; } + private ColumnStatistics buildColStats(byte[] key, boolean fromTable) throws IOException { + // We initialize this late so that we don't create extras in the case of + // partitions with no stats + ColumnStatistics colStats = new ColumnStatistics(); + ColumnStatisticsDesc csd = new ColumnStatisticsDesc(); + + // If this is a table key, parse it as one + List<String> reconstructedKey; + if (fromTable) { + reconstructedKey = Arrays.asList(HBaseUtils.deserializeKey(key)); + csd.setIsTblLevel(true); + } else { + reconstructedKey = HBaseUtils.deserializePartitionKey(key, this); + csd.setIsTblLevel(false); + } + csd.setDbName(reconstructedKey.get(0)); + csd.setTableName(reconstructedKey.get(1)); + if (!fromTable) { + // Build the part name, for which we need the table + Table table = getTable(reconstructedKey.get(0), reconstructedKey.get(1)); + if (table == null) { + throw new RuntimeException("Unable to find table " + reconstructedKey.get(0) + "." + + reconstructedKey.get(1) + " even though I have a partition for it!"); + } + csd.setPartName(HBaseStore.buildExternalPartName(table, reconstructedKey.subList(2, + reconstructedKey.size()))); + } + colStats.setStatsDesc(csd); + return colStats; + } /********************************************************************************************** * File metadata related methods *********************************************************************************************/ @@ -1881,6 +2268,35 @@ void deleteMasterKey(Integer seqNo) throws IOException { delete(SECURITY_TABLE, key, CATALOG_CF, MASTER_KEY_COL); } + /** + * Print all rows in the security table.
It's not expected to be large. + * @return each row as one string + * @throws IOException + */ + List<String> printSecurity() throws IOException { + HTableInterface htab = conn.getHBaseTable(SECURITY_TABLE); + Scan scan = new Scan(); + scan.addColumn(CATALOG_CF, MASTER_KEY_COL); + scan.addColumn(CATALOG_CF, DELEGATION_TOKEN_COL); + Iterator<Result> iter = htab.getScanner(scan).iterator(); + if (!iter.hasNext()) return Arrays.asList("No security related entries"); + List<String> lines = new ArrayList<>(); + while (iter.hasNext()) { + Result result = iter.next(); + byte[] val = result.getValue(CATALOG_CF, MASTER_KEY_COL); + if (val != null) { + int seqNo = Integer.valueOf(new String(result.getRow(), HBaseUtils.ENCODING)); + lines.add("Master key " + seqNo + ": " + HBaseUtils.deserializeMasterKey(val)); + } else { + val = result.getValue(CATALOG_CF, DELEGATION_TOKEN_COL); + if (val == null) throw new RuntimeException("Huh? No master key, no delegation token!"); + lines.add("Delegation token " + new String(result.getRow(), HBaseUtils.ENCODING) + ": " + + HBaseUtils.deserializeDelegationToken(val)); + } + } + return lines; + } + /********************************************************************************************** * Sequence methods *********************************************************************************************/ @@ -1896,6 +2312,25 @@ long getNextSequence(byte[] sequence) throws IOException { return val; } + /** + * Print all entries in the sequence table. It's not expected to be large. + * @return each sequence as one string + * @throws IOException + */ + List<String> printSequences() throws IOException { + HTableInterface htab = conn.getHBaseTable(SEQUENCES_TABLE); + Get g = new Get(SEQUENCES_KEY); + g.addFamily(CATALOG_CF); + Result result = htab.get(g); + if (result.isEmpty()) return Arrays.asList("No sequences"); + List<String> lines = new ArrayList<>(); + for (Map.Entry<byte[], byte[]> entry : result.getFamilyMap(CATALOG_CF).entrySet()) { + lines.add(new String(entry.getKey(), HBaseUtils.ENCODING) + ": " + + new String(entry.getValue(), HBaseUtils.ENCODING)); + } + return lines; + } + /********************************************************************************************** * Cache methods *********************************************************************************************/ @@ -2055,12 +2490,27 @@ private void delete(String table, byte[] key, byte[] colFam, byte[] colName) thr return scanner.iterator(); } + /********************************************************************************************** + * Printing methods + *********************************************************************************************/ + private String noSuch(String name, String type) { + return "No such " + type + ": " + name.replaceAll(HBaseUtils.KEY_SEPARATOR_STR, "."); + } + private List<String> noMatch(String regex, String type) { + return Arrays.asList("No matching " + type + ": " + regex); + } + + private String dumpThriftObject(TBase obj) throws TException, UnsupportedEncodingException { + TMemoryBuffer buf = new TMemoryBuffer(1000); + TProtocol protocol = new TSimpleJSONProtocol(buf); + obj.write(protocol); + return buf.toString("UTF-8"); + } /********************************************************************************************** * Testing methods and classes *********************************************************************************************/ - @VisibleForTesting int countStorageDescriptor() throws IOException { ResultScanner scanner = conn.getHBaseTable(SD_TABLE).getScanner(new Scan());
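The dumpThriftObject helper above is the whole pretty-printing mechanism: any Thrift struct is rendered as JSON by writing it through TSimpleJSONProtocol into an in-memory transport. A self-contained sketch of the same technique, assuming nothing beyond the standard metastore Thrift classes (the Database field values are invented):

  import org.apache.hadoop.hive.metastore.api.Database;
  import org.apache.thrift.TException;
  import org.apache.thrift.protocol.TProtocol;
  import org.apache.thrift.protocol.TSimpleJSONProtocol;
  import org.apache.thrift.transport.TMemoryBuffer;

  public class ThriftJsonDumpSketch {
    public static void main(String[] args) throws TException, java.io.UnsupportedEncodingException {
      Database db = new Database();                // any TBase struct works the same way
      db.setName("default");
      db.setLocationUri("file:///tmp/warehouse");
      TMemoryBuffer buf = new TMemoryBuffer(1000); // initial size only; the buffer grows as needed
      TProtocol protocol = new TSimpleJSONProtocol(buf);
      db.write(protocol);                          // TSimpleJSONProtocol emits field names, not field ids
      System.out.println(buf.toString("UTF-8"));   // e.g. {"name":"default","locationUri":"file:///tmp/warehouse"}
    }
  }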
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseSchemaTool.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseSchemaTool.java index 8eb6116..25598b0 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseSchemaTool.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseSchemaTool.java @@ -18,27 +18,20 @@ */ package org.apache.hadoop.hive.metastore.hbase; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.cli.ParseException; +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.Function; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Role; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.thrift.TBase; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.protocol.TSimpleJSONProtocol; -import org.apache.thrift.transport.TMemoryBuffer; - -import java.io.IOException; -import java.lang.reflect.Method; + +import java.io.PrintStream; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -47,194 +40,157 @@ */ public class HBaseSchemaTool { - private static String[] commands = {"db", "part", "parts", "role", "table", "function", - "install"}; + private static final Log LOG = LogFactory.getLog(HBaseSchemaTool.class.getName()); - public static void main(String[] args) throws Exception { + public static void main(String[] args) { Options options = new Options(); options.addOption(OptionBuilder - .withLongOpt("column") - .withDescription("Comma separated list of column names") - .hasArg() - .create('c')); - - options.addOption(OptionBuilder - .withLongOpt("db") - .withDescription("Database name") - .hasArg() - .create('d')); - - options.addOption(OptionBuilder - .withLongOpt("function") - .withDescription("Function name") - .hasArg() - .create('f')); - - options.addOption(OptionBuilder .withLongOpt("help") .withDescription("You're looking at it") .create('h')); options.addOption(OptionBuilder - .withLongOpt("role") - .withDescription("Role name") + .withLongOpt("install") + .withDescription("Install the schema onto an HBase cluster.") + .create('i')); + + options.addOption(OptionBuilder + .withLongOpt("key") + .withDescription("Key to scan with.
This should be an exact key (not a regular expression).") .hasArg() - .create('r')); + .create('k')); options.addOption(OptionBuilder - .withLongOpt("partvals") - .withDescription("Comma separated list of partition values, in order of partition columns") - .hasArg() - .create('p')); + .withLongOpt("list-tables") + .withDescription("List tables in HBase metastore") + .create('l')); options.addOption(OptionBuilder - .withLongOpt("stats") - .withDescription("Get statistics rather than catalog object") - .create('s')); + .withLongOpt("regex-key") + .withDescription("Regular expression to scan keys with.") + .hasArg() + .create('r')); options.addOption(OptionBuilder .withLongOpt("table") - .withDescription("Table name") + .withDescription("HBase metastore table to scan") .hasArg() .create('t')); - CommandLine cli = new GnuParser().parse(options, args); + CommandLine cli = null; + try { + cli = new GnuParser().parse(options, args); + } catch (ParseException e) { + System.err.println("Parse Exception: " + e.getMessage()); + usage(options); + return; + } if (cli.hasOption('h')) { - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("hbaseschematool", options); + usage(options); return; } - String[] cmds = cli.getArgs(); - if (cmds.length != 1) { - System.err.print("Must include a cmd, valid cmds are: "); - for (int i = 0; i < commands.length; i++) { - if (i != 0) System.err.print(", "); - System.err.print(commands[i]); - } - System.err.println(); - System.exit(1); - } - String cmd = cmds[0]; + Configuration conf = new Configuration(); - List<String> parts = null; - if (cli.hasOption('p')) { - parts = Arrays.asList(cli.getOptionValue('p').split(",")); + if (cli.hasOption('i')) { + new HBaseSchemaTool().install(conf, System.err); + return; } - List<String> cols = null; - if (cli.hasOption('c')) { - cols = Arrays.asList(cli.getOptionValue('c').split(",")); + String key = null; + if (cli.hasOption('k')) key = cli.getOptionValue('k'); + String regex = null; + if (cli.hasOption('r')) regex = cli.getOptionValue('r'); + if (key != null && regex != null) { + usage(options); + return; } + if (key == null && regex == null) regex = ".*"; + if (!cli.hasOption('l') && !cli.hasOption('t')) { + // Nothing to do unless a table is given or a listing is requested. + usage(options); + return; + } - HBaseSchemaTool tool = new HBaseSchemaTool(cli.getOptionValue('d'), cli.getOptionValue('t'), - parts, cli.getOptionValue('f'), cli.getOptionValue('r'), cols, cli.hasOption('s')); - Method method = tool.getClass().getMethod(cmd); - method.invoke(tool); - - + // I do this in the object rather than in the static main so that it's easier to test. + new HBaseSchemaTool().go(cli.hasOption('l'), cli.getOptionValue('t'), key, regex, conf, + System.out, System.err); } - private HBaseReadWrite hrw; - private String dbName; - private String funcName; - private String tableName; - private List<String> partVals; - private String roleName; - private List<String> colNames; - private boolean hasStats; - - private HBaseSchemaTool(String dbname, String tn, List<String> pv, String fn, String rn, - List<String> cn, boolean s) { - dbName = dbname; - tableName = tn; - partVals = pv; - funcName = fn; - roleName = rn; - colNames = cn; - hasStats = s; - HBaseReadWrite.setConf(new Configuration()); - hrw = HBaseReadWrite.getInstance(); + private static void usage(Options options) { + HelpFormatter formatter = new HelpFormatter(); + String header = "This tool dumps the contents of your hbase metastore. You need to specify\n" + + "the table to dump. You can optionally specify a regular expression on the key for\n" + + "the table. Keep in mind that the key is often a compound. For partitions, regular\n" + + "expressions are not used because non-string values are\nstored in binary. Instead, for " + + "partitions you can specify as much of the exact prefix as you want. So you can give " + + "dbname.tablename or dbname.tablename.pval1..."; + String footer = "If neither key nor regex is provided, a regex of .* will be assumed. You\n" + + "cannot set both key and regex."; + formatter.printHelp("hbaseschematool", header, options, footer); + return; + }
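With those options in place, typical invocations look like the sketch below, driving main() directly. The database name and key values are invented, and the sketch sits in the metastore.hbase package on the assumption that the HBaseReadWrite table-name constants are package visible:

  package org.apache.hadoop.hive.metastore.hbase;

  public class SchemaToolUsageSketch {
    public static void main(String[] args) {
      // List the HBase tables the metastore uses.
      HBaseSchemaTool.main(new String[] {"-l"});
      // Dump a single database by exact key; go() swaps '.' for HBaseUtils.KEY_SEPARATOR.
      HBaseSchemaTool.main(new String[] {"-t", HBaseReadWrite.DB_TABLE, "-k", "mydb"});
      // Dump every table whose key starts with a database name; -k and -r are mutually exclusive.
      HBaseSchemaTool.main(new String[] {"-t", HBaseReadWrite.TABLE_TABLE, "-r", "mydb.*"});
    }
  }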
- public void db() throws IOException, TException { - Database db = hrw.getDb(dbName); - if (db == null) System.err.println("No such database: " + db); - else dump(db); - } - - public void install() throws IOException { - HBaseReadWrite.createTablesIfNotExist(); - } - - public void part() throws IOException, TException { - if (hasStats) { - Table table = hrw.getTable(dbName, tableName); - if (table == null) { - System.err.println("No such table: " + dbName + "." + tableName); - return; - } - String partName = HBaseStore.buildExternalPartName(table, partVals); - List<ColumnStatistics> stats = hrw.getPartitionStatistics(dbName, tableName, - Arrays.asList(partName), Arrays.asList(partVals), colNames); - if (stats == null) { - System.err.println("No stats for " + dbName + "." + tableName + "." + - StringUtils.join(partVals, ':')); - } else { - for (ColumnStatistics stat : stats) dump(stat); - } + @VisibleForTesting void go(boolean listTables, String table, String key, String regex, + Configuration conf, PrintStream out, PrintStream err) { + List<String> lines = new ArrayList<>(); + if (listTables) { + lines = Arrays.asList(HBaseReadWrite.tableNames); } else { - Partition part = hrw.getPartition(dbName, tableName, partVals); - if (part == null) { - System.err.println("No such partition: " + dbName + "." + tableName + "." + - StringUtils.join(partVals, ':')); - } else { - dump(part); + // If they've used '.'
as a key separator we need to replace it with the separator used by + // HBaseUtils + if (key != null) key = key.replace('.', HBaseUtils.KEY_SEPARATOR); + try { + HBaseReadWrite.setConf(conf); + HBaseReadWrite hrw = HBaseReadWrite.getInstance(); + if (table.equalsIgnoreCase(HBaseReadWrite.DB_TABLE)) { + if (key != null) lines.add(hrw.printDatabase(key)); + else lines.addAll(hrw.printDatabases(regex)); + } else if (table.equalsIgnoreCase(HBaseReadWrite.FUNC_TABLE)) { + if (key != null) lines.add(hrw.printFunction(key)); + else lines.addAll(hrw.printFunctions(regex)); + } else if (table.equalsIgnoreCase(HBaseReadWrite.GLOBAL_PRIVS_TABLE)) { + // Ignore whatever they passed; there is always either one global privilege entry or none. + lines.add(hrw.printGlobalPrivs()); + } else if (table.equalsIgnoreCase(HBaseReadWrite.PART_TABLE)) { + if (key != null) lines.add(hrw.printPartition(key)); + else lines.addAll(hrw.printPartitions(regex)); + } else if (table.equalsIgnoreCase(HBaseReadWrite.USER_TO_ROLE_TABLE)) { + if (key != null) lines.add(hrw.printRolesForUser(key)); + else lines.addAll(hrw.printRolesForUsers(regex)); + } else if (table.equalsIgnoreCase(HBaseReadWrite.ROLE_TABLE)) { + if (key != null) lines.add(hrw.printRole(key)); + else lines.addAll(hrw.printRoles(regex)); + } else if (table.equalsIgnoreCase(HBaseReadWrite.TABLE_TABLE)) { + if (key != null) lines.add(hrw.printTable(key)); + else lines.addAll(hrw.printTables(regex)); + } else if (table.equalsIgnoreCase(HBaseReadWrite.SD_TABLE)) { + if (key != null) lines.add(hrw.printStorageDescriptor(Base64.decodeBase64(key))); + else lines.addAll(hrw.printStorageDescriptors()); + } else if (table.equalsIgnoreCase(HBaseReadWrite.SECURITY_TABLE)) { + // We always print the whole security table; we don't worry about finding particular entries. + lines.addAll(hrw.printSecurity()); + } else if (table.equalsIgnoreCase(HBaseReadWrite.SEQUENCES_TABLE)) { + // We always print the whole sequence table; we don't worry about finding particular entries. + lines.addAll(hrw.printSequences()); + } else { + err.println("Unknown table: " + table); + return; + } + } catch (Exception e) { + err.println("Caught exception " + e.getClass() + " with message: " + e.getMessage()); + return; + } } + for (String line : lines) out.println(line); }
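Because go() writes to whichever PrintStreams it is handed, its output is easy to capture in memory rather than on the console; a sketch, assuming a Configuration that can actually reach the HBase cluster (the role-table dump and the .* regex are arbitrary choices):

  package org.apache.hadoop.hive.metastore.hbase;

  import java.io.ByteArrayOutputStream;
  import java.io.PrintStream;
  import org.apache.hadoop.conf.Configuration;

  public class CaptureDumpSketch {
    public static void main(String[] args) {
      ByteArrayOutputStream outBytes = new ByteArrayOutputStream();
      ByteArrayOutputStream errBytes = new ByteArrayOutputStream();
      // Dump every role as JSON into outBytes instead of stdout.
      new HBaseSchemaTool().go(false, HBaseReadWrite.ROLE_TABLE, null, ".*",
          new Configuration(), new PrintStream(outBytes), new PrintStream(errBytes));
      System.out.print(outBytes.toString());
      System.err.print(errBytes.toString());
    }
  }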
- public void parts() throws IOException, TException { - List<Partition> parts = hrw.scanPartitionsInTable(dbName, tableName, -1); - if (parts == null) { - System.err.println("No such table: " + dbName + "." + tableName); - } else { - for (Partition p : parts) dump(p); - } - } - - public void role() throws IOException, TException { - Role role = hrw.getRole(roleName); - if (role == null) System.err.println("No such role: " + roleName); - else dump(role); - } - - public void table() throws IOException, TException { - if (hasStats) { - ColumnStatistics stats = hrw.getTableStatistics(dbName, tableName, colNames); - if (stats == null) System.err.println("No stats for " + dbName + "." + tableName); - else dump(stats); - } else { - Table table = hrw.getTable(dbName, tableName); - if (table == null) System.err.println("No such table: " + dbName + "." + tableName); - else dump(table); + @VisibleForTesting void install(Configuration conf, PrintStream err) { + try { + // We need to set the conf because createTablesIfNotExist will get a thread local version + // which requires that the configuration object be set. + HBaseReadWrite.setConf(conf); + HBaseReadWrite.createTablesIfNotExist(); + } catch (Exception e) { + err.println("Caught exception " + e.getClass() + " with message: " + e.getMessage()); + return; + } } - - public void function() throws IOException, TException { - Function func = hrw.getFunction(dbName, funcName); - if (func == null) System.err.println("No such function: " + dbName + "." + funcName); - else dump(func); - } - - private void dump(TBase thriftObj) throws TException { - TMemoryBuffer buf = new TMemoryBuffer(1000); - TProtocol protocol = new TSimpleJSONProtocol(buf); - thriftObj.write(protocol); - System.out.println(new String(buf.getArray())); - } - - } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java index 1885089..0afcdd9 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java @@ -21,7 +21,6 @@ import com.google.common.collect.Lists; import com.google.protobuf.ByteString; import com.google.protobuf.InvalidProtocolBufferException; - import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -74,10 +73,8 @@ import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; -import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; -import java.util.Deque; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -847,15 +844,67 @@ static void assembleStorageDescriptor(StorageDescriptor sd, StorageDescriptorPar } /** + * Deserialize a partition key when you know nothing about it. That is, you do not know which + * dbname and tablename it came from. + * @param key the key fetched from HBase + * @param callback A reference to the calling HBaseReadWrite object. This has to be done as a + * callback because we have to first deserialize the database name and table + * name, and then fetch the table information, and only then will we know how to + * deserialize the rest of the key. + * @return a list that includes the dbname, tablename, and partition values + * @throws IOException + */ + static List<String> deserializePartitionKey(byte[] key, HBaseReadWrite callback) + throws IOException { + List<String> keyParts = + deserializeDbNameTableNameFromPartitionKey(key, callback.getConf()); + Table table = callback.getTable(keyParts.get(0), keyParts.get(1)); + keyParts.addAll(deserializePartitionKey(table.getPartitionKeys(), key, callback.getConf())); + return keyParts; + } + + /** * Deserialize a partition. This version should be used when the partition key is not already - * known (eg a scan). + * known and the database and table name are not known either (eg a full scan). Because the + * dbname and tablename (and thus the partition columns) are not known a priori, this version + * has to fetch the table once it figures out which table the key refers to. If you already + * have the table object you should use + * {@link #deserializePartition(String,String,List,byte[],byte[],Configuration)} * @param key the key fetched from HBase + * @param serialized the value fetched from HBase + * @param callback A reference to the calling HBaseReadWrite object. This has to be done as a + * callback because we have to first deserialize the database name and table + * name, and then fetch the table information, and only then will we know how to + * deserialize the rest of the key.
* @return A struct that contains the partition plus parts of the storage descriptor */ - static StorageDescriptorParts deserializePartition(String dbName, String tableName, List<FieldSchema> partitions, - byte[] key, byte[] serialized, Configuration conf) throws InvalidProtocolBufferException { - List<String> keys = deserializePartitionKey(partitions, key, conf); + static StorageDescriptorParts deserializePartition(byte[] key, byte[] serialized, + HBaseReadWrite callback) + throws IOException { + List<String> dbNameTableName = + deserializeDbNameTableNameFromPartitionKey(key, callback.getConf()); + Table table = callback.getTable(dbNameTableName.get(0), dbNameTableName.get(1)); + List<String> keys = deserializePartitionKey(table.getPartitionKeys(), key, callback.getConf()); + return deserializePartition(dbNameTableName.get(0), dbNameTableName.get(1), keys, serialized); + } + + /** + * Deserialize a partition. This version should be used when you know the dbname and tablename + * but not the partition values. + * @param dbName database this partition is in + * @param tableName table this partition is in + * @param partitions schemas for the partition columns of this table + * @param key key fetched from HBase + * @param serialized serialized version of the partition + * @param conf configuration to use + * @return A struct that contains the partition plus parts of the storage descriptor + * @throws InvalidProtocolBufferException + */ + static StorageDescriptorParts deserializePartition(String dbName, String tableName, + List<FieldSchema> partitions, byte[] key, + byte[] serialized, Configuration conf) + throws InvalidProtocolBufferException { + List<String> keys = deserializePartitionKey(partitions, key, conf); return deserializePartition(dbName, tableName, keys, serialized); } @@ -887,12 +936,46 @@ static StorageDescriptorParts deserializePartition(String dbName, String tableNa return sdParts; } - private static String[] deserializeKey(byte[] key) { + static String[] deserializeKey(byte[] key) { String k = new String(key, ENCODING); return k.split(KEY_SEPARATOR_STR); } - static List<String> deserializePartitionKey(List<FieldSchema> partitions, byte[] key, + private static List<String> deserializeDbNameTableNameFromPartitionKey(byte[] key, + Configuration conf) { + StringBuffer names = new StringBuffer(); + names.append("dbName,tableName,"); + StringBuffer types = new StringBuffer(); + types.append("string,string,"); + BinarySortableSerDe serDe = new BinarySortableSerDe(); + Properties props = new Properties(); + props.setProperty(serdeConstants.LIST_COLUMNS, names.toString()); + props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString()); + try { + serDe.initialize(conf, props); + List deserializedkeys = ((List)serDe.deserialize(new BytesWritable(key))).subList(0, 2); + List<String> keys = new ArrayList<>(); + for (int i=0;i<deserializedkeys.size();i++) { + keys.add(deserializedkeys.get(i).toString()); + } + return keys; + } catch (SerDeException e) { + throw new RuntimeException("Error deserializing key", e); + } + } + + static List<String> deserializePartitionKey(List<FieldSchema> partitions, byte[] key, Configuration conf) { StringBuffer names = new StringBuffer(); names.append("dbName,tableName,"); @@ -911,7 +994,7 @@ static StorageDescriptorParts deserializePartition(String dbName, String tableNa props.setProperty(serdeConstants.LIST_COLUMNS, names.toString()); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString()); try { - serDe.initialize(new Configuration(), props); + serDe.initialize(conf, props); List deserializedkeys = ((List)serDe.deserialize(new BytesWritable(key))).subList(2, partitions.size()+2); List partitionKeys = new ArrayList(); for (int i=0;i
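Both key-decoding helpers above share one idiom: declare only the leading columns you care about and let BinarySortableSerDe read them off the front of the key, ignoring whatever is serialized after them. A self-contained sketch of that decode step (simplified: it assumes both fields are present and non-null, and drops the trailing commas in the column lists, which String.split ignores anyway):

  package org.apache.hadoop.hive.metastore.hbase;

  import java.util.Arrays;
  import java.util.List;
  import java.util.Properties;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hive.serde.serdeConstants;
  import org.apache.hadoop.hive.serde2.SerDeException;
  import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
  import org.apache.hadoop.io.BytesWritable;

  public class PartitionKeyDecodeSketch {
    // Pull just dbName and tableName off the front of a partition key; any partition
    // values serialized after them are simply never read.
    static List<String> dbAndTable(byte[] key, Configuration conf) throws SerDeException {
      BinarySortableSerDe serDe = new BinarySortableSerDe();
      Properties props = new Properties();
      props.setProperty(serdeConstants.LIST_COLUMNS, "dbName,tableName");
      props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
      serDe.initialize(conf, props);
      List<?> fields = (List<?>) serDe.deserialize(new BytesWritable(key));
      // String columns come back as Text writables, so toString is all we need.
      return Arrays.asList(fields.get(0).toString(), fields.get(1).toString());
    }
  }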