diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 64e7e0a..be6c64a 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -373,6 +373,17 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
     METASTOREURIS("hive.metastore.uris", "",
         "Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."),
+    METASTORE_FASTPATH("hive.metastore.fastpath", false,
+        "Used to avoid all of the proxies and object copies in the metastore. Note, if this is " +
+        "set, you MUST use a local metastore (hive.metastore.uris must be empty) otherwise " +
+        "undefined and most likely undesired behavior will result"),
+    METASTORE_HBASE_CACHE_SIZE("hive.metastore.hbase.cache.size", 100000, "Maximum number of " +
+        "objects we will place in the hbase metastore cache. The objects will be divided up by " +
+        "types that we need to cache."),
+    METASTORE_HBASE_CACHE_TIME_TO_LIVE("hive.metastore.hbase.cache.ttl", "600s",
+        new TimeValidator(TimeUnit.SECONDS),
+        "Number of seconds for stats items to live in the cache"),
+
     METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3,
         "Number of retries while opening a connection to metastore"),
     METASTORETHRIFTFAILURERETRIES("hive.metastore.failure.retries", 1,
diff --git itests/hive-unit/pom.xml itests/hive-unit/pom.xml
index 5bbd406..dc951ff 100644
--- itests/hive-unit/pom.xml
+++ itests/hive-unit/pom.xml
@@ -245,6 +245,27 @@
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-server</artifactId>
+      <version>${hbase.hadoop2.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-hadoop-compat</artifactId>
+      <version>${hbase.hadoop2.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-hadoop2-compat</artifactId>
+      <version>${hbase.hadoop2.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-minicluster</artifactId>
       <scope>test</scope>
diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreIntegration.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreIntegration.java
new file mode 100644
index 0000000..2d02707
--- /dev/null
+++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreIntegration.java
@@ -0,0 +1,793 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Integration tests with HBase Mini-cluster for HBaseStore + */ +public class TestHBaseStoreIntegration { + + private static final Log LOG = LogFactory.getLog(TestHBaseStoreIntegration.class.getName()); + + private static HBaseTestingUtility utility; + private static HTableInterface tblTable; + private static HTableInterface sdTable; + private static HTableInterface partTable; + private static HTableInterface dbTable; + private static HTableInterface roleTable; + private static Map emptyParameters = new HashMap(); + + @Rule public ExpectedException thrown = ExpectedException.none(); + @Mock private HConnection hconn; + private HBaseStore store; + private HiveConf conf; + + @BeforeClass + public static void startMiniCluster() throws Exception { + utility = new HBaseTestingUtility(); + utility.startMiniCluster(); + byte[][] families = new byte[][] {HBaseReadWrite.CATALOG_CF, HBaseReadWrite.STATS_CF}; + tblTable = utility.createTable(HBaseReadWrite.TABLE_TABLE.getBytes(HBaseUtils.ENCODING), + families); + sdTable = utility.createTable(HBaseReadWrite.SD_TABLE.getBytes(HBaseUtils.ENCODING), + HBaseReadWrite.CATALOG_CF); + partTable = utility.createTable(HBaseReadWrite.PART_TABLE.getBytes(HBaseUtils.ENCODING), + families); + dbTable = utility.createTable(HBaseReadWrite.DB_TABLE.getBytes(HBaseUtils.ENCODING), + HBaseReadWrite.CATALOG_CF); + roleTable = utility.createTable(HBaseReadWrite.ROLE_TABLE.getBytes(HBaseUtils.ENCODING), + HBaseReadWrite.CATALOG_CF); + } + + @AfterClass + public 
static void shutdownMiniCluster() throws Exception { + utility.shutdownMiniCluster(); + } + + @Before + public void setupConnection() throws IOException { + MockitoAnnotations.initMocks(this); + Mockito.when(hconn.getTable(HBaseReadWrite.SD_TABLE)).thenReturn(sdTable); + Mockito.when(hconn.getTable(HBaseReadWrite.TABLE_TABLE)).thenReturn(tblTable); + Mockito.when(hconn.getTable(HBaseReadWrite.PART_TABLE)).thenReturn(partTable); + Mockito.when(hconn.getTable(HBaseReadWrite.DB_TABLE)).thenReturn(dbTable); + Mockito.when(hconn.getTable(HBaseReadWrite.ROLE_TABLE)).thenReturn(roleTable); + conf = new HiveConf(); + // Turn off caching, as we want to test actual interaction with HBase + conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true); + HBaseReadWrite hbase = HBaseReadWrite.getInstance(conf); + hbase.setConnection(hconn); + store = new HBaseStore(); + store.setConf(conf); + } + + @Test + public void createDb() throws Exception { + String dbname = "mydb"; + Database db = new Database(dbname, "no description", "file:///tmp", emptyParameters); + store.createDatabase(db); + + Database d = store.getDatabase("mydb"); + Assert.assertEquals(dbname, d.getName()); + Assert.assertEquals("no description", d.getDescription()); + Assert.assertEquals("file:///tmp", d.getLocationUri()); + } + + @Test + public void dropDb() throws Exception { + String dbname = "anotherdb"; + Database db = new Database(dbname, "no description", "file:///tmp", emptyParameters); + store.createDatabase(db); + + Database d = store.getDatabase(dbname); + Assert.assertNotNull(d); + + store.dropDatabase(dbname); + thrown.expect(NoSuchObjectException.class); + store.getDatabase(dbname); + } + + @Test + public void createTable() throws Exception { + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table("mytable", "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + Table t = store.getTable("default", "mytable"); + Assert.assertEquals(1, t.getSd().getColsSize()); + Assert.assertEquals("col1", t.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", t.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", t.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp", t.getSd().getLocation()); + Assert.assertEquals("input", t.getSd().getInputFormat()); + Assert.assertEquals("output", t.getSd().getOutputFormat()); + Assert.assertEquals("me", t.getOwner()); + Assert.assertEquals("default", t.getDbName()); + Assert.assertEquals("mytable", t.getTableName()); + } + + @Test + public void alterTable() throws Exception { + String tableName = "alttable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, 
+ emptyParameters, null, null, null); + store.createTable(table); + + startTime += 10; + table.setLastAccessTime(startTime); + store.alterTable("default", tableName, table); + + Table t = store.getTable("default", tableName); + Assert.assertEquals(1, t.getSd().getColsSize()); + Assert.assertEquals("col1", t.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", t.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", t.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp", t.getSd().getLocation()); + Assert.assertEquals("input", t.getSd().getInputFormat()); + Assert.assertEquals("output", t.getSd().getOutputFormat()); + Assert.assertEquals("me", t.getOwner()); + Assert.assertEquals("default", t.getDbName()); + Assert.assertEquals(tableName, t.getTableName()); + Assert.assertEquals(startTime, t.getLastAccessTime()); + } + + @Test + public void dropTable() throws Exception { + String tableName = "dtable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + Table t = store.getTable("default", tableName); + Assert.assertNotNull(t); + + store.dropTable("default", tableName); + Assert.assertNull(store.getTable("default", tableName)); + } + + @Test + public void createPartition() throws Exception { + String dbName = "default"; + String tableName = "myparttable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List vals = new ArrayList(); + vals.add("fred"); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=fred"); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Partition p = store.getPartition(dbName, tableName, vals); + Assert.assertEquals(1, p.getSd().getColsSize()); + Assert.assertEquals("col1", p.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", p.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", p.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", p.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", p.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp/pc=fred", p.getSd().getLocation()); + Assert.assertEquals("input", p.getSd().getInputFormat()); + Assert.assertEquals("output", p.getSd().getOutputFormat()); + Assert.assertEquals(dbName, p.getDbName()); + Assert.assertEquals(tableName, 
p.getTableName()); + Assert.assertEquals(1, p.getValuesSize()); + Assert.assertEquals("fred", p.getValues().get(0)); + } + + // TODO - Fix this and the next test. They depend on test execution order and are bogus. + @Test + public void createManyPartitions() throws Exception { + String dbName = "default"; + String tableName = "manyParts"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List partVals = Arrays.asList("alan", "bob", "carl", "doug", "ethan"); + for (String val : partVals) { + List vals = new ArrayList(); + vals.add(val); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=" + val); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Partition p = store.getPartition(dbName, tableName, vals); + Assert.assertEquals("file:/tmp/pc=" + val, p.getSd().getLocation()); + } + + Assert.assertEquals(2, HBaseReadWrite.getInstance(conf).countStorageDescriptor()); + + } + + @Test + public void createDifferentPartition() throws Exception { + int startTime = (int)(System.currentTimeMillis() / 1000); + Map emptyParameters = new HashMap(); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input2", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table("differenttable", "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + Assert.assertEquals(3, HBaseReadWrite.getInstance(conf).countStorageDescriptor()); + + } + + @Test + public void getPartitions() throws Exception { + String dbName = "default"; + String tableName = "manyParts"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List partVals = Arrays.asList("alan", "bob", "carl", "doug", "ethan"); + for (String val : partVals) { + List vals = new ArrayList(); + vals.add(val); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=" + val); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Partition p = store.getPartition(dbName, tableName, vals); + Assert.assertEquals("file:/tmp/pc=" + val, p.getSd().getLocation()); + } + + List parts = store.getPartitions(dbName, tableName, -1); + 
Assert.assertEquals(5, parts.size()); + String[] pv = new String[5]; + for (int i = 0; i < 5; i++) pv[i] = parts.get(i).getValues().get(0); + Arrays.sort(pv); + Assert.assertArrayEquals(pv, partVals.toArray(new String[5])); + } + + @Test + public void listPartitions() throws Exception { + String dbName = "default"; + String tableName = "listParts"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + partCols.add(new FieldSchema("region", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + String[][] partVals = new String[][]{{"today", "north america"}, {"tomorrow", "europe"}}; + for (String[] pv : partVals) { + List vals = new ArrayList(); + for (String v : pv) vals.add(v); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=" + pv[0] + "/region=" + pv[1]); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + } + + List names = store.listPartitionNames(dbName, tableName, (short) -1); + Assert.assertEquals(2, names.size()); + String[] resultNames = names.toArray(new String[names.size()]); + Arrays.sort(resultNames); + Assert.assertArrayEquals(resultNames, new String[]{"pc=today/region=north america", + "pc=tomorrow/region=europe"}); + + List parts = store.getPartitionsByNames(dbName, tableName, names); + Assert.assertArrayEquals(partVals[0], parts.get(0).getValues().toArray(new String[2])); + Assert.assertArrayEquals(partVals[1], parts.get(1).getValues().toArray(new String[2])); + + store.dropPartitions(dbName, tableName, names); + List afterDropParts = store.getPartitions(dbName, tableName, -1); + Assert.assertEquals(0, afterDropParts.size()); + } + + @Test + public void dropPartition() throws Exception { + String dbName = "default"; + String tableName = "myparttable2"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List vals = Arrays.asList("fred"); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=fred"); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Assert.assertNotNull(store.getPartition(dbName, tableName, vals)); + store.dropPartition(dbName, tableName, vals); + thrown.expect(NoSuchObjectException.class); + store.getPartition(dbName, tableName, vals); + } + + @Test + public void createRole() throws Exception { + int now = (int)System.currentTimeMillis(); + String roleName = "myrole"; + store.addRole(roleName, "me"); 
+ + Role r = store.getRole(roleName); + Assert.assertEquals(roleName, r.getRoleName()); + Assert.assertEquals("me", r.getOwnerName()); + Assert.assertTrue(now <= r.getCreateTime()); + } + + @Test + public void dropRole() throws Exception { + int now = (int)System.currentTimeMillis(); + String roleName = "anotherrole"; + store.addRole(roleName, "me"); + + Role r = store.getRole(roleName); + Assert.assertEquals(roleName, r.getRoleName()); + Assert.assertEquals("me", r.getOwnerName()); + Assert.assertTrue(now <= r.getCreateTime()); + + store.removeRole(roleName); + thrown.expect(NoSuchObjectException.class); + store.getRole(roleName); + } + + @Test + public void tableStatistics() throws Exception { + long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statstable"; + String boolcol = "boolcol"; + String longcol = "longcol"; + String doublecol = "doublecol"; + String stringcol = "stringcol"; + String binarycol = "bincol"; + String decimalcol = "deccol"; + long trues = 37; + long falses = 12; + long booleanNulls = 2; + long longHigh = 120938479124L; + long longLow = -12341243213412124L; + long longNulls = 23; + long longDVs = 213L; + double doubleHigh = 123423.23423; + double doubleLow = 0.00001234233; + long doubleNulls = 92; + long doubleDVs = 1234123421L; + long strMaxLen = 1234; + double strAvgLen = 32.3; + long strNulls = 987; + long strDVs = 906; + long binMaxLen = 123412987L; + double binAvgLen = 76.98; + long binNulls = 976998797L; + Decimal decHigh = new Decimal(); + decHigh.setScale((short)3); + decHigh.setUnscaled("3876".getBytes()); // I have no clue how this is translated, but it + // doesn't matter + Decimal decLow = new Decimal(); + decLow.setScale((short)3); + decLow.setUnscaled("38".getBytes()); + long decNulls = 13; + long decDVs = 923947293L; + + List cols = new ArrayList(); + cols.add(new FieldSchema(boolcol, "boolean", "nocomment")); + cols.add(new FieldSchema(longcol, "long", "nocomment")); + cols.add(new FieldSchema(doublecol, "double", "nocomment")); + cols.add(new FieldSchema(stringcol, "varchar(32)", "nocomment")); + cols.add(new FieldSchema(binarycol, "binary", "nocomment")); + cols.add(new FieldSchema(decimalcol, "decimal(5, 3)", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, dbname, "me", (int)now / 1000, (int)now / 1000, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); + desc.setTableName(tableName); + desc.setIsTblLevel(true); + stats.setStatsDesc(desc); + + // Do one column of each type + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName(boolcol); + obj.setColType("boolean"); + ColumnStatisticsData data = new ColumnStatisticsData(); + BooleanColumnStatsData boolData = new BooleanColumnStatsData(); + boolData.setNumTrues(trues); + boolData.setNumFalses(falses); + boolData.setNumNulls(booleanNulls); + data.setBooleanStats(boolData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + obj = new ColumnStatisticsObj(); + obj.setColName(longcol); + obj.setColType("long"); + data = new ColumnStatisticsData(); + LongColumnStatsData longData = new LongColumnStatsData(); + longData.setHighValue(longHigh); + 
longData.setLowValue(longLow); + longData.setNumNulls(longNulls); + longData.setNumDVs(longDVs); + data.setLongStats(longData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + obj = new ColumnStatisticsObj(); + obj.setColName(doublecol); + obj.setColType("double"); + data = new ColumnStatisticsData(); + DoubleColumnStatsData doubleData = new DoubleColumnStatsData(); + doubleData.setHighValue(doubleHigh); + doubleData.setLowValue(doubleLow); + doubleData.setNumNulls(doubleNulls); + doubleData.setNumDVs(doubleDVs); + data.setDoubleStats(doubleData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, + Arrays.asList(boolcol, longcol, doublecol)); + + // We'll check all of the individual values later. + Assert.assertEquals(3, stats.getStatsObjSize()); + + // check that we can fetch just some of the columns + stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(boolcol)); + Assert.assertEquals(1, stats.getStatsObjSize()); + + stats = new ColumnStatistics(); + stats.setStatsDesc(desc); + + + obj = new ColumnStatisticsObj(); + obj.setColName(stringcol); + obj.setColType("string"); + data = new ColumnStatisticsData(); + StringColumnStatsData strData = new StringColumnStatsData(); + strData.setMaxColLen(strMaxLen); + strData.setAvgColLen(strAvgLen); + strData.setNumNulls(strNulls); + strData.setNumDVs(strDVs); + data.setStringStats(strData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + obj = new ColumnStatisticsObj(); + obj.setColName(binarycol); + obj.setColType("binary"); + data = new ColumnStatisticsData(); + BinaryColumnStatsData binData = new BinaryColumnStatsData(); + binData.setMaxColLen(binMaxLen); + binData.setAvgColLen(binAvgLen); + binData.setNumNulls(binNulls); + data.setBinaryStats(binData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + obj = new ColumnStatisticsObj(); + obj.setColName(decimalcol); + obj.setColType("decimal(5,3)"); + data = new ColumnStatisticsData(); + DecimalColumnStatsData decData = new DecimalColumnStatsData(); + LOG.debug("Setting decimal high value to " + decHigh.getScale() + " <" + new String(decHigh.getUnscaled()) + ">"); + decData.setHighValue(decHigh); + decData.setLowValue(decLow); + decData.setNumNulls(decNulls); + decData.setNumDVs(decDVs); + data.setDecimalStats(decData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, + Arrays.asList(boolcol, longcol, doublecol, stringcol, binarycol, decimalcol)); + Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); + Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + + Assert.assertEquals(6, stats.getStatsObjSize()); + + ColumnStatisticsData colData = stats.getStatsObj().get(0).getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, colData.getSetField()); + boolData = colData.getBooleanStats(); + Assert.assertEquals(trues, boolData.getNumTrues()); + Assert.assertEquals(falses, boolData.getNumFalses()); + Assert.assertEquals(booleanNulls, boolData.getNumNulls()); + + colData = stats.getStatsObj().get(1).getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, colData.getSetField()); + longData = colData.getLongStats(); + 
Assert.assertEquals(longHigh, longData.getHighValue()); + Assert.assertEquals(longLow, longData.getLowValue()); + Assert.assertEquals(longNulls, longData.getNumNulls()); + Assert.assertEquals(longDVs, longData.getNumDVs()); + + colData = stats.getStatsObj().get(2).getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, colData.getSetField()); + doubleData = colData.getDoubleStats(); + Assert.assertEquals(doubleHigh, doubleData.getHighValue(), 0.01); + Assert.assertEquals(doubleLow, doubleData.getLowValue(), 0.01); + Assert.assertEquals(doubleNulls, doubleData.getNumNulls()); + Assert.assertEquals(doubleDVs, doubleData.getNumDVs()); + + colData = stats.getStatsObj().get(3).getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, colData.getSetField()); + strData = colData.getStringStats(); + Assert.assertEquals(strMaxLen, strData.getMaxColLen()); + Assert.assertEquals(strAvgLen, strData.getAvgColLen(), 0.01); + Assert.assertEquals(strNulls, strData.getNumNulls()); + Assert.assertEquals(strDVs, strData.getNumDVs()); + + colData = stats.getStatsObj().get(4).getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.BINARY_STATS, colData.getSetField()); + binData = colData.getBinaryStats(); + Assert.assertEquals(binMaxLen, binData.getMaxColLen()); + Assert.assertEquals(binAvgLen, binData.getAvgColLen(), 0.01); + Assert.assertEquals(binNulls, binData.getNumNulls()); + + colData = stats.getStatsObj().get(5).getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, colData.getSetField()); + decData = colData.getDecimalStats(); + Assert.assertEquals(decHigh, decData.getHighValue()); + Assert.assertEquals(decLow, decData.getLowValue()); + Assert.assertEquals(decNulls, decData.getNumNulls()); + Assert.assertEquals(decDVs, decData.getNumDVs()); + + } + + @Test + public void partitionStatistics() throws Exception { + long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statspart"; + String[] partNames = {"ds=today", "ds=yesterday"}; + String[] partVals = {"today", "yesterday"}; + String boolcol = "boolcol"; + String longcol = "longcol"; + String doublecol = "doublecol"; + String stringcol = "stringcol"; + String binarycol = "bincol"; + String decimalcol = "deccol"; + long trues = 37; + long falses = 12; + long booleanNulls = 2; + long strMaxLen = 1234; + double strAvgLen = 32.3; + long strNulls = 987; + long strDVs = 906; + + List cols = new ArrayList(); + cols.add(new FieldSchema(boolcol, "boolean", "nocomment")); + cols.add(new FieldSchema(longcol, "long", "nocomment")); + cols.add(new FieldSchema(doublecol, "double", "nocomment")); + cols.add(new FieldSchema(stringcol, "varchar(32)", "nocomment")); + cols.add(new FieldSchema(binarycol, "binary", "nocomment")); + cols.add(new FieldSchema(decimalcol, "decimal(5, 3)", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbname, "me", (int)now / 1000, (int)now / 1000, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + for (int i = 0; i < partNames.length; i++) { + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); 
+      desc.setTableName(tableName);
+      desc.setIsTblLevel(false);
+      desc.setPartName(partNames[i]);
+      stats.setStatsDesc(desc);
+
+      ColumnStatisticsObj obj = new ColumnStatisticsObj();
+      obj.setColName(boolcol);
+      obj.setColType("boolean");
+      ColumnStatisticsData data = new ColumnStatisticsData();
+      BooleanColumnStatsData boolData = new BooleanColumnStatsData();
+      boolData.setNumTrues(trues);
+      boolData.setNumFalses(falses);
+      boolData.setNumNulls(booleanNulls);
+      data.setBooleanStats(boolData);
+      obj.setStatsData(data);
+      stats.addToStatsObj(obj);
+
+      store.updatePartitionColumnStatistics(stats, Arrays.asList(partVals[i]));
+    }
+
+    List<ColumnStatistics> statsList = store.getPartitionColumnStatistics(dbname, tableName,
+        Arrays.asList(partNames), Arrays.asList(boolcol));
+
+    Assert.assertEquals(2, statsList.size());
+    for (int i = 0; i < partNames.length; i++) {
+      Assert.assertEquals(1, statsList.get(i).getStatsObjSize());
+    }
+
+    for (int i = 0; i < partNames.length; i++) {
+      ColumnStatistics stats = new ColumnStatistics();
+      ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+      desc.setLastAnalyzed(now);
+      desc.setDbName(dbname);
+      desc.setTableName(tableName);
+      desc.setIsTblLevel(false);
+      desc.setPartName(partNames[i]);
+      stats.setStatsDesc(desc);
+
+      ColumnStatisticsObj obj = new ColumnStatisticsObj();
+      obj.setColName(stringcol);
+      obj.setColType("string");
+      ColumnStatisticsData data = new ColumnStatisticsData();
+      StringColumnStatsData strData = new StringColumnStatsData();
+      strData.setMaxColLen(strMaxLen);
+      strData.setAvgColLen(strAvgLen);
+      strData.setNumNulls(strNulls);
+      strData.setNumDVs(strDVs);
+      data.setStringStats(strData);
+      obj.setStatsData(data);
+      stats.addToStatsObj(obj);
+
+      store.updatePartitionColumnStatistics(stats, Arrays.asList(partVals[i]));
+    }
+
+    // Make sure when we ask for one we only get one
+    statsList = store.getPartitionColumnStatistics(dbname, tableName,
+        Arrays.asList(partNames), Arrays.asList(boolcol));
+
+    Assert.assertEquals(2, statsList.size());
+    for (int i = 0; i < partNames.length; i++) {
+      Assert.assertEquals(1, statsList.get(i).getStatsObjSize());
+    }
+
+    statsList = store.getPartitionColumnStatistics(dbname, tableName,
+        Arrays.asList(partNames), Arrays.asList(boolcol, stringcol));
+
+    Assert.assertEquals(2, statsList.size());
+    for (int i = 0; i < partNames.length; i++) {
+      Assert.assertEquals(2, statsList.get(i).getStatsObjSize());
+      // Just check one piece of the data, I don't need to check it all again
+      Assert.assertEquals(booleanNulls,
+          statsList.get(i).getStatsObj().get(0).getStatsData().getBooleanStats().getNumNulls());
+      Assert.assertEquals(strDVs,
+          statsList.get(i).getStatsObj().get(1).getStatsData().getStringStats().getNumDVs());
+    }
+  }
+}
diff --git metastore/if/hive_metastore.thrift metastore/if/hive_metastore.thrift
index 8c50bf8..85bd362 100755
--- metastore/if/hive_metastore.thrift
+++ metastore/if/hive_metastore.thrift
@@ -1134,6 +1134,8 @@ service ThriftHiveMetastore extends fb303.FacebookService
   // Notification logging calls
   NotificationEventResponse get_next_notification(1:NotificationEventRequest rqst)
   CurrentNotificationEventId get_current_notificationEventId()
+
+  void flushCache()
 }

 // * Note about the DDL_TIME: When creating or altering a table or a partition,
diff --git metastore/pom.xml metastore/pom.xml
index 948f22c..a7d5a71 100644
--- metastore/pom.xml
+++ metastore/pom.xml
@@ -163,6 +163,11 @@
       <version>${hadoop-23.version}</version>
       <optional>true</optional>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+      <version>${hbase.hadoop2.version}</version>
+    </dependency>
diff --git
metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp index 4c0efc6..ce1a5ae 100644 --- metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp +++ metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp @@ -27397,6 +27397,116 @@ uint32_t ThriftHiveMetastore_get_current_notificationEventId_presult::read(::apa return xfer; } +uint32_t ThriftHiveMetastore_flushCache_args::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +uint32_t ThriftHiveMetastore_flushCache_args::write(::apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + xfer += oprot->writeStructBegin("ThriftHiveMetastore_flushCache_args"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +uint32_t ThriftHiveMetastore_flushCache_pargs::write(::apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + xfer += oprot->writeStructBegin("ThriftHiveMetastore_flushCache_pargs"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +uint32_t ThriftHiveMetastore_flushCache_result::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +uint32_t ThriftHiveMetastore_flushCache_result::write(::apache::thrift::protocol::TProtocol* oprot) const { + + uint32_t xfer = 0; + + xfer += oprot->writeStructBegin("ThriftHiveMetastore_flushCache_result"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +uint32_t ThriftHiveMetastore_flushCache_presult::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + void ThriftHiveMetastoreClient::getMetaConf(std::string& _return, const std::string& key) { send_getMetaConf(key); @@ -35023,6 +35133,58 @@ void ThriftHiveMetastoreClient::recv_get_current_notificationEventId(CurrentNoti throw ::apache::thrift::TApplicationException(::apache::thrift::TApplicationException::MISSING_RESULT, "get_current_notificationEventId failed: unknown result"); } +void ThriftHiveMetastoreClient::flushCache() +{ + send_flushCache(); + recv_flushCache(); +} + +void ThriftHiveMetastoreClient::send_flushCache() +{ + int32_t cseqid = 0; + 
oprot_->writeMessageBegin("flushCache", ::apache::thrift::protocol::T_CALL, cseqid); + + ThriftHiveMetastore_flushCache_pargs args; + args.write(oprot_); + + oprot_->writeMessageEnd(); + oprot_->getTransport()->writeEnd(); + oprot_->getTransport()->flush(); +} + +void ThriftHiveMetastoreClient::recv_flushCache() +{ + + int32_t rseqid = 0; + std::string fname; + ::apache::thrift::protocol::TMessageType mtype; + + iprot_->readMessageBegin(fname, mtype, rseqid); + if (mtype == ::apache::thrift::protocol::T_EXCEPTION) { + ::apache::thrift::TApplicationException x; + x.read(iprot_); + iprot_->readMessageEnd(); + iprot_->getTransport()->readEnd(); + throw x; + } + if (mtype != ::apache::thrift::protocol::T_REPLY) { + iprot_->skip(::apache::thrift::protocol::T_STRUCT); + iprot_->readMessageEnd(); + iprot_->getTransport()->readEnd(); + } + if (fname.compare("flushCache") != 0) { + iprot_->skip(::apache::thrift::protocol::T_STRUCT); + iprot_->readMessageEnd(); + iprot_->getTransport()->readEnd(); + } + ThriftHiveMetastore_flushCache_presult result; + result.read(iprot_); + iprot_->readMessageEnd(); + iprot_->getTransport()->readEnd(); + + return; +} + bool ThriftHiveMetastoreProcessor::dispatchCall(::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, const std::string& fname, int32_t seqid, void* callContext) { ProcessMap::iterator pfn; pfn = processMap_.find(fname); @@ -42141,6 +42303,59 @@ void ThriftHiveMetastoreProcessor::process_get_current_notificationEventId(int32 } } +void ThriftHiveMetastoreProcessor::process_flushCache(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext) +{ + void* ctx = NULL; + if (this->eventHandler_.get() != NULL) { + ctx = this->eventHandler_->getContext("ThriftHiveMetastore.flushCache", callContext); + } + ::apache::thrift::TProcessorContextFreer freer(this->eventHandler_.get(), ctx, "ThriftHiveMetastore.flushCache"); + + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->preRead(ctx, "ThriftHiveMetastore.flushCache"); + } + + ThriftHiveMetastore_flushCache_args args; + args.read(iprot); + iprot->readMessageEnd(); + uint32_t bytes = iprot->getTransport()->readEnd(); + + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->postRead(ctx, "ThriftHiveMetastore.flushCache", bytes); + } + + ThriftHiveMetastore_flushCache_result result; + try { + iface_->flushCache(); + } catch (const std::exception& e) { + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->handlerError(ctx, "ThriftHiveMetastore.flushCache"); + } + + ::apache::thrift::TApplicationException x(e.what()); + oprot->writeMessageBegin("flushCache", ::apache::thrift::protocol::T_EXCEPTION, seqid); + x.write(oprot); + oprot->writeMessageEnd(); + oprot->getTransport()->writeEnd(); + oprot->getTransport()->flush(); + return; + } + + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->preWrite(ctx, "ThriftHiveMetastore.flushCache"); + } + + oprot->writeMessageBegin("flushCache", ::apache::thrift::protocol::T_REPLY, seqid); + result.write(oprot); + oprot->writeMessageEnd(); + bytes = oprot->getTransport()->writeEnd(); + oprot->getTransport()->flush(); + + if (this->eventHandler_.get() != NULL) { + this->eventHandler_->postWrite(ctx, "ThriftHiveMetastore.flushCache", bytes); + } +} + ::boost::shared_ptr< ::apache::thrift::TProcessor > ThriftHiveMetastoreProcessorFactory::getProcessor(const ::apache::thrift::TConnectionInfo& connInfo) { 
::apache::thrift::ReleaseHandler< ThriftHiveMetastoreIfFactory > cleanup(handlerFactory_); ::boost::shared_ptr< ThriftHiveMetastoreIf > handler(handlerFactory_->getHandler(connInfo), cleanup); diff --git metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h index 488c746..f11d29a 100644 --- metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h +++ metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h @@ -135,6 +135,7 @@ class ThriftHiveMetastoreIf : virtual public ::facebook::fb303::FacebookService virtual void show_compact(ShowCompactResponse& _return, const ShowCompactRequest& rqst) = 0; virtual void get_next_notification(NotificationEventResponse& _return, const NotificationEventRequest& rqst) = 0; virtual void get_current_notificationEventId(CurrentNotificationEventId& _return) = 0; + virtual void flushCache() = 0; }; class ThriftHiveMetastoreIfFactory : virtual public ::facebook::fb303::FacebookServiceIfFactory { @@ -544,6 +545,9 @@ class ThriftHiveMetastoreNull : virtual public ThriftHiveMetastoreIf , virtual p void get_current_notificationEventId(CurrentNotificationEventId& /* _return */) { return; } + void flushCache() { + return; + } }; typedef struct _ThriftHiveMetastore_getMetaConf_args__isset { @@ -16777,6 +16781,80 @@ class ThriftHiveMetastore_get_current_notificationEventId_presult { }; + +class ThriftHiveMetastore_flushCache_args { + public: + + ThriftHiveMetastore_flushCache_args() { + } + + virtual ~ThriftHiveMetastore_flushCache_args() throw() {} + + + bool operator == (const ThriftHiveMetastore_flushCache_args & /* rhs */) const + { + return true; + } + bool operator != (const ThriftHiveMetastore_flushCache_args &rhs) const { + return !(*this == rhs); + } + + bool operator < (const ThriftHiveMetastore_flushCache_args & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + + +class ThriftHiveMetastore_flushCache_pargs { + public: + + + virtual ~ThriftHiveMetastore_flushCache_pargs() throw() {} + + + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + + +class ThriftHiveMetastore_flushCache_result { + public: + + ThriftHiveMetastore_flushCache_result() { + } + + virtual ~ThriftHiveMetastore_flushCache_result() throw() {} + + + bool operator == (const ThriftHiveMetastore_flushCache_result & /* rhs */) const + { + return true; + } + bool operator != (const ThriftHiveMetastore_flushCache_result &rhs) const { + return !(*this == rhs); + } + + bool operator < (const ThriftHiveMetastore_flushCache_result & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + + +class ThriftHiveMetastore_flushCache_presult { + public: + + + virtual ~ThriftHiveMetastore_flushCache_presult() throw() {} + + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + +}; + class ThriftHiveMetastoreClient : virtual public ThriftHiveMetastoreIf, public ::facebook::fb303::FacebookServiceClient { public: ThriftHiveMetastoreClient(boost::shared_ptr< ::apache::thrift::protocol::TProtocol> prot) : @@ -17146,6 +17224,9 @@ class ThriftHiveMetastoreClient : virtual public ThriftHiveMetastoreIf, public void get_current_notificationEventId(CurrentNotificationEventId& _return); void send_get_current_notificationEventId(); void recv_get_current_notificationEventId(CurrentNotificationEventId& _return); + void flushCache(); + void 
send_flushCache(); + void recv_flushCache(); }; class ThriftHiveMetastoreProcessor : public ::facebook::fb303::FacebookServiceProcessor { @@ -17275,6 +17356,7 @@ class ThriftHiveMetastoreProcessor : public ::facebook::fb303::FacebookServiceP void process_show_compact(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); void process_get_next_notification(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); void process_get_current_notificationEventId(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); + void process_flushCache(int32_t seqid, ::apache::thrift::protocol::TProtocol* iprot, ::apache::thrift::protocol::TProtocol* oprot, void* callContext); public: ThriftHiveMetastoreProcessor(boost::shared_ptr iface) : ::facebook::fb303::FacebookServiceProcessor(iface), @@ -17398,6 +17480,7 @@ class ThriftHiveMetastoreProcessor : public ::facebook::fb303::FacebookServiceP processMap_["show_compact"] = &ThriftHiveMetastoreProcessor::process_show_compact; processMap_["get_next_notification"] = &ThriftHiveMetastoreProcessor::process_get_next_notification; processMap_["get_current_notificationEventId"] = &ThriftHiveMetastoreProcessor::process_get_current_notificationEventId; + processMap_["flushCache"] = &ThriftHiveMetastoreProcessor::process_flushCache; } virtual ~ThriftHiveMetastoreProcessor() {} @@ -18572,6 +18655,15 @@ class ThriftHiveMetastoreMultiface : virtual public ThriftHiveMetastoreIf, publi return; } + void flushCache() { + size_t sz = ifaces_.size(); + size_t i = 0; + for (; i < (sz - 1); ++i) { + ifaces_[i]->flushCache(); + } + ifaces_[i]->flushCache(); + } + }; }}} // namespace diff --git metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp index e2d41c1..3d1e1ad 100644 --- metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp +++ metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp @@ -617,6 +617,11 @@ class ThriftHiveMetastoreHandler : virtual public ThriftHiveMetastoreIf { printf("get_current_notificationEventId\n"); } + void flushCache() { + // Your implementation goes here + printf("flushCache\n"); + } + }; int main(int argc, char **argv) { diff --git metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SkewedInfo.java metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SkewedInfo.java index 9df36b6..83438c7 100644 --- metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SkewedInfo.java +++ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SkewedInfo.java @@ -184,7 +184,7 @@ public SkewedInfo(SkewedInfo other) { __this__skewedColValueLocationMaps.put(__this__skewedColValueLocationMaps_copy_key, __this__skewedColValueLocationMaps_copy_value); } - this.skewedColValueLocationMaps = __this__skewedColValueLocationMaps; + this.skewedColValueLocationMaps = __this__skewedColValueLocationMaps; } } diff --git metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java index a72061e..64b6ed8 100644 --- metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java +++ 
metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java @@ -276,6 +276,8 @@ public CurrentNotificationEventId get_current_notificationEventId() throws org.apache.thrift.TException; + public void flushCache() throws org.apache.thrift.TException; + } public interface AsyncIface extends com.facebook.fb303.FacebookService .AsyncIface { @@ -518,6 +520,8 @@ public void get_current_notificationEventId(org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; + public void flushCache(org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; + } public static class Client extends com.facebook.fb303.FacebookService.Client implements Iface { @@ -4053,6 +4057,25 @@ public CurrentNotificationEventId recv_get_current_notificationEventId() throws throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "get_current_notificationEventId failed: unknown result"); } + public void flushCache() throws org.apache.thrift.TException + { + send_flushCache(); + recv_flushCache(); + } + + public void send_flushCache() throws org.apache.thrift.TException + { + flushCache_args args = new flushCache_args(); + sendBase("flushCache", args); + } + + public void recv_flushCache() throws org.apache.thrift.TException + { + flushCache_result result = new flushCache_result(); + receiveBase(result, "flushCache"); + return; + } + } public static class AsyncClient extends com.facebook.fb303.FacebookService.AsyncClient implements AsyncIface { public static class Factory implements org.apache.thrift.async.TAsyncClientFactory { @@ -8317,6 +8340,35 @@ public CurrentNotificationEventId getResult() throws org.apache.thrift.TExceptio } } + public void flushCache(org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { + checkReady(); + flushCache_call method_call = new flushCache_call(resultHandler, this, ___protocolFactory, ___transport); + this.___currentMethod = method_call; + ___manager.call(method_call); + } + + public static class flushCache_call extends org.apache.thrift.async.TAsyncMethodCall { + public flushCache_call(org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { + super(client, protocolFactory, transport, resultHandler, false); + } + + public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { + prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("flushCache", org.apache.thrift.protocol.TMessageType.CALL, 0)); + flushCache_args args = new flushCache_args(); + args.write(prot); + prot.writeMessageEnd(); + } + + public void getResult() throws org.apache.thrift.TException { + if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { + throw new IllegalStateException("Method call not finished!"); + } + org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); + org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); + (new Client(prot)).recv_flushCache(); + } + } + } public static class Processor extends com.facebook.fb303.FacebookService.Processor implements org.apache.thrift.TProcessor { @@ 
-8449,6 +8501,7 @@ protected Processor(I iface, Map extends org.apache.thrift.ProcessFunction { + public flushCache() { + super("flushCache"); + } + + public flushCache_args getEmptyArgsInstance() { + return new flushCache_args(); + } + + protected boolean isOneway() { + return false; + } + + public flushCache_result getResult(I iface, flushCache_args args) throws org.apache.thrift.TException { + flushCache_result result = new flushCache_result(); + iface.flushCache(); + return result; + } + } + } public static class getMetaConf_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { @@ -137532,4 +137605,496 @@ public void read(org.apache.thrift.protocol.TProtocol prot, get_current_notifica } + public static class flushCache_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("flushCache_args"); + + + private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); + static { + schemes.put(StandardScheme.class, new flushCache_argsStandardSchemeFactory()); + schemes.put(TupleScheme.class, new flushCache_argsTupleSchemeFactory()); + } + + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { +; + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. + */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(flushCache_args.class, metaDataMap); + } + + public flushCache_args() { + } + + /** + * Performs a deep copy on other. 
+ */ + public flushCache_args(flushCache_args other) { + } + + public flushCache_args deepCopy() { + return new flushCache_args(this); + } + + @Override + public void clear() { + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof flushCache_args) + return this.equals((flushCache_args)that); + return false; + } + + public boolean equals(flushCache_args that) { + if (that == null) + return false; + + return true; + } + + @Override + public int hashCode() { + HashCodeBuilder builder = new HashCodeBuilder(); + + return builder.toHashCode(); + } + + public int compareTo(flushCache_args other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + flushCache_args typedOther = (flushCache_args)other; + + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + schemes.get(iprot.getScheme()).getScheme().read(iprot, this); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + schemes.get(oprot.getScheme()).getScheme().write(oprot, this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("flushCache_args("); + boolean first = true; + + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + // check for sub-struct validity + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private static class flushCache_argsStandardSchemeFactory implements SchemeFactory { + public flushCache_argsStandardScheme getScheme() { + return new flushCache_argsStandardScheme(); + } + } + + private static class flushCache_argsStandardScheme extends StandardScheme { + + public void read(org.apache.thrift.protocol.TProtocol iprot, flushCache_args struct) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField schemeField; + iprot.readStructBegin(); + while (true) + { + schemeField = iprot.readFieldBegin(); + if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (schemeField.id) { + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + struct.validate(); + } + + 
public void write(org.apache.thrift.protocol.TProtocol oprot, flushCache_args struct) throws org.apache.thrift.TException { + struct.validate(); + + oprot.writeStructBegin(STRUCT_DESC); + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + } + + private static class flushCache_argsTupleSchemeFactory implements SchemeFactory { + public flushCache_argsTupleScheme getScheme() { + return new flushCache_argsTupleScheme(); + } + } + + private static class flushCache_argsTupleScheme extends TupleScheme { + + @Override + public void write(org.apache.thrift.protocol.TProtocol prot, flushCache_args struct) throws org.apache.thrift.TException { + TTupleProtocol oprot = (TTupleProtocol) prot; + } + + @Override + public void read(org.apache.thrift.protocol.TProtocol prot, flushCache_args struct) throws org.apache.thrift.TException { + TTupleProtocol iprot = (TTupleProtocol) prot; + } + } + + } + + public static class flushCache_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("flushCache_result"); + + + private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); + static { + schemes.put(StandardScheme.class, new flushCache_resultStandardSchemeFactory()); + schemes.put(TupleScheme.class, new flushCache_resultTupleSchemeFactory()); + } + + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { +; + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. + */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(flushCache_result.class, metaDataMap); + } + + public flushCache_result() { + } + + /** + * Performs a deep copy on other. 
+ */ + public flushCache_result(flushCache_result other) { + } + + public flushCache_result deepCopy() { + return new flushCache_result(this); + } + + @Override + public void clear() { + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof flushCache_result) + return this.equals((flushCache_result)that); + return false; + } + + public boolean equals(flushCache_result that) { + if (that == null) + return false; + + return true; + } + + @Override + public int hashCode() { + HashCodeBuilder builder = new HashCodeBuilder(); + + return builder.toHashCode(); + } + + public int compareTo(flushCache_result other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + flushCache_result typedOther = (flushCache_result)other; + + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + schemes.get(iprot.getScheme()).getScheme().read(iprot, this); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + schemes.get(oprot.getScheme()).getScheme().write(oprot, this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("flushCache_result("); + boolean first = true; + + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + // check for sub-struct validity + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private static class flushCache_resultStandardSchemeFactory implements SchemeFactory { + public flushCache_resultStandardScheme getScheme() { + return new flushCache_resultStandardScheme(); + } + } + + private static class flushCache_resultStandardScheme extends StandardScheme { + + public void read(org.apache.thrift.protocol.TProtocol iprot, flushCache_result struct) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField schemeField; + iprot.readStructBegin(); + while (true) + { + schemeField = iprot.readFieldBegin(); + if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (schemeField.id) { + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + iprot.readFieldEnd(); + } + 
iprot.readStructEnd(); + struct.validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot, flushCache_result struct) throws org.apache.thrift.TException { + struct.validate(); + + oprot.writeStructBegin(STRUCT_DESC); + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + } + + private static class flushCache_resultTupleSchemeFactory implements SchemeFactory { + public flushCache_resultTupleScheme getScheme() { + return new flushCache_resultTupleScheme(); + } + } + + private static class flushCache_resultTupleScheme extends TupleScheme { + + @Override + public void write(org.apache.thrift.protocol.TProtocol prot, flushCache_result struct) throws org.apache.thrift.TException { + TTupleProtocol oprot = (TTupleProtocol) prot; + } + + @Override + public void read(org.apache.thrift.protocol.TProtocol prot, flushCache_result struct) throws org.apache.thrift.TException { + TTupleProtocol iprot = (TTupleProtocol) prot; + } + } + + } + } diff --git metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php index 2d1f935..7136bd7 100644 --- metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php +++ metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php @@ -135,6 +135,7 @@ interface ThriftHiveMetastoreIf extends \FacebookServiceIf { public function show_compact(\metastore\ShowCompactRequest $rqst); public function get_next_notification(\metastore\NotificationEventRequest $rqst); public function get_current_notificationEventId(); + public function flushCache(); } class ThriftHiveMetastoreClient extends \FacebookServiceClient implements \metastore\ThriftHiveMetastoreIf { @@ -6987,6 +6988,53 @@ class ThriftHiveMetastoreClient extends \FacebookServiceClient implements \metas throw new \Exception("get_current_notificationEventId failed: unknown result"); } + public function flushCache() + { + $this->send_flushCache(); + $this->recv_flushCache(); + } + + public function send_flushCache() + { + $args = new \metastore\ThriftHiveMetastore_flushCache_args(); + $bin_accel = ($this->output_ instanceof TProtocol::$TBINARYPROTOCOLACCELERATED) && function_exists('thrift_protocol_write_binary'); + if ($bin_accel) + { + thrift_protocol_write_binary($this->output_, 'flushCache', TMessageType::CALL, $args, $this->seqid_, $this->output_->isStrictWrite()); + } + else + { + $this->output_->writeMessageBegin('flushCache', TMessageType::CALL, $this->seqid_); + $args->write($this->output_); + $this->output_->writeMessageEnd(); + $this->output_->getTransport()->flush(); + } + } + + public function recv_flushCache() + { + $bin_accel = ($this->input_ instanceof TProtocol::$TBINARYPROTOCOLACCELERATED) && function_exists('thrift_protocol_read_binary'); + if ($bin_accel) $result = thrift_protocol_read_binary($this->input_, '\metastore\ThriftHiveMetastore_flushCache_result', $this->input_->isStrictRead()); + else + { + $rseqid = 0; + $fname = null; + $mtype = 0; + + $this->input_->readMessageBegin($fname, $mtype, $rseqid); + if ($mtype == TMessageType::EXCEPTION) { + $x = new TApplicationException(); + $x->read($this->input_); + $this->input_->readMessageEnd(); + throw $x; + } + $result = new \metastore\ThriftHiveMetastore_flushCache_result(); + $result->read($this->input_); + $this->input_->readMessageEnd(); + } + return; + } + } // HELPER FUNCTIONS AND STRUCTURES @@ -33615,4 +33663,104 @@ class ThriftHiveMetastore_get_current_notificationEventId_result { } +class ThriftHiveMetastore_flushCache_args { + 
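+  // Generated helper for the no-argument flushCache() call; it carries no fields, so read()
+  // just skips to the stop field and write() emits an empty struct.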
static $_TSPEC; + + + public function __construct() { + if (!isset(self::$_TSPEC)) { + self::$_TSPEC = array( + ); + } + } + + public function getName() { + return 'ThriftHiveMetastore_flushCache_args'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + while (true) + { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + if ($ftype == TType::STOP) { + break; + } + switch ($fid) + { + default: + $xfer += $input->skip($ftype); + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + return $xfer; + } + + public function write($output) { + $xfer = 0; + $xfer += $output->writeStructBegin('ThriftHiveMetastore_flushCache_args'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + return $xfer; + } + +} + +class ThriftHiveMetastore_flushCache_result { + static $_TSPEC; + + + public function __construct() { + if (!isset(self::$_TSPEC)) { + self::$_TSPEC = array( + ); + } + } + + public function getName() { + return 'ThriftHiveMetastore_flushCache_result'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + while (true) + { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + if ($ftype == TType::STOP) { + break; + } + switch ($fid) + { + default: + $xfer += $input->skip($ftype); + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + return $xfer; + } + + public function write($output) { + $xfer = 0; + $xfer += $output->writeStructBegin('ThriftHiveMetastore_flushCache_result'); + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + return $xfer; + } + +} + diff --git metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote old mode 100644 new mode 100755 index 197b35e..51e041d --- metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote +++ metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote @@ -142,6 +142,7 @@ if len(sys.argv) <= 1 or sys.argv[1] == '--help': print ' ShowCompactResponse show_compact(ShowCompactRequest rqst)' print ' NotificationEventResponse get_next_notification(NotificationEventRequest rqst)' print ' CurrentNotificationEventId get_current_notificationEventId()' + print ' void flushCache()' print '' sys.exit(0) @@ -907,6 +908,12 @@ elif cmd == 'get_current_notificationEventId': sys.exit(1) pp.pprint(client.get_current_notificationEventId()) +elif cmd == 'flushCache': + if len(args) != 0: + print 'flushCache requires 0 args' + sys.exit(1) + pp.pprint(client.flushCache()) + else: print 'Unrecognized method %s' % cmd sys.exit(1) diff --git metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py index 1357e41..57b861f 100644 --- metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py +++ metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py @@ -986,6 +986,9 @@ def get_next_notification(self, rqst): def get_current_notificationEventId(self, ): pass + def flushCache(self, ): + pass + class Client(fb303.FacebookService.Client, Iface): """ @@ -5261,6 +5264,29 @@ def recv_get_current_notificationEventId(self, ): return result.success raise TApplicationException(TApplicationException.MISSING_RESULT, "get_current_notificationEventId failed: 
unknown result"); + def flushCache(self, ): + self.send_flushCache() + self.recv_flushCache() + + def send_flushCache(self, ): + self._oprot.writeMessageBegin('flushCache', TMessageType.CALL, self._seqid) + args = flushCache_args() + args.write(self._oprot) + self._oprot.writeMessageEnd() + self._oprot.trans.flush() + + def recv_flushCache(self, ): + (fname, mtype, rseqid) = self._iprot.readMessageBegin() + if mtype == TMessageType.EXCEPTION: + x = TApplicationException() + x.read(self._iprot) + self._iprot.readMessageEnd() + raise x + result = flushCache_result() + result.read(self._iprot) + self._iprot.readMessageEnd() + return + class Processor(fb303.FacebookService.Processor, Iface, TProcessor): def __init__(self, handler): @@ -5384,6 +5410,7 @@ def __init__(self, handler): self._processMap["show_compact"] = Processor.process_show_compact self._processMap["get_next_notification"] = Processor.process_get_next_notification self._processMap["get_current_notificationEventId"] = Processor.process_get_current_notificationEventId + self._processMap["flushCache"] = Processor.process_flushCache def process(self, iprot, oprot): (name, type, seqid) = iprot.readMessageBegin() @@ -7291,6 +7318,17 @@ def process_get_current_notificationEventId(self, seqid, iprot, oprot): oprot.writeMessageEnd() oprot.trans.flush() + def process_flushCache(self, seqid, iprot, oprot): + args = flushCache_args() + args.read(iprot) + iprot.readMessageEnd() + result = flushCache_result() + self._handler.flushCache() + oprot.writeMessageBegin("flushCache", TMessageType.REPLY, seqid) + result.write(oprot) + oprot.writeMessageEnd() + oprot.trans.flush() + # HELPER FUNCTIONS AND STRUCTURES @@ -26492,3 +26530,87 @@ def __eq__(self, other): def __ne__(self, other): return not (self == other) + +class flushCache_args: + + thrift_spec = ( + ) + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('flushCache_args') + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + +class flushCache_result: + + thrift_spec = ( + ) + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + else: + iprot.skip(ftype) + 
iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('flushCache_result') + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) diff --git metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb index a383e0b..2244bfa 100644 --- metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb +++ metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb @@ -2007,6 +2007,20 @@ module ThriftHiveMetastore raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_current_notificationEventId failed: unknown result') end + def flushCache() + send_flushCache() + recv_flushCache() + end + + def send_flushCache() + send_message('flushCache', FlushCache_args) + end + + def recv_flushCache() + result = receive_message(FlushCache_result) + return + end + end class Processor < ::FacebookService::Processor @@ -3537,6 +3551,13 @@ module ThriftHiveMetastore write_result(result, oprot, 'get_current_notificationEventId', seqid) end + def process_flushCache(seqid, iprot, oprot) + args = read_args(iprot, FlushCache_args) + result = FlushCache_result.new() + @handler.flushCache() + write_result(result, oprot, 'flushCache', seqid) + end + end # HELPER FUNCTIONS AND STRUCTURES @@ -8088,5 +8109,35 @@ module ThriftHiveMetastore ::Thrift::Struct.generate_accessors self end + class FlushCache_args + include ::Thrift::Struct, ::Thrift::Struct_Union + + FIELDS = { + + } + + def struct_fields; FIELDS; end + + def validate + end + + ::Thrift::Struct.generate_accessors self + end + + class FlushCache_result + include ::Thrift::Struct, ::Thrift::Struct_Union + + FIELDS = { + + } + + def struct_fields; FIELDS; end + + def validate + end + + ::Thrift::Struct.generate_accessors self + end + end diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index caad948..90545a9 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -574,6 +574,19 @@ private RawStore newRawStore() throws MetaException { + rawStoreClassName)); Configuration conf = getConf(); + if (hiveConf.getBoolVar(ConfVars.METASTORE_FASTPATH)) { + LOG.info("Fastpath, skipping raw store proxy"); + try { + RawStore rs = ((Class) MetaStoreUtils.getClass( + rawStoreClassName)).newInstance(); + rs.setConf(conf); + return rs; + } catch (Exception e) { + LOG.fatal("Unable to instantiate raw store directly in fastpath mode"); + throw new RuntimeException(e); + } + } + return RawStoreProxy.getProxy(hiveConf, conf, rawStoreClassName, threadLocalId.get()); } @@ -5448,6 +5461,11 @@ public ShowCompactResponse show_compact(ShowCompactRequest rqst) throws TExcepti } @Override + public void flushCache() throws TException { + getMS().flushCache(); + } + + @Override public 
GetPrincipalsInRoleResponse get_principals_in_role(GetPrincipalsInRoleRequest request) throws MetaException, TException { diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 4285c94..95052c2 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -153,6 +153,7 @@ private URI metastoreUris[]; private final HiveMetaHookLoader hookLoader; protected final HiveConf conf; + protected boolean fastpath = false; private String tokenStrForm; private final boolean localMetaStore; private final MetaStoreFilterHook filterHook; @@ -185,10 +186,20 @@ public HiveMetaStoreClient(HiveConf conf, HiveMetaHookLoader hookLoader) if (localMetaStore) { // instantiate the metastore server handler directly instead of connecting // through the network - client = HiveMetaStore.newRetryingHMSHandler("hive client", conf, true); + if (conf.getBoolVar(ConfVars.METASTORE_FASTPATH)) { + client = new HiveMetaStore.HMSHandler("hive client", conf, true); + fastpath = true; + } else { + client = HiveMetaStore.newRetryingHMSHandler("hive client", conf, true); + } isConnected = true; snapshotActiveConf(); return; + } else { + if (conf.getBoolVar(ConfVars.METASTORE_FASTPATH)) { + throw new RuntimeException("You can't set hive.metastore.fastpath to true when you're " + + "talking to the thrift metastore service. You must run the metastore locally."); + } } // get the number retries @@ -514,7 +525,8 @@ public Partition add_partition(Partition new_part) public Partition add_partition(Partition new_part, EnvironmentContext envContext) throws InvalidObjectException, AlreadyExistsException, MetaException, TException { - return deepCopy(client.add_partition_with_environment_context(new_part, envContext)); + Partition p = client.add_partition_with_environment_context(new_part, envContext); + return fastpath ? p : deepCopy(p); } /** @@ -574,8 +586,9 @@ public Partition appendPartition(String db_name, String table_name, public Partition appendPartition(String db_name, String table_name, List part_vals, EnvironmentContext envContext) throws InvalidObjectException, AlreadyExistsException, MetaException, TException { - return deepCopy(client.append_partition_with_environment_context(db_name, table_name, - part_vals, envContext)); + Partition p = client.append_partition_with_environment_context(db_name, table_name, + part_vals, envContext); + return fastpath ? p : deepCopy(p); } @Override @@ -587,8 +600,9 @@ public Partition appendPartition(String dbName, String tableName, String partNam public Partition appendPartition(String dbName, String tableName, String partName, EnvironmentContext envContext) throws InvalidObjectException, AlreadyExistsException, MetaException, TException { - return deepCopy(client.append_partition_by_name_with_environment_context(dbName, tableName, - partName, envContext)); + Partition p = client.append_partition_by_name_with_environment_context(dbName, tableName, + partName, envContext); + return fastpath ? 
p : deepCopy(p); } /** @@ -980,8 +994,8 @@ public boolean dropType(String type) throws NoSuchObjectException, MetaException @Override public List listPartitions(String db_name, String tbl_name, short max_parts) throws NoSuchObjectException, MetaException, TException { - return deepCopyPartitions(filterHook.filterPartitions( - client.get_partitions(db_name, tbl_name, max_parts))); + List parts = client.get_partitions(db_name, tbl_name, max_parts); + return fastpath ? parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override @@ -994,16 +1008,17 @@ public PartitionSpecProxy listPartitionSpecs(String dbName, String tableName, in public List listPartitions(String db_name, String tbl_name, List part_vals, short max_parts) throws NoSuchObjectException, MetaException, TException { - return deepCopyPartitions(filterHook.filterPartitions( - client.get_partitions_ps(db_name, tbl_name, part_vals, max_parts))); + List parts = client.get_partitions_ps(db_name, tbl_name, part_vals, max_parts); + return fastpath ? parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override public List listPartitionsWithAuthInfo(String db_name, String tbl_name, short max_parts, String user_name, List group_names) throws NoSuchObjectException, MetaException, TException { - return deepCopyPartitions(filterHook.filterPartitions( - client.get_partitions_with_auth(db_name, tbl_name, max_parts, user_name, group_names))); + List parts = client.get_partitions_with_auth(db_name, tbl_name, max_parts, + user_name, group_names); + return fastpath ? parts :deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override @@ -1011,8 +1026,9 @@ public PartitionSpecProxy listPartitionSpecs(String dbName, String tableName, in String tbl_name, List part_vals, short max_parts, String user_name, List group_names) throws NoSuchObjectException, MetaException, TException { - return deepCopyPartitions(filterHook.filterPartitions(client.get_partitions_ps_with_auth(db_name, - tbl_name, part_vals, max_parts, user_name, group_names))); + List parts = client.get_partitions_ps_with_auth(db_name, + tbl_name, part_vals, max_parts, user_name, group_names); + return fastpath ? parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } /** @@ -1033,8 +1049,8 @@ public PartitionSpecProxy listPartitionSpecs(String dbName, String tableName, in public List listPartitionsByFilter(String db_name, String tbl_name, String filter, short max_parts) throws MetaException, NoSuchObjectException, TException { - return deepCopyPartitions(filterHook.filterPartitions( - client.get_partitions_by_filter(db_name, tbl_name, filter, max_parts))); + List parts = client.get_partitions_by_filter(db_name, tbl_name, filter, max_parts); + return fastpath ? parts :deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override @@ -1070,9 +1086,13 @@ public boolean listPartitionsByExpr(String db_name, String tbl_name, byte[] expr throw new IncompatibleMetastoreException( "Metastore doesn't support listPartitionsByExpr: " + te.getMessage()); } - r.setPartitions(filterHook.filterPartitions(r.getPartitions())); - // TODO: in these methods, do we really need to deepcopy? - deepCopyPartitions(r.getPartitions(), result); + if (fastpath) { + result.addAll(r.getPartitions()); + } else { + r.setPartitions(filterHook.filterPartitions(r.getPartitions())); + // TODO: in these methods, do we really need to deepcopy? 
+ deepCopyPartitions(r.getPartitions(), result); + } return !r.isSetHasUnknownPartitions() || r.isHasUnknownPartitions(); // Assume the worst. } @@ -1088,7 +1108,8 @@ public boolean listPartitionsByExpr(String db_name, String tbl_name, byte[] expr @Override public Database getDatabase(String name) throws NoSuchObjectException, MetaException, TException { - return deepCopy(filterHook.filterDatabase(client.get_database(name))); + Database d = client.get_database(name); + return fastpath ? d :deepCopy(filterHook.filterDatabase(d)); } /** @@ -1104,15 +1125,15 @@ public Database getDatabase(String name) throws NoSuchObjectException, @Override public Partition getPartition(String db_name, String tbl_name, List part_vals) throws NoSuchObjectException, MetaException, TException { - return deepCopy(filterHook.filterPartition( - client.get_partition(db_name, tbl_name, part_vals))); + Partition p = client.get_partition(db_name, tbl_name, part_vals); + return fastpath ? p : deepCopy(filterHook.filterPartition(p)); } @Override public List getPartitionsByNames(String db_name, String tbl_name, List part_names) throws NoSuchObjectException, MetaException, TException { - return deepCopyPartitions(filterHook.filterPartitions( - client.get_partitions_by_names(db_name, tbl_name, part_names))); + List parts = client.get_partitions_by_names(db_name, tbl_name, part_names); + return fastpath ? parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override @@ -1120,8 +1141,9 @@ public Partition getPartitionWithAuthInfo(String db_name, String tbl_name, List part_vals, String user_name, List group_names) throws MetaException, UnknownTableException, NoSuchObjectException, TException { - return deepCopy(filterHook.filterPartition(client.get_partition_with_auth(db_name, - tbl_name, part_vals, user_name, group_names))); + Partition p = client.get_partition_with_auth(db_name, tbl_name, part_vals, user_name, + group_names); + return fastpath ? p : deepCopy(filterHook.filterPartition(p)); } /** @@ -1138,7 +1160,8 @@ public Partition getPartitionWithAuthInfo(String db_name, String tbl_name, @Override public Table getTable(String dbname, String name) throws MetaException, TException, NoSuchObjectException { - return deepCopy(filterHook.filterTable(client.get_table(dbname, name))); + Table t = client.get_table(dbname, name); + return fastpath ? t : deepCopy(filterHook.filterTable(t)); } /** {@inheritDoc} */ @@ -1146,15 +1169,16 @@ public Table getTable(String dbname, String name) throws MetaException, @Deprecated public Table getTable(String tableName) throws MetaException, TException, NoSuchObjectException { - return filterHook.filterTable(getTable(DEFAULT_DATABASE_NAME, tableName)); + Table t = getTable(DEFAULT_DATABASE_NAME, tableName); + return fastpath ? t : filterHook.filterTable(t); } /** {@inheritDoc} */ @Override public List getTableObjectsByName(String dbName, List tableNames) throws MetaException, InvalidOperationException, UnknownDBException, TException { - return deepCopyTables(filterHook.filterTables( - client.get_table_objects_by_name(dbName, tableNames))); + List
tabs = client.get_table_objects_by_name(dbName, tableNames); + return fastpath ? tabs : deepCopyTables(filterHook.filterTables(tabs)); } /** {@inheritDoc} */ @@ -1263,7 +1287,8 @@ public void alterDatabase(String dbName, Database db) public List getFields(String db, String tableName) throws MetaException, TException, UnknownTableException, UnknownDBException { - return deepCopyFieldSchemas(client.get_fields(db, tableName)); + List fields = client.get_fields(db, tableName); + return fastpath ? fields : deepCopyFieldSchemas(fields); } /** @@ -1371,6 +1396,16 @@ public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) return client.set_aggr_stats_for(request); } + @Override + public void flushCache() { + try { + client.flushCache(); + } catch (TException e) { + // Not much we can do about it honestly + LOG.warn("Got error flushing the cache", e); + } + } + /** {@inheritDoc} */ @Override public List getTableColumnStatistics(String dbName, String tableName, @@ -1421,7 +1456,8 @@ public boolean deleteTableColumnStatistics(String dbName, String tableName, Stri public List getSchema(String db, String tableName) throws MetaException, TException, UnknownTableException, UnknownDBException { - return deepCopyFieldSchemas(client.get_schema(db, tableName)); + List fields = client.get_schema(db, tableName); + return fastpath ? fields : deepCopyFieldSchemas(fields); } @Override @@ -1433,8 +1469,8 @@ public String getConfigValue(String name, String defaultValue) @Override public Partition getPartition(String db, String tableName, String partName) throws MetaException, TException, UnknownTableException, NoSuchObjectException { - return deepCopy( - filterHook.filterPartition(client.get_partition_by_name(db, tableName, partName))); + Partition p = client.get_partition_by_name(db, tableName, partName); + return fastpath ? p : deepCopy(filterHook.filterPartition(p)); } public Partition appendPartitionByName(String dbName, String tableName, String partName) @@ -1445,8 +1481,9 @@ public Partition appendPartitionByName(String dbName, String tableName, String p public Partition appendPartitionByName(String dbName, String tableName, String partName, EnvironmentContext envContext) throws InvalidObjectException, AlreadyExistsException, MetaException, TException { - return deepCopy(client.append_partition_by_name_with_environment_context(dbName, tableName, - partName, envContext)); + Partition p = client.append_partition_by_name_with_environment_context(dbName, tableName, + partName, envContext); + return fastpath ? p : deepCopy(p); } public boolean dropPartitionByName(String dbName, String tableName, String partName, @@ -1948,7 +1985,8 @@ public void dropFunction(String dbName, String funcName) @Override public Function getFunction(String dbName, String funcName) throws MetaException, TException { - return deepCopy(client.get_function(dbName, funcName)); + Function f = client.get_function(dbName, funcName); + return fastpath ? 
f : deepCopy(f); } @Override diff --git metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java index 93660db..dbbaedb 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java @@ -1379,4 +1379,10 @@ public AggrStats getAggrColStatsFor(String dbName, String tblName, List colNames, List partName) throws NoSuchObjectException, MetaException, TException; boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException; + + /** + * Flush any catalog objects held by the metastore implementation. Note that this does not + * flush statistics objects. This should be called at the beginning of each query. + */ + void flushCache(); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java index e689853..0d6f149 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -6115,6 +6115,11 @@ protected String describeResult() { }.run(true); } + @Override + public void flushCache() { + // NOP as there's no caching + } + private List getMPartitionColumnStatistics( Table table, List partNames, List colNames) throws NoSuchObjectException, MetaException { diff --git metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java index 2b49eab..acdda3e 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -589,5 +589,10 @@ public AggrStats get_aggr_stats_for(String dbName, String tblName, * @return */ public CurrentNotificationEventId getCurrentNotificationEventId(); - + + /* + * Flush any catalog objects held by the metastore implementation. Note that this does not + * flush statistics objects. This should be called at the beginning of each query. 
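+   * Implementations that do not cache catalog objects may treat this as a no-op, as ObjectStore
+   * does elsewhere in this patch; HMSHandler.flushCache() simply forwards here via
+   * getMS().flushCache().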
+ */ + public void flushCache(); } diff --git metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java index 7c9bedb..baf6014 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java @@ -26,6 +26,8 @@ import java.util.List; import org.apache.commons.lang.ClassUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; @@ -37,6 +39,8 @@ @InterfaceStability.Evolving public class RawStoreProxy implements InvocationHandler { + static final private Log LOG = LogFactory.getLog(RawStoreProxy.class.getName()); + private final RawStore base; private final MetaStoreInit.MetaStoreInitData metaStoreInitData = new MetaStoreInit.MetaStoreInitData(); @@ -95,6 +99,7 @@ public Object invoke(Object proxy, Method method, Object[] args) throws Throwabl Object ret = null; try { + LOG.info("Invoking " + method.toGenericString()); ret = method.invoke(base, args); } catch (UndeclaredThrowableException e) { throw e.getCause(); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/WritableMicroBench.java metastore/src/java/org/apache/hadoop/hive/metastore/WritableMicroBench.java new file mode 100644 index 0000000..5e11f9b --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/WritableMicroBench.java @@ -0,0 +1,355 @@ +package org.apache.hadoop.hive.metastore; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.io.Writable; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Created by gates on 12/17/14. + */ +public class WritableMicroBench { + + private static void encodeStrings(StringBuilder bldr, String... 
strings) { + for (String s : strings) { + bldr.append(s.length()); + if (s.length() > 0) bldr.append(s); + } + } + + public static void main(String[] argv) throws Exception { + + Set s = new HashSet(); + byte[] b1 = "hello world".getBytes(); + byte[] b2 = "hello world".getBytes(); + s.add(b1); + s.add(b2); + System.out.println("b1 == b2 " + (b1.equals(b2))); + System.out.println("b1 hash " + b1.hashCode()); + System.out.println("b2 hash " + b2.hashCode()); + System.out.println("Array.equals " + Arrays.equals(b1, b2)); + System.out.println("b1 Arrays hash " + Arrays.hashCode(b1)); + System.out.println("b2 Arrays hash " + Arrays.hashCode(b2)); + /* + int numIters = 1000000; + + StringBuilder bldr = new StringBuilder(); + + String[] vals = {"name", "varchar32", "", "age", "int", "", "gpa", "decimal(3,2)", "", + "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"}; + encodeStrings(bldr, vals); + bldr.append(0); + bldr.append(0); + encodeStrings(bldr, "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "dontknowwhatthisis"); + bldr.append(0); + bldr.append(0); + bldr.append(0); + + byte[] bytes = bldr.toString().getBytes(StandardCharsets.UTF_8); + MessageDigest md = MessageDigest.getInstance("MD5"); + byte[] digest = null; + + long begin = System.currentTimeMillis(); + for (int i = 0; i < numIters; i++) { + md.update(bytes); + digest = md.digest(); + } + long end = System.currentTimeMillis(); + System.out.println("md5 length is " + digest.length); + System.out.println("md5 time is " + (end - begin)); + + md = MessageDigest.getInstance("SHA-1"); + begin = System.currentTimeMillis(); + for (int i = 0; i < numIters; i++) { + md.update(bytes); + digest = md.digest(); + } + end = System.currentTimeMillis(); + System.out.println("sha1 length is " + digest.length); + System.out.println("sha1 time is " + (end - begin)); + + md = MessageDigest.getInstance("SHA-256"); + begin = System.currentTimeMillis(); + for (int i = 0; i < numIters; i++) { + md.update(bytes); + digest = md.digest(); + } + end = System.currentTimeMillis(); + System.out.println("sha256 length is " + digest.length); + System.out.println("sha256 time is " + (end - begin)); + + + + + + + + if (argv.length > 0) numIters = Integer.valueOf(argv[0]); + + Partition part = new Partition(); + + List values = new ArrayList(2); + values.add("2014-12-17"); + values.add("NorthAmerica"); + part.setValues(values); + part.setDbName("mydb"); + part.setTableName("mytable"); + part.setCreateTime(93); + part.setLastAccessTime(3242423); + StorageDescriptor sd = new StorageDescriptor(); + List cols = new ArrayList(10); + for (int i = 0; i < 10; i++) { + FieldSchema fs = new FieldSchema("col_" + Integer.toString(i), "no comment", "varchar(32)"); + cols.add(fs); + } + sd.setCols(cols); + sd.setLocation("/hive/warehouse/somewhere"); + sd.setInputFormat("org.apache.hive.io.unicorn.UnicornInputFormat"); + sd.setOutputFormat("org.apache.hive.io.unicorn.UnicornOutputFormat"); + sd.setCompressed(false); + sd.setNumBuckets(0); + SerDeInfo serde = new SerDeInfo("org.apache.hive.io.unicorn.UnicornSerde", + "dontknowwhatthisis", new HashMap()); + sd.setSerdeInfo(serde); + sd.setBucketCols(new ArrayList()); + sd.setSortCols(new ArrayList()); + sd.setParameters(new HashMap()); + part.setSd(sd); + Map parameters = new HashMap(2); + parameters.put("transactional", "true"); + parameters.put("someotherparam", "whatever"); + part.setParameters(parameters); + + try { + long beginSerialization = System.currentTimeMillis(); + 
ByteArrayOutputStream baos = null; + for (int i = 0; i < numIters; i++) { + baos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(baos); + oos.writeObject(part); + + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + ObjectInputStream ois = new ObjectInputStream(bais); + Partition newPart = (Partition)ois.readObject(); + assert part.getTableName() == newPart.getTableName(); + } + long endSerialization = System.currentTimeMillis(); + System.out.println("serializable size is " + baos.toByteArray().length); + + long beginWritable = System.currentTimeMillis(); + for (int i = 0; i < numIters; i++) { + baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + PartitionWritable pw = new PartitionWritable(part); + pw.write(dos); + + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + DataInputStream dis = new DataInputStream(bais); + PartitionWritable newPart = new PartitionWritable(); + newPart.readFields(dis); + assert part.getTableName() == newPart.p.getTableName(); + } + long endWritable = System.currentTimeMillis(); + System.out.println("writable size is " + baos.toByteArray().length); + + System.out.println("Serialization time is " + (endSerialization - beginSerialization) + " ms"); + System.out.println("Writable time is " + (endWritable - beginWritable) + " ms"); + + } catch (Exception e) { + System.err.println("Received exception " + e.getClass().getName() + ", " + e.getMessage()); + e.printStackTrace(); + } + */ + + } + + static class WritableWrapper { + + protected DataOutput out; + protected DataInput in; + + protected void writeStr(String str) throws IOException { + out.writeInt(str.length()); + out.write(str.getBytes(), 0, str.length()); + } + + protected String readStr() throws IOException { + int len = in.readInt(); + byte[] b = new byte[len]; + in.readFully(b, 0, len); + return new String(b); + } + + protected void writeList(List list) throws IOException { + out.writeInt(list.size()); + for (String val : list) { + writeStr(val); + } + } + + protected List readList() throws IOException { + int sz = in.readInt(); + List list = new ArrayList(sz); + for (int i = 0; i < sz; i++) { + list.add(readStr()); + } + return list; + } + + protected void writeMap(Map m) throws IOException { + out.writeInt(m.size()); + for (Map.Entry e : m.entrySet()) { + writeStr(e.getKey()); + writeStr(e.getValue()); + } + } + + protected Map readMap() throws IOException { + int sz = in.readInt(); + Map m = new HashMap(sz); + for (int i = 0; i < sz; i++) { + m.put(readStr(), readStr()); + } + return m; + } + } + + static class PartitionWritable extends WritableWrapper implements Writable { + public final Partition p; + + public PartitionWritable() { + p = new Partition(); + } + + public PartitionWritable(Partition partition) { + p = partition; + } + + @Override + public void write(DataOutput out) throws IOException { + this.out = out; + writeList(p.getValues()); + writeStr(p.getDbName()); + writeStr(p.getTableName()); + out.writeInt(p.getCreateTime()); + out.writeInt(p.getLastAccessTime()); + StorageDescriptorWritable sd = new StorageDescriptorWritable(p.getSd()); + sd.write(out); + writeMap(p.getParameters()); + } + + @Override + public void readFields(DataInput in) throws IOException { + this.in = in; + p.setValues(readList()); + p.setDbName(readStr()); + p.setTableName(readStr()); + p.setCreateTime(in.readInt()); + p.setLastAccessTime(in.readInt()); + StorageDescriptorWritable sd = new 
StorageDescriptorWritable(); + sd.readFields(in); + p.setSd(sd.sd); + p.setParameters(readMap()); + } + } + + static class StorageDescriptorWritable extends WritableWrapper implements Writable { + public final StorageDescriptor sd; + + public StorageDescriptorWritable() { + sd = new StorageDescriptor(); + } + + public StorageDescriptorWritable(StorageDescriptor sd) { + this.sd = sd; + } + + @Override + public void write(DataOutput out) throws IOException { + this.out = out; + out.writeInt(sd.getColsSize()); + for (FieldSchema fs : sd.getCols()) { + writeStr(fs.getName()); + writeStr(fs.getComment()); + writeStr(fs.getType()); + } + writeStr(sd.getLocation()); + writeStr(sd.getInputFormat()); + writeStr(sd.getOutputFormat()); + out.writeBoolean(sd.isCompressed()); + out.writeInt(sd.getNumBuckets()); + writeStr(sd.getSerdeInfo().getName()); + writeStr(sd.getSerdeInfo().getSerializationLib()); + writeMap(sd.getSerdeInfo().getParameters()); + writeList(sd.getBucketCols()); + out.writeInt(sd.getSortColsSize()); + for (Order o : sd.getSortCols()) { + writeStr(o.getCol()); + out.writeInt(o.getOrder()); + } + writeMap(sd.getParameters()); + // skipping SkewedInfo + out.writeBoolean(sd.isStoredAsSubDirectories()); + + + } + + @Override + public void readFields(DataInput in) throws IOException { + this.in = in; + int numCols = in.readInt(); + for (int i = 0; i < numCols; i++) { + FieldSchema fs = new FieldSchema(); + fs.setName(readStr()); + fs.setComment(readStr()); + fs.setType(readStr()); + sd.addToCols(fs); + } + sd.setLocation(readStr()); + sd.setInputFormat(readStr()); + sd.setOutputFormat(readStr()); + sd.setCompressed(in.readBoolean()); + sd.setNumBuckets(in.readInt()); + SerDeInfo serde = new SerDeInfo(); + serde.setName(readStr()); + serde.setSerializationLib(readStr()); + serde.setParameters(readMap()); + sd.setSerdeInfo(serde); + sd.setBucketCols(readList()); + int numOrder = in.readInt(); + for (int i = 0; i < numOrder; i++) { + Order o = new Order(); + o.setCol(readStr()); + o.setOrder(in.readInt()); + sd.addToSortCols(o); + } + sd.setParameters(readMap()); + // skipping SkewedInfo + sd.setStoredAsSubDirectories(in.readBoolean()); + } + } + + +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/Counter.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/Counter.java new file mode 100644 index 0000000..6171fab --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/Counter.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +/** + * A simple metric to count how many times something occurs. 
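+ * A rough usage sketch (the counter name matches one registered by HBaseReadWrite later in this
+ * patch; the logging call is only illustrative):
+ * <pre>
+ *   Counter tableHits = new Counter("table cache hits");
+ *   tableHits.incr();
+ *   LOG.debug(tableHits.dump());   // logs "Dumping metric: table cache hits 1"
+ * </pre>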
+ */ +class Counter { + private final String name; + private long cnt; + + Counter(String name) { + this.name = name; + cnt = 0; + } + + void incr() { + cnt++; + } + + void clear() { + cnt = 0; + } + + String dump() { + StringBuilder bldr = new StringBuilder("Dumping metric: "); + bldr.append(name).append(' ').append(cnt); + return bldr.toString(); + } + +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/DatabaseWritable.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/DatabaseWritable.java new file mode 100644 index 0000000..ebbe4fc --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/DatabaseWritable.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Wrapper for {@link org.apache.hadoop.hive.metastore.api.Table} that makes it writable + */ +class DatabaseWritable implements Writable { + final Database db; + + DatabaseWritable() { + this.db = new Database(); + } + + DatabaseWritable(Database db) { + this.db = db; + } + + @Override + public void write(DataOutput out) throws IOException { + HBaseUtils.writeStr(out, db.getName()); + HBaseUtils.writeStr(out, db.getDescription()); + HBaseUtils.writeStr(out, db.getLocationUri()); + HBaseUtils.writeStrStrMap(out, db.getParameters()); + HBaseUtils.writePrivileges(out, db.getPrivileges()); + HBaseUtils.writeStr(out, db.getOwnerName()); + HBaseUtils.writePrincipalType(out, db.getOwnerType()); + } + + @Override + public void readFields(DataInput in) throws IOException { + db.setName(HBaseUtils.readStr(in)); + db.setDescription(HBaseUtils.readStr(in)); + db.setLocationUri(HBaseUtils.readStr(in)); + db.setParameters(HBaseUtils.readStrStrMap(in)); + db.setPrivileges(HBaseUtils.readPrivileges(in)); + db.setOwnerName(HBaseUtils.readStr(in)); + db.setOwnerType(HBaseUtils.readPrincipalType(in)); + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java new file mode 100644 index 0000000..f097158 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java @@ -0,0 +1,984 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HConnectionManager; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.filter.CompareFilter; +import org.apache.hadoop.hbase.filter.Filter; +import org.apache.hadoop.hbase.filter.RegexStringComparator; +import org.apache.hadoop.hbase.filter.RowFilter; +import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; + +import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Deque; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + + +/** + * Class to manage storing object in and reading them from HBase. 
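+ * Each catalog object type (database, table, partition, role, storage descriptor) lives in its
+ * own HBase table, and every metastore thread works through its own thread-local instance of
+ * this class. The configured cache budget (HiveConf.ConfVars.METASTORE_HBASE_CACHE_SIZE) is
+ * split roughly 50/50 between catalog objects and statistics, with about 1% of the catalog half
+ * reserved for storage descriptors, which are shared across the partitions of a table.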
+ */ +class HBaseReadWrite { + + @VisibleForTesting final static String DB_TABLE = "DBS"; + @VisibleForTesting final static String PART_TABLE = "PARTITIONS"; + @VisibleForTesting final static String ROLE_TABLE = "ROLES"; + @VisibleForTesting final static String SD_TABLE = "SDS"; + @VisibleForTesting final static String TABLE_TABLE = "TBLS"; + @VisibleForTesting final static byte[] CATALOG_CF = "c".getBytes(HBaseUtils.ENCODING); + @VisibleForTesting final static byte[] STATS_CF = "s".getBytes(HBaseUtils.ENCODING); + @VisibleForTesting final static String NO_CACHE_CONF = "no.use.cache"; + private final static byte[] CATALOG_COL = "cat".getBytes(HBaseUtils.ENCODING); + private final static byte[] REF_COUNT_COL = "ref".getBytes(HBaseUtils.ENCODING); + private final static int tablesToCache = 10; + + // TODO Add privileges as a second column in the CATALOG_CF + + private final static String[] tableNames = { DB_TABLE, PART_TABLE, ROLE_TABLE, SD_TABLE, + TABLE_TABLE }; + static final private Log LOG = LogFactory.getLog(HBaseReadWrite.class.getName()); + + private static ThreadLocal self = new ThreadLocal() { + @Override + protected HBaseReadWrite initialValue() { + if (staticConf == null) { + throw new RuntimeException("Attempt to create HBaseReadWrite with no configuration set"); + } + return new HBaseReadWrite(staticConf); + } + }; + + private static boolean tablesCreated = false; + private static Configuration staticConf = null; + + private Configuration conf; + private HConnection conn; + private Map tables; + private MessageDigest md; + private ObjectCache, Table> tableCache; + private ObjectCache sdCache; + private PartitionCache partCache; + private StatsCache statsCache; + private Counter tableHits; + private Counter tableMisses; + private Counter tableOverflows; + private Counter partHits; + private Counter partMisses; + private Counter partOverflows; + private Counter sdHits; + private Counter sdMisses; + private Counter sdOverflows; + private List counters; + + /** + * Get the instance of HBaseReadWrite for the current thread. This is intended to be used by + * {@link org.apache.hadoop.hive.metastore.hbase.HBaseStore} since it creates the thread local + * version of this class. + * @param configuration Configuration object + * @return thread's instance of HBaseReadWrite + */ + static HBaseReadWrite getInstance(Configuration configuration) { + staticConf = configuration; + return self.get(); + } + + /** + * Get the instance of HBaseReadWrite for the current thread. This is inteded to be used after + * the thread has been initialized. Woe betide you if that's not the case. 
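+   * In practice {@link #getInstance(Configuration)} must have been called at least once (the
+   * HBaseStore code path is expected to do that) before this no-argument form is used; otherwise
+   * the thread-local initializer throws a RuntimeException because no configuration has been set.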
+ * @return thread's instance of HBaseReadWrite + */ + static HBaseReadWrite getInstance() { + return self.get(); + } + + private HBaseReadWrite(Configuration configuration) { + conf = configuration; + try { + conn = HConnectionManager.createConnection(conf); + } catch (IOException e) { + throw new RuntimeException(e); + } + tables = new HashMap(); + + try { + md = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + int totalObjectsToCache = + ((HiveConf)conf).getIntVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_SIZE); + + tableHits = new Counter("table cache hits"); + tableMisses = new Counter("table cache misses"); + tableOverflows = new Counter("table cache overflows"); + partHits = new Counter("partition cache hits"); + partMisses = new Counter("partition cache misses"); + partOverflows = new Counter("partition cache overflows"); + sdHits = new Counter("storage descriptor cache hits"); + sdMisses = new Counter("storage descriptor cache misses"); + sdOverflows = new Counter("storage descriptor cache overflows"); + counters = new ArrayList(); + counters.add(tableHits); + counters.add(tableMisses); + counters.add(tableOverflows); + counters.add(partHits); + counters.add(partMisses); + counters.add(partOverflows); + counters.add(sdHits); + counters.add(sdMisses); + counters.add(sdOverflows); + + // Divide 50/50 between catalog and stats, then give 1% of catalog space to storage + // descriptors (storage descriptors are shared, so 99% should be the same for a + // given table). + int sdsCacheSize = totalObjectsToCache / 100; + if (conf.getBoolean(NO_CACHE_CONF, false)) { + tableCache = new BogusObjectCache, Table>(); + sdCache = new BogusObjectCache(); + partCache = new BogusPartitionCache(); + statsCache = StatsCache.getBogusStatsCache(); + } else { + tableCache = new ObjectCache, Table>(tablesToCache, tableHits, + tableMisses, tableOverflows); + sdCache = new ObjectCache(sdsCacheSize, sdHits, + sdMisses, sdOverflows); + partCache = new PartitionCache(totalObjectsToCache / 2, partHits, partMisses, partOverflows); + statsCache = StatsCache.getInstance(conf); + } + } + + // Synchronize this so not everyone's doing it at once. 
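A rough sketch of what that split works out to under the shipped default budget of 100,000 cached objects (hive.metastore.hbase.cache.size). This is back-of-the-envelope arithmetic derived from the constructor above, not code in the patch, and the note about StatsCache is an assumption about how it sizes itself:

    int totalObjectsToCache = 100000;               // hive.metastore.hbase.cache.size default
    int sdsCacheSize   = totalObjectsToCache / 100; // 1,000 shared storage descriptors
    int partCacheSize  = totalObjectsToCache / 2;   // 50,000 partitions
    int tableCacheSize = tablesToCache;             // fixed at 10 tables
    // the remaining half of the budget is presumed to be taken by StatsCache,
    // obtained via StatsCache.getInstance(conf)

The synchronized createTablesIfNotExist method that follows is what the comment above refers to; it creates the DBS, PARTITIONS, ROLES, SDS and TBLS tables on first use if they do not already exist.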
+ static synchronized void createTablesIfNotExist() throws IOException { + if (!tablesCreated) { + LOG.debug("Determining which tables need to be created"); + HBaseAdmin admin = new HBaseAdmin(self.get().conn); + LOG.debug("Got hbase admin"); + for (String name : tableNames) { + LOG.debug("Checking for table " + name); + if (self.get().getHTable(name) == null) { + LOG.debug("Creating table " + name); + HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(name)); + tableDesc.addFamily(new HColumnDescriptor(CATALOG_CF)); + // Only table and partitions need stats + if (TABLE_TABLE.equals(name) || PART_TABLE.equals(name)) { + tableDesc.addFamily(new HColumnDescriptor(STATS_CF)); + } + admin.createTable(tableDesc); + } + } + admin.close(); + tablesCreated = true; + } + } + + /** + * Begin a transaction + */ + void begin() { + // NOP for now + } + + /** + * Commit a transaction + */ + void commit() { + // NOP for now + } + + void rollback() { + // NOP for now + } + + void close() throws IOException { + for (HTableInterface htab : tables.values()) htab.close(); + conn.close(); + } + + /** + * Fetch a database object + * @param name name of the database to fetch + * @return the database object, or null if there is no such database + * @throws IOException + */ + Database getDb(String name) throws IOException { + byte[] key = HBaseUtils.buildKey(name); + byte[] serialized = read(DB_TABLE, key, CATALOG_CF, CATALOG_COL); + if (serialized == null) return null; + DatabaseWritable db = new DatabaseWritable(); + HBaseUtils.deserialize(db, serialized); + return db.db; + } + + /** + * Store a database object + * @param database database object to store + * @throws IOException + */ + void putDb(Database database) throws IOException { + DatabaseWritable db = new DatabaseWritable(database); + byte[] key = HBaseUtils.buildKey(db.db.getName()); + byte[] serialized = HBaseUtils.serialize(db); + store(DB_TABLE, key, CATALOG_CF, CATALOG_COL, serialized); + flush(); + } + + /** + * Drop a database + * @param name name of db to drop + * @throws IOException + */ + void deleteDb(String name) throws IOException { + byte[] key = HBaseUtils.buildKey(name); + delete(DB_TABLE, key, null, null); + flush(); + } + + /** + * Fetch one partition + * @param dbName database table is in + * @param tableName table partition is in + * @param partVals list of values that specify the partition, given in the same order as the + * columns they belong to + * @return The partition object, or null if there is no such partition + * @throws IOException + */ + Partition getPartition(String dbName, String tableName, List partVals) + throws IOException { + Partition cached = partCache.get(dbName, tableName, partVals); + if (cached != null) return cached; + byte[] key = buildPartitionKey(dbName, tableName, partVals); + byte[] serialized = read(PART_TABLE, key, CATALOG_CF, CATALOG_COL); + if (serialized == null) return null; + PartitionWritable part = new PartitionWritable(); + HBaseUtils.deserialize(part, serialized); + partCache.put(dbName, tableName, part.part); + return part.part; + } + + /** + * Add a partition + * @param partition partition object to add + * @throws IOException + */ + void putPartition(Partition partition) throws IOException { + PartitionWritable part = new PartitionWritable(partition); + byte[] key = buildPartitionKey(part); + byte[] serialized = HBaseUtils.serialize(part); + store(PART_TABLE, key, CATALOG_CF, CATALOG_COL, serialized); + flush(); + partCache.put(partition.getDbName(), partition.getTableName(),
partition); + } + + /** + * Find all the partitions in a table. + * @param dbName name of the database the table is in + * @param tableName table name + * @param maxPartitions max partitions to fetch. If negative all partitions will be returned. + * @return List of partitions that match the criteria. + * @throws IOException + */ + List scanPartitionsInTable(String dbName, String tableName, int maxPartitions) + throws IOException { + if (maxPartitions < 0) maxPartitions = Integer.MAX_VALUE; + Collection cached = partCache.getAllForTable(dbName, tableName); + if (cached != null) { + return maxPartitions < cached.size() + ? new ArrayList(cached).subList(0, maxPartitions) + : new ArrayList(cached); + } + byte[] keyPrefix = HBaseUtils.buildKeyWithTrailingSeparator(dbName, tableName); + List parts = scanOnPrefix(PART_TABLE, keyPrefix, CATALOG_CF, CATALOG_COL, -1); + partCache.put(dbName, tableName, parts, true); + return maxPartitions < parts.size() ? parts.subList(0, maxPartitions) : parts; + } + + /** + * Scan partitions based on partial key information. + * @param dbName name of database, required + * @param tableName name of table, required + * @param partVals partial specification of values. Any values that are unknown can be left + * null in the list. For example, if a table had two partition columns date + * and region (in that order), and partitions ('today', 'na'), ('today', 'eu'), + * ('tomorrow', 'na'), ('tomorrow', 'eu') then passing ['today'] would return + * ('today', 'na') and ('today', 'eu') while passing [null, 'eu'] would return + * ('today', 'eu') and ('tomorrow', 'eu') + * @param maxPartitions Maximum number of entries to return. + * @return list of partitions that match the specified information + * @throws IOException + */ + List scanPartitions(String dbName, String tableName, List partVals, + int maxPartitions) throws IOException { + byte[] keyPrefix; + if (partVals == null || partVals.size() == 0) { + keyPrefix = HBaseUtils.buildKeyWithTrailingSeparator(dbName, tableName); + return scanOnPrefix(PART_TABLE, keyPrefix, CATALOG_CF, CATALOG_COL, maxPartitions); + } + int firstNull = 0; + for (; firstNull < partVals.size(); firstNull++) { + if (partVals.get(firstNull) == null) break; + } + if (firstNull == partVals.size()) { + keyPrefix = buildPartitionKey(dbName, tableName, partVals); + return scanOnPrefix(PART_TABLE, keyPrefix, CATALOG_CF, CATALOG_COL, maxPartitions); + } + keyPrefix = buildPartitionKey(dbName, tableName, partVals.subList(0, firstNull)); + StringBuilder regex = new StringBuilder(); + regex.append(dbName); + regex.append(':'); + regex.append(tableName); + for (String val : partVals) { + regex.append(HBaseUtils.KEY_SEPARATOR); + if (val == null) regex.append("[^" + HBaseUtils.KEY_SEPARATOR + "]+"); // Will this do + // what I want? 
+ else regex.append(val); + } + + Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, + new RegexStringComparator(regex.toString())); + + List parts = scanOnPrefixWithFilter(PART_TABLE, keyPrefix, CATALOG_CF, CATALOG_COL, + maxPartitions, filter); + partCache.put(dbName, tableName, parts, false); + return parts; + } + + /** + * Delete a partition + * @param dbName database name that table is in + * @param tableName table partition is in + * @param partVals partition values that define this partition, in the same order as the + * partition columns they are values for + * @throws IOException + */ + void deletePartition(String dbName, String tableName, List partVals) throws IOException { + // Find the partition so I can get the storage descriptor and drop it + partCache.remove(dbName, tableName, partVals); + Partition p = getPartition(dbName, tableName, partVals); + decrementStorageDescriptorRefCount(p.getSd()); + byte[] key = buildPartitionKey(dbName, tableName, partVals); + delete(PART_TABLE, key, null, null); + flush(); + } + + /** + * Fetch a role + * @param roleName name of the role + * @return role object, or null if no such role + * @throws IOException + */ + Role getRole(String roleName) throws IOException { + byte[] key = HBaseUtils.buildKey(roleName); + byte[] serialized = read(ROLE_TABLE, key, CATALOG_CF, CATALOG_COL); + if (serialized == null) return null; + RoleWritable role = new RoleWritable(); + HBaseUtils.deserialize(role, serialized); + return role.role; + } + + /** + * Add a new role + * @param role role object + * @throws IOException + */ + void putRole(Role role) throws IOException { + byte[] key = HBaseUtils.buildKey(role.getRoleName()); + byte[] serialized = HBaseUtils.serialize(new RoleWritable(role)); + store(ROLE_TABLE, key, CATALOG_CF, CATALOG_COL, serialized); + flush(); + } + + /** + * Drop a role + * @param roleName name of role to drop + * @throws IOException + */ + void deleteRole(String roleName) throws IOException { + byte[] key = HBaseUtils.buildKey(roleName); + delete(ROLE_TABLE, key, null, null); + flush(); + } + + /** + * Fetch a table object + * @param dbName database the table is in + * @param tableName table name + * @return Table object, or null if no such table + * @throws IOException + */ + Table getTable(String dbName, String tableName) throws IOException { + ObjectPair hashKey = new ObjectPair(dbName, tableName); + Table cached = tableCache.get(hashKey); + if (cached != null) return cached; + byte[] key = HBaseUtils.buildKey(dbName, tableName); + byte[] serialized = read(TABLE_TABLE, key, CATALOG_CF, CATALOG_COL); + if (serialized == null) return null; + TableWritable table = new TableWritable(); + HBaseUtils.deserialize(table, serialized); + tableCache.put(hashKey, table.table); + return table.table; + } + + /** + * Put a table object + * @param table table object + * @throws IOException + */ + void putTable(Table table) throws IOException { + byte[] key = HBaseUtils.buildKey(table.getDbName(), table.getTableName()); + byte[] serialized = HBaseUtils.serialize(new TableWritable(table)); + store(TABLE_TABLE, key, CATALOG_CF, CATALOG_COL, serialized); + flush(); + tableCache.put(new ObjectPair(table.getDbName(), table.getTableName()), table); + } + + /** + * Delete a table + * @param dbName name of database table is in + * @param tableName table to drop + * @throws IOException + */ + void deleteTable(String dbName, String tableName) throws IOException { + tableCache.remove(new ObjectPair(dbName, tableName)); + // Find the table so I can get 
the storage descriptor and drop it + Table t = getTable(dbName, tableName); + decrementStorageDescriptorRefCount(t.getSd()); + byte[] key = HBaseUtils.buildKey(dbName, tableName); + delete(TABLE_TABLE, key, null, null); + flush(); + } + + /** + * If this storage descriptor has already been read, then return it from the cache. If not, read it, then + * return it. + * @param hash md5 hash of the storage descriptor to fetch + * @return the storage descriptor + * @throws IOException + */ + StorageDescriptor getStorageDescriptor(byte[] hash) throws IOException { + ByteArrayWrapper hashKey = new ByteArrayWrapper(hash); + StorageDescriptor cached = sdCache.get(hashKey); + if (cached != null) return cached; + byte[] serialized = read(SD_TABLE, hash, CATALOG_CF, CATALOG_COL); + if (serialized == null) { + throw new RuntimeException("Woh, bad! Trying to fetch a non-existent storage descriptor " + + "from hash " + hash); + } + StorageDescriptor sd = new StorageDescriptor(); + HBaseUtils.deserializeStorageDescriptor(sd, serialized); + sdCache.put(hashKey, sd); + return sd; + } + + /** + * Lower the reference count on the storage descriptor by one. If it goes to zero, then it + * will be deleted. + * @param sd Storage descriptor + * @throws IOException + */ + void decrementStorageDescriptorRefCount(StorageDescriptor sd) throws IOException { + byte[] serialized = HBaseUtils.serializeStorageDescriptor(sd); + byte[] key = hash(serialized); + for (int i = 0; i < 10; i++) { + byte[] serializedRefCnt = read(SD_TABLE, key, CATALOG_CF, REF_COUNT_COL); + if (serializedRefCnt == null) { + // Someone deleted it before we got to it, no worries + return; + } + int refCnt = Integer.valueOf(new String(serializedRefCnt, HBaseUtils.ENCODING)); + HTableInterface htab = getHTable(SD_TABLE); + if (refCnt-- < 1) { + Delete d = new Delete(key); + if (htab.checkAndDelete(key, CATALOG_CF, REF_COUNT_COL, serializedRefCnt, d)) { + sdCache.remove(new ByteArrayWrapper(key)); + return; + } + } else { + Put p = new Put(key); + p.add(CATALOG_CF, REF_COUNT_COL, Integer.toString(refCnt).getBytes(HBaseUtils.ENCODING)); + if (htab.checkAndPut(key, CATALOG_CF, REF_COUNT_COL, serializedRefCnt, p)) { + return; + } + } + } + throw new IOException("Too many unsuccessful attempts to decrement storage counter"); + } + + /** + * Place the common parts of a storage descriptor into the cache. + * @param storageDescriptor storage descriptor to store.
+ * @return id of the entry in the cache, to be written in for the storage descriptor + */ + byte[] putStorageDescriptor(StorageDescriptor storageDescriptor) throws IOException { + byte[] sd = HBaseUtils.serializeStorageDescriptor(storageDescriptor); + byte[] key = hash(sd); + for (int i = 0; i < 10; i++) { + byte[] serializedRefCnt = read(SD_TABLE, key, CATALOG_CF, REF_COUNT_COL); + HTableInterface htab = getHTable(SD_TABLE); + if (serializedRefCnt == null) { + // We are the first to put it in the DB + Put p = new Put(key); + p.add(CATALOG_CF, CATALOG_COL, sd); + p.add(CATALOG_CF, REF_COUNT_COL, "0".getBytes(HBaseUtils.ENCODING)); + if (htab.checkAndPut(key, CATALOG_CF, REF_COUNT_COL, null, p)) { + sdCache.put(new ByteArrayWrapper(key), storageDescriptor); + return key; + } + } else { + // Just increment the reference count + int refCnt = Integer.valueOf(new String(serializedRefCnt, HBaseUtils.ENCODING)) + 1; + Put p = new Put(key); + p.add(CATALOG_CF, REF_COUNT_COL, Integer.toString(refCnt).getBytes(HBaseUtils.ENCODING)); + if (htab.checkAndPut(key, CATALOG_CF, REF_COUNT_COL, serializedRefCnt, p)) { + return key; + } + } + } + throw new IOException("Too many unsuccessful attempts to increment storage counter"); + } + + /** + * Update statistics for one or more columns for a table or a partition. + * @param dbName database the table is in + * @param tableName table to update statistics for + * @param partName name of the partition, can be null if these are table level statistics. + * @param partVals partition values that define partition to update statistics for. If this is + * null, then these will be assumed to be table level statistics. + * @param stats Stats object with stats for one or more columns. + * @throws IOException + */ + void updateStatistics(String dbName, String tableName, String partName, List partVals, + ColumnStatistics stats) throws IOException { + byte[] key = getStatisticsKey(dbName, tableName, partVals); + String hbaseTable = getStatisticsTable(partVals); + + byte[][] colnames = new byte[stats.getStatsObjSize()][]; + byte[][] serializeds = new byte[stats.getStatsObjSize()][]; + for (int i = 0; i < stats.getStatsObjSize(); i++) { + ColumnStatisticsObj obj = stats.getStatsObj().get(i); + serializeds[i] = HBaseUtils.serializeStatsForOneColumn(stats, obj); + String colname = obj.getColName(); + colnames[i] = HBaseUtils.buildKey(colname); + statsCache.put(dbName, tableName, partName, colname, obj, + stats.getStatsDesc().getLastAnalyzed()); + } + store(hbaseTable, key, STATS_CF, colnames, serializeds); + flush(); + } + + /** + * Get statistics for a table + * @param dbName name of database table is in + * @param tableName name of table + * @param colNames list of column names to get statistics for + * @return column statistics for indicated table + * @throws IOException + */ + ColumnStatistics getTableStatistics(String dbName, String tableName, List colNames) + throws IOException { + byte[] key = HBaseUtils.buildKey(dbName, tableName); + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setIsTblLevel(true); + desc.setDbName(dbName); + desc.setTableName(tableName); + stats.setStatsDesc(desc); + + // First we have to go through and see what's in the cache and fetch what we can from there.
+ // Then we'll fetch the rest from HBase + List stillLookingFor = new ArrayList(); + for (int i = 0; i < colNames.size(); i++) { + StatsCache.StatsInfo info = + statsCache.getTableStatistics(dbName, tableName, colNames.get(i)); + if (info == null) { + stillLookingFor.add(colNames.get(i)); + } else { + info.stats.setColName(colNames.get(i)); + stats.addToStatsObj(info.stats); + stats.getStatsDesc().setLastAnalyzed(Math.max(stats.getStatsDesc().getLastAnalyzed(), + info.lastAnalyzed)); + } + } + if (stillLookingFor.size() == 0) return stats; + + byte[][] colKeys = new byte[stillLookingFor.size()][]; + for (int i = 0; i < colKeys.length; i++) { + colKeys[i] = HBaseUtils.buildKey(stillLookingFor.get(i)); + } + Result res = read(TABLE_TABLE, key, STATS_CF, colKeys); + for (int i = 0; i < colKeys.length; i++) { + byte[] serialized = res.getValue(STATS_CF, colKeys[i]); + if (serialized == null) { + // There were no stats for this column, so skip it + continue; + } + ColumnStatisticsObj obj = HBaseUtils.deserializeStatsForOneColumn(stats, serialized); + statsCache.put(dbName, tableName, null, stillLookingFor.get(i), obj, + stats.getStatsDesc().getLastAnalyzed()); + obj.setColName(stillLookingFor.get(i)); + stats.addToStatsObj(obj); + } + return stats; + } + + /** + * Get statistics for a set of partitions + * @param dbName name of database table is in + * @param tableName table partitions are in + * @param partNames names of the partitions, used only to set values inside the return stats + * objects. + * @param partVals partition values for each partition, needed because this class doesn't know + * how to translate from partName to partVals + * @param colNames column names to fetch stats for. These columns will be fetched for all + * requested partitions. + * @return list of ColumnStats, one for each partition. The values will be in the same order + * as the partNames list that was passed in. + * @throws IOException + */ + List getPartitionStatistics(String dbName, String tableName, + List partNames, + List> partVals, + List colNames) throws IOException { + // Go through the cache first, see what we can fetch from there. This is complicated because + // we may have different columns for different partitions + List statsList = new ArrayList(partNames.size()); + List stillLookingFor = new ArrayList(); + for (int pOff = 0; pOff < partVals.size(); pOff++) { + // Add an entry for this partition in the list + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setIsTblLevel(false); + desc.setDbName(dbName); + desc.setTableName(tableName); + desc.setPartName(partNames.get(pOff)); + stats.setStatsDesc(desc); + statsList.add(stats); + PartStatsInfo missing = null; + + for (int cOff = 0; cOff < colNames.size(); cOff++) { + StatsCache.StatsInfo info = statsCache.getPartitionStatistics(dbName, tableName, + partNames.get(pOff), colNames.get(cOff)); + if (info == null) { + if (missing == null) { + // We haven't started an entry for this one yet + missing = new PartStatsInfo(stats, partVals.get(pOff), partNames.get(pOff)); + stillLookingFor.add(missing); + } + missing.colNames.add(colNames.get(cOff)); + } else { + info.stats.setColName(colNames.get(cOff)); + stats.addToStatsObj(info.stats); + stats.getStatsDesc().setLastAnalyzed(Math.max(stats.getStatsDesc().getLastAnalyzed(), + info.lastAnalyzed)); + } + } + } + if (stillLookingFor.size() == 0) return statsList; + + // Build the list of gets. 
It may be different for each partition now depending on what we + // found in the cache. + List gets = new ArrayList(); + for (PartStatsInfo pi : stillLookingFor) { + byte[][] colKeys = new byte[pi.colNames.size()][]; + for (int i = 0; i < colKeys.length; i++) { + colKeys[i] = HBaseUtils.buildKey(pi.colNames.get(i)); + } + pi.colKeys = colKeys; + + byte[] key = buildPartitionKey(dbName, tableName, pi.partVals); + Get g = new Get(key); + for (byte[] colName : colKeys) g.addColumn(STATS_CF, colName); + gets.add(g); + } + HTableInterface htab = getHTable(PART_TABLE); + Result[] results = htab.get(gets); + if (results == null) return null; + + for (int pOff = 0; pOff < results.length; pOff++) { + PartStatsInfo pi = stillLookingFor.get(pOff); + for (int cOff = 0; cOff < pi.colNames.size(); cOff++) { + byte[] serialized = results[pOff].getValue(STATS_CF, pi.colKeys[cOff]); + if (serialized == null) { + // There were no stats for this column, so skip it + continue; + } + ColumnStatisticsObj obj = HBaseUtils.deserializeStatsForOneColumn(pi.stats, serialized); + statsCache.put(dbName, tableName, pi.partName, pi.colNames.get(cOff), obj, + pi.stats.getStatsDesc().getLastAnalyzed()); + obj.setColName(pi.colNames.get(cOff)); + pi.stats.addToStatsObj(obj); + } + } + return statsList; + } + + /** + * This should be called whenever a new query is started. + */ + void flushCatalogCache() { + for (Counter counter : counters) { + LOG.debug(counter.dump()); + counter.clear(); + } + tableCache.flush(); + sdCache.flush(); + partCache.flush(); + } + + @VisibleForTesting + int countStorageDescriptor() throws IOException { + ResultScanner scanner = getHTable(SD_TABLE).getScanner(new Scan()); + int cnt = 0; + while (scanner.next() != null) cnt++; + return cnt; + } + + private void store(String table, byte[] key, byte[] colFam, byte[] colName, byte[] obj) + throws IOException { + HTableInterface htab = getHTable(table); + Put p = new Put(key); + p.add(colFam, colName, obj); + htab.put(p); + } + + private void store(String table, byte[] key, byte[] colFam, byte[][] colName, byte[][] obj) + throws IOException { + HTableInterface htab = getHTable(table); + Put p = new Put(key); + for (int i = 0; i < colName.length; i++) { + p.add(colFam, colName[i], obj[i]); + } + htab.put(p); + } + + private byte[] read(String table, byte[] key, byte[] colFam, byte[] colName) throws IOException { + HTableInterface htab = getHTable(table); + Get g = new Get(key); + g.addColumn(colFam, colName); + Result res = htab.get(g); + return res.getValue(colFam, colName); + } + + private Result read(String table, byte[] key, byte[] colFam, byte[][] colNames) + throws IOException { + HTableInterface htab = getHTable(table); + Get g = new Get(key); + for (byte[] colName : colNames) g.addColumn(colFam, colName); + return htab.get(g); + } + + // Delete a row. If colFam and colName are not null, then only the named column will be + // deleted. If colName is null and colFam is not, only the named family will be deleted. If + // both are null the entire row will be deleted. 
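To make those three modes concrete, here is a short sketch of calls to the private delete helper that follows. Only the whole-row form is actually used by the callers above (deleteDb, deleteRole, deleteTable, deletePartition); the family- and column-level forms are shown purely for illustration, and someStatsCol is a hypothetical column qualifier:

    delete(DB_TABLE, key, null, null);               // whole row: how deleteDb and friends drop an object
    delete(PART_TABLE, key, STATS_CF, null);         // whole family: wipe every stats cell in a row
    delete(PART_TABLE, key, STATS_CF, someStatsCol); // single column: remove one stats cell only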
+ private void delete(String table, byte[] key, byte[] colFam, byte[] colName) throws IOException { + HTableInterface htab = getHTable(table); + Delete d = new Delete(key); + if (colName != null) d.deleteColumn(colFam, colName); + else if (colFam != null) d.deleteFamily(colFam); + htab.delete(d); + } + + private List scanOnPrefix(String table, byte[] keyPrefix, byte[] colFam, byte[] colName, + int maxResults) throws IOException { + return scanOnPrefixWithFilter(table, keyPrefix, colFam, colName, maxResults, null); + } + + private List scanOnPrefixWithFilter(String table, byte[] keyPrefix, byte[] colFam, + byte[] colName, int maxResults, Filter filter) + throws IOException { + HTableInterface htab = getHTable(table); + byte[] stop = Arrays.copyOf(keyPrefix, keyPrefix.length); + stop[stop.length - 1]++; + Scan s = new Scan(keyPrefix, stop); + s.addColumn(colFam, colName); + if (filter != null) s.setFilter(filter); + ResultScanner scanner = htab.getScanner(s); + List parts = new ArrayList(); + int numToFetch = maxResults < 0 ? Integer.MAX_VALUE : maxResults; + Iterator iter = scanner.iterator(); + for (int i = 0; i < numToFetch && iter.hasNext(); i++) { + PartitionWritable p = new PartitionWritable(); + HBaseUtils.deserialize(p, iter.next().getValue(colFam, colName)); + parts.add(p.part); + } + return parts; + } + + private HTableInterface getHTable(String table) throws IOException { + HTableInterface htab = tables.get(table); + if (htab == null) { + LOG.debug("Trying to connect to table " + table); + try { + htab = conn.getTable(table); + // Calling gettable doesn't actually connect to the region server, it's very light + // weight, so call something else so we actually reach out and touch the region server + // and see if the table is there. + Result r = htab.get(new Get("nosuchkey".getBytes(HBaseUtils.ENCODING))); + } catch (IOException e) { + LOG.info("Caught exception when table was missing"); + return null; + } + htab.setAutoFlushTo(false); + tables.put(table, htab); + } + return htab; + } + + private void flush() throws IOException { + for (HTableInterface htab : tables.values()) htab.flushCommits(); + } + + private byte[] buildPartitionKey(String dbName, String tableName, List partVals) { + Deque keyParts = new ArrayDeque(partVals); + keyParts.addFirst(tableName); + keyParts.addFirst(dbName); + return HBaseUtils.buildKey(keyParts.toArray(new String[keyParts.size()])); + } + + private byte[] buildPartitionKey(PartitionWritable part) throws IOException { + Deque keyParts = new ArrayDeque(part.part.getValues()); + keyParts.addFirst(part.part.getTableName()); + keyParts.addFirst(part.part.getDbName()); + return HBaseUtils.buildKey(keyParts.toArray(new String[keyParts.size()])); + } + + private byte[] hash(byte[] serialized) throws IOException { + md.update(serialized); + return md.digest(); + } + + private byte[] getStatisticsKey(String dbName, String tableName, List partVals) { + return partVals == null ? + HBaseUtils.buildKey(dbName, tableName) : + buildPartitionKey(dbName, tableName, partVals); + } + + private String getStatisticsTable(List partVals) { + return partVals == null ? TABLE_TABLE : PART_TABLE; + } + + /** + * Use this for unit testing only, so that a mock connection object can be passed in. 
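 * <p>A minimal sketch of that usage, assuming a Mockito-style mock (illustration only, not
 * part of the patch):
 * <pre>
 *   HConnection mocked = Mockito.mock(HConnection.class);
 *   HBaseReadWrite hrw = HBaseReadWrite.getInstance(conf);
 *   hrw.setConnection(mocked);   // subsequent reads and writes go through the mock
 * </pre>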
+ * @param connection Mock connection objecct + */ + @VisibleForTesting + void setConnection(HConnection connection) { + conn = connection; + } + + private static class ByteArrayWrapper { + byte[] wrapped; + + ByteArrayWrapper(byte[] b) { + wrapped = b; + } + + @Override + public boolean equals(Object other) { + if (other instanceof ByteArrayWrapper) { + return Arrays.equals(((ByteArrayWrapper)other).wrapped, wrapped); + } else { + return false; + } + } + + @Override + public int hashCode() { + return Arrays.hashCode(wrapped); + } + } + + private static class PartStatsInfo { + ColumnStatistics stats; + String partName; + List colNames; + List partVals; + byte[][] colKeys; + + PartStatsInfo(ColumnStatistics s, List pv, String pn) { + stats = s; partVals = pv; partName = pn; + colNames = new ArrayList(); + colKeys = null; + } + } + + // For testing without the cache + private static class BogusObjectCache extends ObjectCache { + static Counter bogus = new Counter("bogus"); + + BogusObjectCache() { + super(1, bogus, bogus, bogus); + } + + @Override + V get(K key) { + return null; + } + } + + private static class BogusPartitionCache extends PartitionCache { + static Counter bogus = new Counter("bogus"); + + BogusPartitionCache() { + super(1, bogus, bogus, bogus); + } + + @Override + Collection getAllForTable(String dbName, String tableName) { + return null; + } + + @Override + Partition get(String dbName, String tableName, List partVals) { + return null; + } + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java new file mode 100644 index 0000000..129c2b2 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java @@ -0,0 +1,964 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.RawStore; +import org.apache.hadoop.hive.metastore.api.AggrStats; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.CurrentNotificationEventId; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.InvalidPartitionException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.NotificationEvent; +import org.apache.hadoop.hive.metastore.api.NotificationEventRequest; +import org.apache.hadoop.hive.metastore.api.NotificationEventResponse; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.PrivilegeBag; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.Type; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.hadoop.hive.metastore.api.UnknownPartitionException; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.hadoop.hive.metastore.model.MDBPrivilege; +import org.apache.hadoop.hive.metastore.model.MGlobalPrivilege; +import org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege; +import org.apache.hadoop.hive.metastore.model.MPartitionPrivilege; +import org.apache.hadoop.hive.metastore.model.MRoleMap; +import org.apache.hadoop.hive.metastore.model.MTableColumnPrivilege; +import org.apache.hadoop.hive.metastore.model.MTablePrivilege; +import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; +import org.apache.thrift.TException; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Implementation of RawStore that stores data in HBase + */ +public class HBaseStore implements RawStore { + static final private Log LOG = LogFactory.getLog(HBaseStore.class.getName()); + + // Do not access this directly, call getHBase to make sure it is initialized. 
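For context, a sketch of how a deployment might point the metastore at this store; hive.metastore.rawstore.impl is the standard RawStore selector, but the snippet is an assumption about configuration rather than something added by this patch:

    Configuration conf = new Configuration();
    conf.set("hive.metastore.rawstore.impl",
        "org.apache.hadoop.hive.metastore.hbase.HBaseStore");

The hbase field just below is the handle that comment is talking about; getHBase(), defined near the bottom of this class, initializes it lazily from HBaseReadWrite.getInstance(conf).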
+ private HBaseReadWrite hbase = null; + private Configuration conf; + private int txnNestLevel = 0; + + public HBaseStore() { + } + + @Override + public void shutdown() { + try { + if (txnNestLevel != 0) rollbackTransaction(); + getHBase().close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public boolean openTransaction() { + if (txnNestLevel++ == 0) getHBase().begin(); + return true; + } + + @Override + public boolean commitTransaction() { + if (txnNestLevel-- < 1) getHBase().commit(); + return true; + } + + @Override + public void rollbackTransaction() { + txnNestLevel = 0; + getHBase().rollback(); + } + + @Override + public void createDatabase(Database db) throws InvalidObjectException, MetaException { + try { + // HiveMetaStore already checks for existence of the database, don't recheck + getHBase().putDb(db); + } catch (IOException e) { + // TODO NOt sure what i should throw here + LOG.error("Unable to create database ", e); + throw new MetaException("Unable to read from or write to hbase " + e.getMessage()); + } + + } + + @Override + public Database getDatabase(String name) throws NoSuchObjectException { + try { + Database db = getHBase().getDb(name); + if (db == null) { + throw new NoSuchObjectException("Unable to find db " + name); + } + return db; + } catch (IOException e) { + LOG.error("Unable to get db", e); + throw new NoSuchObjectException("Error reading db " + e.getMessage()); + } + } + + @Override + public boolean dropDatabase(String dbname) throws NoSuchObjectException, MetaException { + try { + getHBase().deleteDb(dbname); + return true; + } catch (IOException e) { + LOG.error("Unable to delete db" + e); + throw new MetaException("Unable to drop database " + dbname); + } + } + + @Override + public boolean alterDatabase(String dbname, Database db) throws NoSuchObjectException, + MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public List getDatabases(String pattern) throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public List getAllDatabases() throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean createType(Type type) { + throw new UnsupportedOperationException(); + } + + @Override + public Type getType(String typeName) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean dropType(String typeName) { + throw new UnsupportedOperationException(); + } + + @Override + public void createTable(Table tbl) throws InvalidObjectException, MetaException { + // HiveMetaStore above us checks if the table already exists, so we can blindly store it here. 
+ try { + getHBase().putTable(tbl); + } catch (IOException e) { + // TODO NOt sure what i should throw here + LOG.error("Unable to create table ", e); + throw new MetaException("Unable to read from or write to hbase " + e.getMessage()); + } + } + + @Override + public boolean dropTable(String dbName, String tableName) throws MetaException, + NoSuchObjectException, InvalidObjectException, InvalidInputException { + try { + getHBase().deleteTable(dbName, tableName); + return true; + } catch (IOException e) { + LOG.error("Unable to delete db" + e); + throw new MetaException("Unable to drop table " + tableName(dbName, tableName)); + } + } + + @Override + public Table getTable(String dbName, String tableName) throws MetaException { + try { + Table table = getHBase().getTable(dbName, tableName); + if (table == null) { + LOG.debug("Unable to find table " + tableName(dbName, tableName)); + } + return table; + } catch (IOException e) { + LOG.error("Unable to get table", e); + throw new MetaException("Error reading table " + e.getMessage()); + } + } + + @Override + public boolean addPartition(Partition part) throws InvalidObjectException, MetaException { + try { + getHBase().putPartition(part); + return true; + } catch (IOException e) { + // TODO NOt sure what i should throw here + LOG.error("Unable to add partition", e); + throw new MetaException("Unable to read from or write to hbase " + e.getMessage()); + } + } + + @Override + public boolean addPartitions(String dbName, String tblName, List parts) throws + InvalidObjectException, MetaException { + try { + for (Partition part : parts) { + getHBase().putPartition(part); + } + return true; + } catch (IOException e) { + // TODO NOt sure what i should throw here + LOG.error("Unable to add partitions", e); + throw new MetaException("Unable to read from or write to hbase " + e.getMessage()); + } + } + + @Override + public boolean addPartitions(String dbName, String tblName, PartitionSpecProxy partitionSpec, + boolean ifNotExists) throws InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public Partition getPartition(String dbName, String tableName, List part_vals) throws + MetaException, NoSuchObjectException { + try { + Partition part = getHBase().getPartition(dbName, tableName, part_vals); + if (part == null) { + throw new NoSuchObjectException("Unable to find partition " + + partName(dbName, tableName, part_vals)); + } + return part; + } catch (IOException e) { + LOG.error("Unable to get partition", e); + throw new MetaException("Error reading partition " + e.getMessage()); + } + } + + @Override + public boolean doesPartitionExist(String dbName, String tableName, List part_vals) throws + MetaException, NoSuchObjectException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean dropPartition(String dbName, String tableName, List part_vals) throws + MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException { + try { + getHBase().deletePartition(dbName, tableName, part_vals); + return true; + } catch (IOException e) { + LOG.error("Unable to delete db" + e); + throw new MetaException("Unable to drop partition " + partName(dbName, tableName, part_vals)); + } + } + + @Override + public List getPartitions(String dbName, String tableName, int max) throws + MetaException, NoSuchObjectException { + try { + return getHBase().scanPartitionsInTable(dbName, tableName, max); + } catch (IOException e) { + LOG.error("Unable to get partitions", e); + throw new 
MetaException("Error scanning partitions"); + } + } + + @Override + public void alterTable(String dbname, String name, Table newTable) throws InvalidObjectException, + MetaException { + // HiveMetaStore above us has already confirmed the table exists, I'm not rechecking + try { + getHBase().putTable(newTable); + } catch (IOException e) { + LOG.error("Unable to alter table " + tableName(dbname, name), e); + throw new MetaException("Unable to alter table " + tableName(dbname, name)); + } + } + + @Override + public List getTables(String dbName, String pattern) throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public List
getTableObjectsByName(String dbname, List tableNames) throws + MetaException, UnknownDBException { + throw new UnsupportedOperationException(); + } + + @Override + public List getAllTables(String dbName) throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public List listTableNamesByFilter(String dbName, String filter, short max_tables) throws + MetaException, UnknownDBException { + throw new UnsupportedOperationException(); + } + + @Override + public List listPartitionNames(String db_name, String tbl_name, short max_parts) throws + MetaException { + try { + List parts = getHBase().scanPartitionsInTable(db_name, tbl_name, max_parts); + if (parts == null) return null; + List names = new ArrayList(parts.size()); + Table table = getHBase().getTable(db_name, tbl_name); + for (Partition p : parts) { + names.add(partName(table, p)); + } + return names; + } catch (IOException e) { + LOG.error("Unable to get partitions", e); + throw new MetaException("Error scanning partitions"); + } + } + + @Override + public List listPartitionNamesByFilter(String db_name, String tbl_name, String filter, + short max_parts) throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public void alterPartition(String db_name, String tbl_name, List part_vals, + Partition new_part) throws InvalidObjectException, MetaException { + + } + + @Override + public void alterPartitions(String db_name, String tbl_name, List> part_vals_list, + List new_parts) throws InvalidObjectException, + MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean addIndex(Index index) throws InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public Index getIndex(String dbName, String origTableName, String indexName) throws + MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean dropIndex(String dbName, String origTableName, String indexName) throws + MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public List getIndexes(String dbName, String origTableName, int max) throws MetaException { + // TODO - Index not currently supported. But I need to return an empty list or else drop + // table cores. + return new ArrayList(); + } + + @Override + public List listIndexNames(String dbName, String origTableName, short max) throws + MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public void alterIndex(String dbname, String baseTblName, String name, Index newIndex) throws + InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public List getPartitionsByFilter(String dbName, String tblName, String filter, + short maxParts) throws MetaException, + NoSuchObjectException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean getPartitionsByExpr(String dbName, String tblName, byte[] expr, + String defaultPartitionName, short maxParts, + List result) throws TException { + // TODO for now just return all partitions, need to add real expression parsing later. 
+ result.addAll(getPartitions(dbName, tblName, maxParts)); + return true; + } + + @Override + public List getPartitionsByNames(String dbName, String tblName, + List partNames) throws MetaException, + NoSuchObjectException { + List parts = new ArrayList(); + for (String partName : partNames) { + parts.add(getPartition(dbName, tblName, partNameToVals(partName))); + } + return parts; + } + + @Override + public Table markPartitionForEvent(String dbName, String tblName, Map partVals, + PartitionEventType evtType) throws MetaException, + UnknownTableException, InvalidPartitionException, UnknownPartitionException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isPartitionMarkedForEvent(String dbName, String tblName, + Map partName, + PartitionEventType evtType) throws MetaException, + UnknownTableException, InvalidPartitionException, UnknownPartitionException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean addRole(String roleName, String ownerName) throws InvalidObjectException, + MetaException, NoSuchObjectException { + int now = (int)(System.currentTimeMillis()/1000); + Role role = new Role(roleName, now, ownerName); + try { + if (getHBase().getRole(roleName) != null) { + throw new InvalidObjectException("Role " + roleName + " already exists"); + } + getHBase().putRole(role); + return true; + } catch (IOException e) { + // TODO NOt sure what i should throw here + LOG.error("Unable to create role ", e); + throw new MetaException("Unable to read from or write to hbase " + e.getMessage()); + } + } + + @Override + public boolean removeRole(String roleName) throws MetaException, NoSuchObjectException { + try { + getHBase().deleteRole(roleName); + return true; + } catch (IOException e) { + LOG.error("Unable to delete role" + e); + throw new MetaException("Unable to drop role " + roleName); + } + } + + @Override + public boolean grantRole(Role role, String userName, PrincipalType principalType, String grantor, + PrincipalType grantorType, boolean grantOption) throws MetaException, + NoSuchObjectException, InvalidObjectException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean revokeRole(Role role, String userName, PrincipalType principalType, + boolean grantOption) throws MetaException, NoSuchObjectException { + throw new UnsupportedOperationException(); + } + + @Override + public PrincipalPrivilegeSet getUserPrivilegeSet(String userName, List groupNames) throws + InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public PrincipalPrivilegeSet getDBPrivilegeSet(String dbName, String userName, + List groupNames) throws + InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public PrincipalPrivilegeSet getTablePrivilegeSet(String dbName, String tableName, + String userName, List groupNames) throws + InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public PrincipalPrivilegeSet getPartitionPrivilegeSet(String dbName, String tableName, + String partition, String userName, + List groupNames) throws + InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public PrincipalPrivilegeSet getColumnPrivilegeSet(String dbName, String tableName, + String partitionName, String columnName, + String userName, + List groupNames) throws + InvalidObjectException, MetaException { + throw new 
UnsupportedOperationException(); + } + + @Override + public List listPrincipalGlobalGrants(String principalName, + PrincipalType principalType) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPrincipalDBGrants(String principalName, PrincipalType principalType, + String dbName) { + throw new UnsupportedOperationException(); + } + + @Override + public List listAllTableGrants(String principalName, PrincipalType principalType, + String dbName, String tableName) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPrincipalPartitionGrants(String principalName, + PrincipalType principalType, + String dbName, String tableName, + String partName) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPrincipalTableColumnGrants(String principalName, + PrincipalType principalType, + String dbName, String tableName, + String columnName) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPrincipalPartitionColumnGrants(String principalName, + PrincipalType principalType, + String dbName, + String tableName, + String partName, + String columnName) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean grantPrivileges(PrivilegeBag privileges) throws InvalidObjectException, + MetaException, NoSuchObjectException { + // TODO + return true; + } + + @Override + public boolean revokePrivileges(PrivilegeBag privileges, boolean grantOption) throws + InvalidObjectException, MetaException, NoSuchObjectException { + throw new UnsupportedOperationException(); + } + + @Override + public Role getRole(String roleName) throws NoSuchObjectException { + try { + Role role = getHBase().getRole(roleName); + if (role == null) { + throw new NoSuchObjectException("Unable to find role " + roleName); + } + return role; + } catch (IOException e) { + LOG.error("Unable to get role", e); + throw new NoSuchObjectException("Error reading table " + e.getMessage()); + } + } + + @Override + public List listRoleNames() { + throw new UnsupportedOperationException(); + } + + @Override + public List listRoles(String principalName, PrincipalType principalType) { + throw new UnsupportedOperationException(); + } + + @Override + public List listRoleMembers(String roleName) { + throw new UnsupportedOperationException(); + } + + @Override + public Partition getPartitionWithAuth(String dbName, String tblName, List partVals, + String user_name, List group_names) throws + MetaException, NoSuchObjectException, InvalidObjectException { + Partition p = getPartition(dbName, tblName, partVals); + // TODO check that user is authorized to see these partitions + return p; + } + + @Override + public List getPartitionsWithAuth(String dbName, String tblName, short maxParts, + String userName, List groupNames) throws + MetaException, NoSuchObjectException, InvalidObjectException { + List parts = getPartitions(dbName, tblName, maxParts); + // TODO check that user is authorized; + return parts; + } + + @Override + public List listPartitionNamesPs(String db_name, String tbl_name, List part_vals, + short max_parts) throws MetaException, + NoSuchObjectException { + throw new UnsupportedOperationException(); + } + + + @Override + public List listPartitionsPsWithAuth(String db_name, String tbl_name, + List part_vals, short max_parts, + String userName, List groupNames) throws + MetaException, InvalidObjectException, NoSuchObjectException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean 
updateTableColumnStatistics(ColumnStatistics colStats) throws + NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { + try { + getHBase().updateStatistics(colStats.getStatsDesc().getDbName(), + colStats.getStatsDesc().getTableName(), null, null, colStats); + return true; + } catch (IOException e) { + LOG.error("Unable to update column statistics", e); + throw new MetaException("Failed to update column statistics, " + e.getMessage()); + } + } + + @Override + public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj, + List partVals) throws + NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { + try { + getHBase().updateStatistics(statsObj.getStatsDesc().getDbName(), + statsObj.getStatsDesc().getTableName(), statsObj.getStatsDesc().getPartName(), + partVals, statsObj); + return true; + } catch (IOException e) { + LOG.error("Unable to update column statistics", e); + throw new MetaException("Failed to update column statistics, " + e.getMessage()); + } + } + + @Override + public ColumnStatistics getTableColumnStatistics(String dbName, String tableName, + List colName) throws MetaException, + NoSuchObjectException { + try { + return getHBase().getTableStatistics(dbName, tableName, colName); + } catch (IOException e) { + LOG.error("Unable to fetch column statistics", e); + throw new MetaException("Failed to fetch column statistics, " + e.getMessage()); + } + } + + @Override + public List getPartitionColumnStatistics(String dbName, String tblName, + List partNames, + List colNames) throws + MetaException, NoSuchObjectException { + List> partVals = new ArrayList>(partNames.size()); + for (String partName : partNames) partVals.add(partNameToVals(partName)); + try { + return getHBase().getPartitionStatistics(dbName, tblName, partNames, partVals, colNames); + } catch (IOException e) { + LOG.error("Unable to fetch column statistics", e); + throw new MetaException("Failed fetching column statistics, " + e.getMessage()); + } + } + + @Override + public boolean deletePartitionColumnStatistics(String dbName, String tableName, String partName, + List partVals, String colName) throws + NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { + // NOP, stats will be deleted along with the partition when it is dropped. + return true; + } + + @Override + public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName) throws + NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException { + // NOP, stats will be deleted along with the table when it is dropped. 
+ return true; + } + + @Override + public long cleanupEvents() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean addToken(String tokenIdentifier, String delegationToken) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean removeToken(String tokenIdentifier) { + throw new UnsupportedOperationException(); + } + + @Override + public String getToken(String tokenIdentifier) { + throw new UnsupportedOperationException(); + } + + @Override + public List getAllTokenIdentifiers() { + throw new UnsupportedOperationException(); + } + + @Override + public int addMasterKey(String key) throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public void updateMasterKey(Integer seqNo, String key) throws NoSuchObjectException, + MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean removeMasterKey(Integer keySeq) { + throw new UnsupportedOperationException(); + } + + @Override + public String[] getMasterKeys() { + throw new UnsupportedOperationException(); + } + + @Override + public void verifySchema() throws MetaException { + try { + getHBase().createTablesIfNotExist(); + } catch (IOException e) { + LOG.fatal("Unable to verify schema ", e); + throw new MetaException("Unable to verify schema"); + } + } + + @Override + public String getMetaStoreSchemaVersion() throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public void setMetaStoreSchemaVersion(String version, String comment) throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public void dropPartitions(String dbName, String tblName, List partNames) throws + MetaException, NoSuchObjectException { + try { + for (String partName : partNames) { + dropPartition(dbName, tblName, partNameToVals(partName)); + } + } catch (Exception e) { + LOG.error("Unable to drop partitions", e); + throw new NoSuchObjectException("Failure dropping partitions, " + e.getMessage()); + } + } + + @Override + public List listPrincipalDBGrantsAll(String principalName, + PrincipalType principalType) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPrincipalTableGrantsAll(String principalName, + PrincipalType principalType) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPrincipalPartitionGrantsAll(String principalName, + PrincipalType principalType) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPrincipalTableColumnGrantsAll(String principalName, + PrincipalType principalType) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPrincipalPartitionColumnGrantsAll(String principalName, + PrincipalType principalType) { + throw new UnsupportedOperationException(); + } + + @Override + public List listGlobalGrantsAll() { + throw new UnsupportedOperationException(); + } + + @Override + public List listDBGrantsAll(String dbName) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPartitionColumnGrantsAll(String dbName, String tableName, + String partitionName, + String columnName) { + throw new UnsupportedOperationException(); + } + + @Override + public List listTableGrantsAll(String dbName, String tableName) { + throw new UnsupportedOperationException(); + } + + @Override + public List listPartitionGrantsAll(String dbName, String tableName, + String partitionName) { + throw new UnsupportedOperationException(); + } + + 
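Several of the methods above (getPartitionsByNames, getPartitionColumnStatistics, dropPartitions) turn metastore partition names into value lists through the partNameToVals helper defined at the end of this class, while listPartitionNames goes the other way via partName(table, part). A small sketch of the round trip, using a made-up two-column partition name:

    // partNameToVals("ds=2015-01-01/region=us")  ->  ["2015-01-01", "us"]
    List<String> vals = partNameToVals("ds=2015-01-01/region=us");
    // partName(table, part) rebuilds "ds=2015-01-01/region=us" by joining each
    // partition column name with its value using '=' and separating pairs with '/'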
@Override + public List listTableColumnGrantsAll(String dbName, String tableName, + String columnName) { + throw new UnsupportedOperationException(); + } + + @Override + public void createFunction(Function func) throws InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public void alterFunction(String dbName, String funcName, Function newFunction) throws + InvalidObjectException, MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public void dropFunction(String dbName, String funcName) throws MetaException, + NoSuchObjectException, InvalidObjectException, InvalidInputException { + throw new UnsupportedOperationException(); + } + + @Override + public Function getFunction(String dbName, String funcName) throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public List getFunctions(String dbName, String pattern) throws MetaException { + throw new UnsupportedOperationException(); + } + + @Override + public AggrStats get_aggr_stats_for(String dbName, String tblName, List partNames, + List colNames) throws MetaException, + NoSuchObjectException { + throw new UnsupportedOperationException(); + } + + @Override + public NotificationEventResponse getNextNotification(NotificationEventRequest rqst) { + throw new UnsupportedOperationException(); + } + + @Override + public void addNotificationEvent(NotificationEvent event) { + throw new UnsupportedOperationException(); + } + + @Override + public void cleanNotificationEvents(int olderThan) { + throw new UnsupportedOperationException(); + } + + @Override + public CurrentNotificationEventId getCurrentNotificationEventId() { + throw new UnsupportedOperationException(); + } + + @Override + public void flushCache() { + getHBase().flushCatalogCache(); + } + + @Override + public void setConf(Configuration configuration) { + conf = configuration; + } + + @Override + public Configuration getConf() { + return conf; + + } + + private HBaseReadWrite getHBase() { + if (hbase == null) hbase = HBaseReadWrite.getInstance(conf); + return hbase; + } + + private String tableName(String dbName, String tableName) { + return dbName + "." 
+ tableName; + } + + private String partName(String dbName, String tableName, List partVals) { + return tableName(dbName, tableName) + StringUtils.join(partVals, ':'); + } + + private String partName(Table table, Partition part) { + List partCols = table.getPartitionKeys(); + List partVals = part.getValues(); + StringBuilder builder = new StringBuilder(); + if (partCols.size() != partVals.size()) { + throw new RuntimeException("Woh bad, different number of partition cols and vals!"); + } + for (int i = 0; i < partCols.size(); i++) { + if (i != 0) builder.append('/'); + builder.append(partCols.get(i).getName()); + builder.append('='); + builder.append(partVals.get(i)); + } + return builder.toString(); + } + + private List partNameToVals(String name) { + if (name == null) return null; + List vals = new ArrayList(); + String[] kvp = name.split("/"); + for (String kv : kvp) { + vals.add(kv.substring(kv.indexOf('=') + 1)); + } + return vals; + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java new file mode 100644 index 0000000..efe98ea --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java @@ -0,0 +1,612 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; +import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SkewedInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.io.Writable; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Utility functions + */ +class HBaseUtils { + + final static Charset ENCODING = StandardCharsets.UTF_8; + final static char KEY_SEPARATOR = ':'; + + static final private Log LOG = LogFactory.getLog(HBaseUtils.class.getName()); + + /** + * Build a key for an object in hbase + * @param components + * @return + */ + static byte[] buildKey(String... components) { + return buildKey(false, components); + } + + static byte[] buildKeyWithTrailingSeparator(String... components) { + return buildKey(true, components); + } + + private static byte[] buildKey(boolean trailingSeparator, String... 
components) { + String protoKey = StringUtils.join(components, KEY_SEPARATOR); + if (trailingSeparator) protoKey += KEY_SEPARATOR; + return protoKey.getBytes(ENCODING); + } + + static byte[] serialize(Writable writable) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + writable.write(dos); + return baos.toByteArray(); + } + + static void deserialize(T instance, byte[] bytes) throws IOException { + DataInput in = new DataInputStream(new ByteArrayInputStream(bytes)); + instance.readFields(in); + } + + static void writeStr(DataOutput out, String str) throws IOException { + if (str == null || str.length() == 0) { + out.writeInt(0); + return; + } else { + out.writeInt(str.length()); + out.write(str.getBytes(), 0, str.length()); + } + } + + static String readStr(DataInput in) throws IOException { + int len = in.readInt(); + if (len == 0) { + return new String(); + } else { + byte[] b = new byte[len]; + in.readFully(b, 0, len); + return new String(b); + } + } + + static void writeByteArray(DataOutput out, byte[] b) throws IOException { + if (b == null || b.length == 0) { + out.writeInt(0); + } else { + out.writeInt(b.length); + out.write(b, 0, b.length); + } + } + + static byte[] readByteArray(DataInput in) throws IOException { + int len = in.readInt(); + if (len == 0) { + return new byte[0]; + } else { + byte[] b = new byte[len]; + in.readFully(b, 0, len); + return b; + } + } + + static void writeDecimal(DataOutput out, Decimal val) throws IOException { + HBaseUtils.writeByteArray(out, val.getUnscaled()); + out.writeShort(val.getScale()); + } + + static Decimal readDecimal(DataInput in) throws IOException { + Decimal d = new Decimal(); + d.setUnscaled(HBaseUtils.readByteArray(in)); + d.setScale(in.readShort()); + return d; + } + + static Map readStrStrMap(DataInput in) throws IOException { + int sz = in.readInt(); + if (sz == 0) { + return new HashMap(); + } else { + Map m = new HashMap(sz); + for (int i = 0; i < sz; i++) { + m.put(readStr(in), readStr(in)); + } + return m; + } + } + + + static void writeStrStrMap(DataOutput out, Map map) throws IOException { + if (map == null || map.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(map.size()); + for (Map.Entry e : map.entrySet()) { + writeStr(out, e.getKey()); + writeStr(out, e.getValue()); + } + } + } + + static Map, String> readStrListStrMap(DataInput in) throws IOException { + int sz = in.readInt(); + if (sz == 0) { + return new HashMap, String>(); + } else { + Map, String> m = new HashMap, String>(sz); + for (int i = 0; i < sz; i++) { + m.put(readStrList(in), readStr(in)); + } + return m; + } + } + + + static void writeStrListStrMap(DataOutput out, Map, String> map) throws IOException { + if (map == null || map.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(map.size()); + for (Map.Entry, String> e : map.entrySet()) { + writeStrList(out, e.getKey()); + writeStr(out, e.getValue()); + } + } + } + + static void writeStrList(DataOutput out, List list) throws IOException { + if (list == null || list.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(list.size()); + for (String val : list) { + writeStr(out, val); + } + } + } + + static List readStrList(DataInput in) throws IOException { + int sz = in.readInt(); + if (sz == 0) { + return new ArrayList(); + } else { + List list = new ArrayList(sz); + for (int i = 0; i < sz; i++) { + list.add(readStr(in)); + } + return list; + } + } + + static void writeWritableList(DataOutput out, 
List list) throws IOException { + if (list == null || list.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(list.size()); + for (Writable val : list) { + val.write(out); + } + } + } + + static List readWritableList(DataInput in, Class clazz) + throws IOException { + int sz = in.readInt(); + if (sz == 0) { + return new ArrayList(); + } else { + List list = new ArrayList(sz); + for (int i = 0; i < sz; i++) { + try { + T instance = clazz.newInstance(); + instance.readFields(in); + list.add(instance); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + return list; + } + } + + static void writeStrListList(DataOutput out, List> list) throws IOException { + if (list == null || list.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(list.size()); + for (List vals : list) { + writeStrList(out, vals); + } + } + } + + static List> readStrListList(DataInput in) throws IOException { + int sz = in.readInt(); + if (sz == 0) { + return new ArrayList>(); + } else { + List> list = new ArrayList>(sz); + for (int i = 0; i < sz; i++) { + list.add(readStrList(in)); + } + return list; + } + } + static List readFieldSchemaList(DataInput in) throws IOException { + int sz = in.readInt(); + if (sz == 0) { + return new ArrayList(); + } else { + List schemas = new ArrayList(sz); + for (int i = 0; i < sz; i++) { + schemas.add(new FieldSchema(readStr(in), readStr(in), readStr(in))); + } + return schemas; + } + } + + static void writeFieldSchemaList(DataOutput out, List fields) throws IOException { + if (fields == null || fields.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(fields.size()); + for (FieldSchema field : fields) { + writeStr(out, field.getName()); + writeStr(out, field.getType()); + writeStr(out, field.getComment()); + } + } + } + + static List readOrderList(DataInput in) throws IOException { + int sz = in.readInt(); + if (sz == 0) { + return new ArrayList(); + } else { + List orderList = new ArrayList(sz); + for (int i = 0; i < sz; i++) { + orderList.add(new Order(readStr(in), in.readInt())); + } + return orderList; + } + } + + static void writeOrderList(DataOutput out, List orderList) throws IOException { + if (orderList == null || orderList.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(orderList.size()); + for (Order order : orderList) { + writeStr(out, order.getCol()); + out.writeInt(order.getOrder()); + } + } + } + + static PrincipalPrivilegeSet readPrivileges(DataInput in) throws IOException { + if (in.readBoolean()) { + PrincipalPrivilegeSet pps = new PrincipalPrivilegeSet(); + pps.setUserPrivileges(readPrivilege(in)); + pps.setGroupPrivileges(readPrivilege(in)); + pps.setRolePrivileges(readPrivilege(in)); + return pps; + } else { + return new PrincipalPrivilegeSet(); + } + + } + + private static Map> readPrivilege(DataInput in) + throws IOException { + int sz = in.readInt(); + if (sz == 0) { + return new HashMap>(); + } else { + Map> priv = + new HashMap>(sz); + for (int i = 0; i < sz; i++) { + String key = readStr(in); + int numGrants = in.readInt(); + if (numGrants == 0) { + priv.put(key, new ArrayList()); + } else { + for (int j = 0; j < numGrants; j++) { + PrivilegeGrantInfo pgi = new PrivilegeGrantInfo(); + pgi.setPrivilege(readStr(in)); + pgi.setCreateTime(in.readInt()); + pgi.setGrantor(readStr(in)); + pgi.setGrantorType(PrincipalType.findByValue(in.readInt())); + pgi.setGrantOption(in.readBoolean()); + } + } + } + return priv; + } + } + + static void writePrivileges(DataOutput out, PrincipalPrivilegeSet privs) throws IOException 
{ + if (privs == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + writePrivilege(out, privs.getUserPrivileges()); + writePrivilege(out, privs.getGroupPrivileges()); + writePrivilege(out, privs.getRolePrivileges()); + } + } + + private static void writePrivilege(DataOutput out, Map> priv) + throws IOException { + if (priv == null || priv.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(priv.size()); + for (Map.Entry> e : priv.entrySet()) { + writeStr(out, e.getKey()); + List grants = e.getValue(); + if (grants == null || grants.size() == 0) { + out.writeInt(0); + } else { + out.writeInt(grants.size()); + for (PrivilegeGrantInfo grant : grants) { + writeStr(out, grant.getPrivilege()); + out.writeInt(grant.getCreateTime()); + writeStr(out, grant.getGrantor()); + out.writeInt(grant.getGrantorType().getValue()); + out.writeBoolean(grant.isGrantOption()); + } + } + } + } + } + + static void writePrincipalType(DataOutput out, PrincipalType pt) throws IOException { + if (pt == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeInt(pt.getValue()); + } + } + + static PrincipalType readPrincipalType(DataInput in) throws IOException { + return (in.readBoolean()) ? PrincipalType.findByValue(in.readInt()) : null; + } + + static void writeSkewedInfo(DataOutput out, SkewedInfo skew) throws IOException { + if (skew == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + writeStrList(out, skew.getSkewedColNames()); + writeStrListList(out, skew.getSkewedColValues()); + writeStrListStrMap(out, skew.getSkewedColValueLocationMaps()); + } + } + + static SkewedInfo readSkewedInfo(DataInput in) throws IOException { + if (in.readBoolean()) { + SkewedInfo skew = new SkewedInfo(); + skew.setSkewedColNames(readStrList(in)); + skew.setSkewedColValues(readStrListList(in)); + skew.setSkewedColValueLocationMaps(readStrListStrMap(in)); + return skew; + } else { + return new SkewedInfo(new ArrayList(), new ArrayList>(), + new HashMap, String>()); + } + } + + static byte[] serializeStorageDescriptor(StorageDescriptor sd) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + writeFieldSchemaList(dos, sd.getCols()); + writeStr(dos, sd.getInputFormat()); + writeStr(dos, sd.getOutputFormat()); + dos.writeBoolean(sd.isCompressed()); + dos.writeInt(sd.getNumBuckets()); + writeStr(dos, sd.getSerdeInfo().getName()); + writeStr(dos, sd.getSerdeInfo().getSerializationLib()); + writeStrStrMap(dos, sd.getSerdeInfo().getParameters()); + writeStrList(dos, sd.getBucketCols()); + writeOrderList(dos, sd.getSortCols()); + writeSkewedInfo(dos, sd.getSkewedInfo()); + dos.writeBoolean(sd.isStoredAsSubDirectories()); + return baos.toByteArray(); + } + + static void deserializeStorageDescriptor(StorageDescriptor sd, byte[] bytes) + throws IOException { + DataInput in = new DataInputStream(new ByteArrayInputStream(bytes)); + sd.setCols(readFieldSchemaList(in)); + sd.setInputFormat(readStr(in)); + sd.setOutputFormat(readStr(in)); + sd.setCompressed(in.readBoolean()); + sd.setNumBuckets(in.readInt()); + SerDeInfo serde = new SerDeInfo(readStr(in), readStr(in), readStrStrMap(in)); + sd.setSerdeInfo(serde); + sd.setBucketCols(readStrList(in)); + sd.setSortCols(readOrderList(in)); + sd.setSkewedInfo(readSkewedInfo(in)); + sd.setStoredAsSubDirectories(in.readBoolean()); + } + + static byte[] serializeStatsForOneColumn(ColumnStatistics stats, ColumnStatisticsObj obj) + throws IOException 
{ + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + dos.writeLong(stats.getStatsDesc().getLastAnalyzed()); + HBaseUtils.writeStr(dos, obj.getColType()); + ColumnStatisticsData colData = obj.getStatsData(); + HBaseUtils.writeStr(dos, colData.getSetField().toString()); + switch (colData.getSetField()) { + case BOOLEAN_STATS: + BooleanColumnStatsData boolData = colData.getBooleanStats(); + dos.writeLong(boolData.getNumTrues()); + dos.writeLong(boolData.getNumFalses()); + dos.writeLong(boolData.getNumNulls()); + break; + + case LONG_STATS: + LongColumnStatsData longData = colData.getLongStats(); + dos.writeLong(longData.getLowValue()); + dos.writeLong(longData.getHighValue()); + dos.writeLong(longData.getNumNulls()); + dos.writeLong(longData.getNumDVs()); + break; + + case DOUBLE_STATS: + DoubleColumnStatsData doubleData = colData.getDoubleStats(); + dos.writeDouble(doubleData.getLowValue()); + dos.writeDouble(doubleData.getHighValue()); + dos.writeLong(doubleData.getNumNulls()); + dos.writeLong(doubleData.getNumDVs()); + break; + + case STRING_STATS: + StringColumnStatsData stringData = colData.getStringStats(); + dos.writeLong(stringData.getMaxColLen()); + dos.writeDouble(stringData.getAvgColLen()); + dos.writeLong(stringData.getNumNulls()); + dos.writeLong(stringData.getNumDVs()); + break; + + case BINARY_STATS: + BinaryColumnStatsData binaryData = colData.getBinaryStats(); + dos.writeLong(binaryData.getMaxColLen()); + dos.writeDouble(binaryData.getAvgColLen()); + dos.writeLong(binaryData.getNumNulls()); + break; + + case DECIMAL_STATS: + DecimalColumnStatsData decimalData = colData.getDecimalStats(); + writeDecimal(dos, decimalData.getHighValue()); + writeDecimal(dos, decimalData.getLowValue()); + dos.writeLong(decimalData.getNumNulls()); + dos.writeLong(decimalData.getNumDVs()); + break; + + default: + throw new RuntimeException("Woh, bad. 
Unknown stats type!"); + } + return baos.toByteArray(); + } + + static ColumnStatisticsObj deserializeStatsForOneColumn(ColumnStatistics stats, + byte[] bytes) throws IOException { + DataInput in = new DataInputStream(new ByteArrayInputStream(bytes)); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + long lastAnalyzed = in.readLong(); + stats.getStatsDesc().setLastAnalyzed( + Math.max(lastAnalyzed, stats.getStatsDesc().getLastAnalyzed())); + obj.setColType(HBaseUtils.readStr(in)); + + ColumnStatisticsData._Fields type = ColumnStatisticsData._Fields.valueOf(HBaseUtils.readStr (in)); + ColumnStatisticsData colData = new ColumnStatisticsData(); + switch (type) { + case BOOLEAN_STATS: + BooleanColumnStatsData boolData = new BooleanColumnStatsData(); + boolData.setNumTrues(in.readLong()); + boolData.setNumFalses(in.readLong()); + boolData.setNumNulls(in.readLong()); + colData.setBooleanStats(boolData); + break; + + case LONG_STATS: + LongColumnStatsData longData = new LongColumnStatsData(); + longData.setLowValue(in.readLong()); + longData.setHighValue(in.readLong()); + longData.setNumNulls(in.readLong()); + longData.setNumDVs(in.readLong()); + colData.setLongStats(longData); + break; + + case DOUBLE_STATS: + DoubleColumnStatsData doubleData = new DoubleColumnStatsData(); + doubleData.setLowValue(in.readDouble()); + doubleData.setHighValue(in.readDouble()); + doubleData.setNumNulls(in.readLong()); + doubleData.setNumDVs(in.readLong()); + colData.setDoubleStats(doubleData); + break; + + case STRING_STATS: + StringColumnStatsData stringData = new StringColumnStatsData(); + stringData.setMaxColLen(in.readLong()); + stringData.setAvgColLen(in.readDouble()); + stringData.setNumNulls(in.readLong()); + stringData.setNumDVs(in.readLong()); + colData.setStringStats(stringData); + break; + + case BINARY_STATS: + BinaryColumnStatsData binaryData = new BinaryColumnStatsData(); + binaryData.setMaxColLen(in.readLong()); + binaryData.setAvgColLen(in.readDouble()); + binaryData.setNumNulls(in.readLong()); + colData.setBinaryStats(binaryData); + break; + + case DECIMAL_STATS: + DecimalColumnStatsData decimalData = new DecimalColumnStatsData(); + decimalData.setHighValue(readDecimal(in)); + decimalData.setLowValue(readDecimal(in)); + decimalData.setNumNulls(in.readLong()); + decimalData.setNumDVs(in.readLong()); + colData.setDecimalStats(decimalData); + break; + + default: + throw new RuntimeException("Woh, bad. Unknown stats type!"); + } + obj.setStatsData(colData); + return obj; + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/ObjectCache.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/ObjectCache.java new file mode 100644 index 0000000..47b7be0 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/ObjectCache.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import java.util.HashMap; +import java.util.Map; + +/** + * A generic class for caching objects obtained from HBase. Currently a set of + * convenience methods around a {@link java.util.HashMap} with a max size but built + * as a separate class in case we want to switch out the implementation to something more efficient. + */ +class ObjectCache { + private Map cache; + private final int maxSize; + private Counter hits; + private Counter misses; + private Counter overflows; + + /** + * + * @param max maximum number of objects to store in the cache. When max is reached, eviction + * policy is MRU. + * @param hits counter to increment when we find an element in the cache + * @param misses counter to increment when we do not find an element in the cache + * @param overflows counter to increment when we do not have room for an element in the cache + */ + ObjectCache(int max, Counter hits, Counter misses, Counter overflows) { + maxSize = max; + cache = new HashMap(); + this.hits = hits; + this.misses = misses; + this.overflows = overflows; + } + + void put(K key, V value) { + if (cache.size() < maxSize) { + cache.put(key, value); + } else { + overflows.incr(); + } + } + + V get(K key) { + V val = cache.get(key); + if (val == null) misses.incr(); + else hits.incr(); + return val; + } + + void remove(K key) { + cache.remove(key); + } + + void flush() { + cache.clear(); + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/PartitionCache.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/PartitionCache.java new file mode 100644 index 0000000..f09e58c --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/PartitionCache.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hadoop.hive.metastore.api.Partition; + +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A cache for partition objects. This is separate from ObjectCache because we need to access it + * differently (always by table) and because we need to be able to track whether we are caching + * all of the partitions for a table or not. + */ +class PartitionCache { + // This is a trie. The key to the first map is (dbname, tablename), since partitions are + // always accessed within the context of the table they belong to. The second map maps + // partition values (not names) to partitions. 
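+  // For example (illustrative values only): the outer key ("default", "web_logs") maps to a
+  // TrieValue whose inner map might hold ["2015", "01"] -> Partition.  Each TrieValue also
+  // records, via hasAllPartitionsForTable, whether every partition of that table is cached.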
+ private Map, TrieValue> cache; + private final int maxSize; + private int cacheSize; + private Counter misses; + private Counter hits; + private Counter overflows; + + /** + * + * @param max maximum number of objects to store in the cache. When max is reached, eviction + * policy is MRU. + * @param hits counter to increment when we find an element in the cache + * @param misses counter to increment when we do not find an element in the cache + * @param overflows counter to increment when we do not have room for an element in the cache + */ + PartitionCache(int max, Counter hits, Counter misses, Counter overflows) { + maxSize = max; + cache = new HashMap, TrieValue>(); + cacheSize = 0; + this.hits = hits; + this.misses = misses; + this.overflows = overflows; + } + + /** + * Put a single partition into the cache + * @param dbName + * @param tableName + * @param part + */ + void put(String dbName, String tableName, Partition part) { + if (cacheSize < maxSize) { + ObjectPair key = new ObjectPair(dbName, tableName); + TrieValue entry = cache.get(key); + if (entry == null) { + entry = new TrieValue(false); + cache.put(key, entry); + } + entry.map.put(part.getValues(), part); + cacheSize++; + } else { + overflows.incr(); + } + } + + /** + * + * @param dbName + * @param tableName + * @param parts + * @param allForTable if true indicates that all partitions for this table are present + */ + void put(String dbName, String tableName, List parts, boolean allForTable) { + if (cacheSize + parts.size() < maxSize) { + ObjectPair key = new ObjectPair(dbName, tableName); + TrieValue entry = cache.get(key); + if (entry == null) { + entry = new TrieValue(allForTable); + cache.put(key, entry); + } + for (Partition part : parts) entry.map.put(part.getValues(), part); + cacheSize += parts.size(); + } else { + overflows.incr(); + } + } + + /** + * Will only return a value if all partitions for this table are in the cache. 
Otherwise you + * should call {@link #get} individually + * @param dbName + * @param tableName + * @return + */ + Collection getAllForTable(String dbName, String tableName) { + TrieValue entry = cache.get(new ObjectPair(dbName, tableName)); + if (entry != null && entry.hasAllPartitionsForTable) { + hits.incr(); + return entry.map.values(); + } else { + misses.incr(); + return null; + } + } + + Partition get(String dbName, String tableName, List partVals) { + TrieValue entry = cache.get(new ObjectPair(dbName, tableName)); + if (entry != null) { + hits.incr(); + return entry.map.get(partVals); + } else { + misses.incr(); + return null; + } + } + + void remove(String dbName, String tableName) { + ObjectPair key = new ObjectPair(dbName, tableName); + TrieValue entry = cache.get(key); + if (entry != null) { + cacheSize -= entry.map.size(); + cache.remove(key); + } + } + + void remove(String dbName, String tableName, List partVals) { + ObjectPair key = new ObjectPair(dbName, tableName); + TrieValue entry = cache.get(key); + if (entry != null && entry.map.remove(partVals) != null) { + cacheSize--; + entry.hasAllPartitionsForTable = false; + } + } + + void flush() { + cache.clear(); + cacheSize = 0; + } + + static class TrieValue { + boolean hasAllPartitionsForTable; + Map, Partition> map; + + TrieValue(boolean hasAll) { + hasAllPartitionsForTable = hasAll; + map = new HashMap, Partition>(); + } + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/PartitionWritable.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/PartitionWritable.java new file mode 100644 index 0000000..34881a3 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/PartitionWritable.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Wrapper for {@link org.apache.hadoop.hive.metastore.api.Table} that makes it writable + */ +class PartitionWritable implements Writable { + final Partition part; + + PartitionWritable() { + this.part = new Partition(); + } + + PartitionWritable(Partition part) { + this.part = part; + } + + @Override + public void write(DataOutput out) throws IOException { + HBaseUtils.writeStrList(out, part.getValues()); + // TODO should be able to avoid dbname and tablename since they're in the key + HBaseUtils.writeStr(out, part.getDbName()); + HBaseUtils.writeStr(out, part.getTableName()); + out.writeInt(part.getCreateTime()); + out.writeInt(part.getLastAccessTime()); + new StorageDescriptorWritable(part.getSd()).write(out); + HBaseUtils.writeStrStrMap(out, part.getParameters()); + HBaseUtils.writePrivileges(out, part.getPrivileges()); + } + + @Override + public void readFields(DataInput in) throws IOException { + part.setValues(HBaseUtils.readStrList(in)); + part.setDbName(HBaseUtils.readStr(in)); + part.setTableName(HBaseUtils.readStr(in)); + part.setCreateTime(in.readInt()); + part.setLastAccessTime(in.readInt()); + StorageDescriptorWritable sdw = new StorageDescriptorWritable(); + sdw.readFields(in); + part.setSd(sdw.sd); + part.setParameters(HBaseUtils.readStrStrMap(in)); + part.setPrivileges(HBaseUtils.readPrivileges(in)); + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/RoleWritable.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/RoleWritable.java new file mode 100644 index 0000000..1ad1fe5 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/RoleWritable.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Wrapper for {@link org.apache.hadoop.hive.metastore.api.Table} that makes it writable + */ +class RoleWritable implements Writable { + final Role role; + + RoleWritable() { + this.role = new Role(); + } + + RoleWritable(Role role) { + this.role = role; + } + + @Override + public void write(DataOutput out) throws IOException { + HBaseUtils.writeStr(out, role.getRoleName()); + out.writeInt(role.getCreateTime()); + HBaseUtils.writeStr(out, role.getOwnerName()); + } + + @Override + public void readFields(DataInput in) throws IOException { + role.setRoleName(HBaseUtils.readStr(in)); + role.setCreateTime(in.readInt()); + role.setOwnerName(HBaseUtils.readStr(in)); + } +} \ No newline at end of file diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/SharedStorageDescriptor.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/SharedStorageDescriptor.java new file mode 100644 index 0000000..12fea80 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/SharedStorageDescriptor.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SkewedInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * A {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} with most of it's content + * shared. Location and parameters are left alone, everything else is redirected to a shared + * reference in the cache. 
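+ * The shared piece is looked up lazily from HBase using the hash written alongside the
+ * storage descriptor (see {@link #readShared(byte[])}), so partitions or tables whose
+ * column, serde, bucketing and skew metadata are identical can point at a single stored copy.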
+ */ +public class SharedStorageDescriptor extends StorageDescriptor { + static final private Log LOG = LogFactory.getLog(SharedStorageDescriptor.class.getName()); + StorageDescriptor shared; + + SharedStorageDescriptor() { + } + + public SharedStorageDescriptor(SharedStorageDescriptor that) { + this.setLocation(that.getLocation()); + this.setParameters(that.getParameters()); + this.shared = that.shared; + } + + void readShared(byte[] hash) throws IOException { + shared = HBaseReadWrite.getInstance().getStorageDescriptor(hash); + } + + @Override + public List getCols() { + return shared.getCols(); + } + + @Override + public int getColsSize() { + return shared.getColsSize(); + } + + @Override + public Iterator getColsIterator() { + return shared.getColsIterator(); + } + + @Override + public String getInputFormat() { + return shared.getInputFormat(); + } + + @Override + public String getOutputFormat() { + return shared.getOutputFormat(); + } + + @Override + public boolean isCompressed() { + return shared.isCompressed(); + } + + @Override + public int getNumBuckets() { + return shared.getNumBuckets(); + } + + @Override + public SerDeInfo getSerdeInfo() { + return shared.getSerdeInfo(); + } + + @Override + public List getBucketCols() { + return shared.getBucketCols(); + } + + @Override + public int getBucketColsSize() { + return shared.getBucketColsSize(); + } + + @Override + public Iterator getBucketColsIterator() { + return shared.getBucketColsIterator(); + } + + @Override + public List getSortCols() { + return shared.getSortCols(); + } + + @Override + public int getSortColsSize() { + return shared.getSortColsSize(); + } + + @Override + public Iterator getSortColsIterator() { + return shared.getSortColsIterator(); + } + + @Override + public SkewedInfo getSkewedInfo() { + return shared.getSkewedInfo(); + } + + @Override + public boolean isStoredAsSubDirectories() { + return shared.isStoredAsSubDirectories(); + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java new file mode 100644 index 0000000..289c58d --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StatsCache.java @@ -0,0 +1,317 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * Caching for stats. This implements an LRU cache. It does not remove entries explicitly as + * that is generally expensive to find all entries for a table or partition. Instead it lets them + * time out. When the cache is full a sweep is done in the background to remove expired entries. + * This cache is shared across all threads, and so operations are protected by reader or writer + * locks as appropriate. + */ +class StatsCache { + private static StatsCache self = null; + + private final long timeToLive; + private final int maxSize; + private Map cache; + private ReadWriteLock lock; + private boolean cleaning; + private Counter tableMisses; + private Counter partMisses; + private Counter tableHits; + private Counter partHits; + private Counter cleans; + private List counters; + + static synchronized StatsCache getInstance(Configuration conf) { + if (self == null) { + int totalObjectsToCache = + ((HiveConf) conf).getIntVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_SIZE); + long timeToLive = ((HiveConf) conf).getTimeVar( + HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE, TimeUnit.SECONDS); + self = new StatsCache(totalObjectsToCache / 2, timeToLive); + } + return self; + } + + /** + * @param max maximum number of objects to store in the cache. When max is reached, eviction + * policy is MRU. + * @param timeToLive time (in seconds) that an entry is valid. After this time the record will + * discarded lazily + */ + private StatsCache(int max, long timeToLive) { + maxSize = max; + this.timeToLive = timeToLive * 1000; + cache = new HashMap(); + lock = new ReentrantReadWriteLock(); + cleaning = false; + counters = new ArrayList(); + tableMisses = new Counter("Stats cache table misses"); + counters.add(tableMisses); + tableHits = new Counter("Stats cache table hits"); + counters.add(tableHits); + partMisses = new Counter("Stats cache partition misses"); + counters.add(partMisses); + partHits = new Counter("Stats cache partition hits"); + counters.add(partHits); + cleans = new Counter("Stats cache cleans"); + counters.add(cleans); + } + + /** + * Add an object to the cache. 
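+   * If the cache has already grown past its maximum size, a background sweep of expired
+   * entries is triggered first; the new entry is then stored regardless.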
+ * @param dbName name of database table is in + * @param tableName name of table + * @param partName name of partition, can be null if these are table level statistics + * @param colName name of the column these statistics are for + * @param stats stats + * @param lastAnalyzed last time these stats were analyzed + */ + void put(String dbName, String tableName, String partName, String colName, + ColumnStatisticsObj stats, long lastAnalyzed) { + if (cache.size() > maxSize) clean(); + lock.writeLock().lock(); + try { + cache.put(new Key(dbName, tableName, partName, colName), new StatsInfo(stats, lastAnalyzed)); + } finally { + lock.writeLock().unlock(); + } + } + + /** + * Get table level statistics + * @param dbName name of database table is in + * @param tableName name of table + * @param colName of column to get stats for + * @return stats object for this column, or null if none cached + */ + StatsInfo getTableStatistics(String dbName, String tableName, String colName) { + lock.readLock().lock(); + try { + Key key = new Key(dbName, tableName, colName); + StatsInfo s = cache.get(key); + if (s == null) { + tableMisses.incr(); + return null; + } + if (tooLate(s)) { + remove(key); + tableMisses.incr(); + return null; + } else { + s.lastTouched = System.currentTimeMillis(); + tableHits.incr(); + return s; + } + } finally { + lock.readLock().unlock(); + } + } + + /** + * Get partition level statistics + * @param dbName name of database table is in + * @param tableName name of table + * @param partName name of this partition + * @param colName of column to get stats for + * @return stats object for this column, or null if none cached + */ + StatsInfo getPartitionStatistics(String dbName, String tableName, + String partName, String colName) { + lock.readLock().lock(); + try { + Key key = new Key(dbName, tableName, partName, colName); + StatsInfo s = cache.get(key); + if (s == null) { + partMisses.incr(); + return null; + } + if (tooLate(s)) { + remove(key); + partMisses.incr(); + return null; + } else { + s.lastTouched = System.currentTimeMillis(); + partHits.incr(); + return s; + } + } finally { + lock.readLock().unlock(); + } + } + + String[] dumpMetrics() { + String[] strs = new String[counters.size()]; + for (int i = 0; i < strs.length; i++) { + strs[i] = counters.get(i).dump(); + } + return strs; + } + + private void remove(Key key) { + lock.writeLock().lock(); + try { + cache.remove(key); + } finally { + lock.writeLock().unlock(); + } + } + + private void clean() { + // This spawns a separate thread to walk through the cache and clean. + synchronized (this) { + if (cleaning) return; + cleaning = true; + cleans.incr(); + } + try { + Thread cleaner = new Thread() { + @Override + public void run() { + // Get the read lock and then make a copy of the map. This is so we can work through it + // without having concurrent modification exceptions. Then walk through and remove things + // one at a time. 
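+              // Each removal re-acquires the write lock briefly inside remove(), so readers
+              // are never blocked for the duration of the whole sweep.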
+ List> entries = null; + lock.readLock().lock(); + try { + entries = new ArrayList>(cache.entrySet()); + } finally { + lock.readLock().unlock(); + } + for (Map.Entry entry : entries) { + if (tooLate(entry.getValue())) { + remove(entry.getKey()); + } + // We want to make sure this runs at a low priority in the background + Thread.yield(); + } + } + }; + cleaner.setPriority(Thread.MIN_PRIORITY); + cleaner.start(); + } finally { + cleaning = false; + } + } + + private boolean tooLate(StatsInfo stats) { + return System.currentTimeMillis() - stats.lastTouched > timeToLive; + } + + private static class Key { + private final String dbName, tableName, partName, colName; + + Key(String db, String table, String col) { + this(db, table, null, col); + } + + Key(String db, String table, String part, String col) { + dbName = db; tableName = table; partName = part; colName = col; + } + + @Override + public boolean equals(Object other) { + if (other == null || !(other instanceof Key)) return false; + Key that = (Key)other; + if (partName == null) { + return that.partName == null && dbName.equals(that.dbName) && + tableName.equals(that.tableName) && colName.equals(that.colName); + } else { + return dbName.equals(that.dbName) && tableName.equals(that.tableName) && + partName.equals(that.partName) && colName.equals(that.colName); + } + } + + @Override + public int hashCode() { + int hashCode = dbName.hashCode() * 31 + tableName.hashCode(); + if (partName != null) hashCode = hashCode * 31 + partName.hashCode(); + return hashCode * 31 + colName.hashCode(); + } + } + + static class StatsInfo { + final ColumnStatisticsObj stats; + final long lastAnalyzed; + long lastTouched; + + StatsInfo(ColumnStatisticsObj obj, long la) { + stats = obj; + lastAnalyzed = la; + lastTouched = System.currentTimeMillis(); + } + } + + + /** + * This returns a stats cache that will store nothing and return nothing, useful + * for unit testing when you don't want the cache in your way. + * @return + */ + @VisibleForTesting + static StatsCache getBogusStatsCache() { + return new StatsCache(0, 0) { + @Override + void put(String dbName, String tableName, String partName, String colName, + ColumnStatisticsObj stats, long lastAnalyzed) { + } + + @Override + StatsInfo getTableStatistics(String dbName, String tableName, String colName) { + return null; + } + + @Override + StatsInfo getPartitionStatistics(String dbName, String tableName, + String partName, String colName) { + return null; + } + }; + } + + /** + * Go through and make all the entries in the cache old so they will time out when requested + */ + @VisibleForTesting + void makeWayOld() { + for (StatsInfo stats : cache.values()) { + stats.lastTouched = 1; + } + } + + @VisibleForTesting + void clear() { + cache.clear(); + } +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StorageDescriptorWritable.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StorageDescriptorWritable.java new file mode 100644 index 0000000..94a8242 --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/StorageDescriptorWritable.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Wrapper for {@link org.apache.hadoop.hive.metastore.api.StorageDescriptor} to make it writable. + */ +public class StorageDescriptorWritable implements Writable { + static final private Log LOG = LogFactory.getLog(StorageDescriptorWritable.class.getName()); + final StorageDescriptor sd; + + StorageDescriptorWritable() { + sd = new SharedStorageDescriptor(); + } + + StorageDescriptorWritable(StorageDescriptor sd) { + this.sd = sd; + } + + @Override + public void write(DataOutput out) throws IOException { + HBaseUtils.writeStr(out, sd.getLocation()); + HBaseUtils.writeStrStrMap(out, sd.getParameters()); + byte[] hash = HBaseReadWrite.getInstance().putStorageDescriptor(sd); + out.writeInt(hash.length); + out.write(hash); + } + + @Override + public void readFields(DataInput in) throws IOException { + sd.setLocation(HBaseUtils.readStr(in)); + sd.setParameters(HBaseUtils.readStrStrMap(in)); + int len = in.readInt(); + byte[] hash = new byte[len]; + in.readFully(hash, 0, len); + ((SharedStorageDescriptor)sd).readShared(hash); + } + + +} diff --git metastore/src/java/org/apache/hadoop/hive/metastore/hbase/TableWritable.java metastore/src/java/org/apache/hadoop/hive/metastore/hbase/TableWritable.java new file mode 100644 index 0000000..71df26b --- /dev/null +++ metastore/src/java/org/apache/hadoop/hive/metastore/hbase/TableWritable.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Wrapper for {@link org.apache.hadoop.hive.metastore.api.Table} that makes it writable + */ +class TableWritable implements Writable { + static final private Log LOG = LogFactory.getLog(TableWritable.class.getName()); + final Table table; + + TableWritable() { + this.table = new Table(); + } + + TableWritable(Table table) { + this.table = table; + } + + @Override + public void write(DataOutput out) throws IOException { + HBaseUtils.writeStr(out, table.getTableName()); + HBaseUtils.writeStr(out, table.getDbName()); + HBaseUtils.writeStr(out, table.getOwner()); + out.writeInt(table.getCreateTime()); + out.writeInt(table.getLastAccessTime()); + out.writeInt(table.getRetention()); + new StorageDescriptorWritable(table.getSd()).write(out); + HBaseUtils.writeFieldSchemaList(out, table.getPartitionKeys()); + HBaseUtils.writeStrStrMap(out, table.getParameters()); + HBaseUtils.writeStr(out, table.getViewOriginalText()); + HBaseUtils.writeStr(out, table.getViewExpandedText()); + HBaseUtils.writeStr(out, table.getTableType()); + HBaseUtils.writePrivileges(out, table.getPrivileges()); + out.writeBoolean(table.isTemporary()); + } + + @Override + public void readFields(DataInput in) throws IOException { + table.setTableName(HBaseUtils.readStr(in)); + table.setDbName(HBaseUtils.readStr(in)); + table.setOwner(HBaseUtils.readStr(in)); + table.setCreateTime(in.readInt()); + table.setLastAccessTime(in.readInt()); + table.setRetention(in.readInt()); + StorageDescriptorWritable sdw = new StorageDescriptorWritable(); + sdw.readFields(in); + table.setSd(sdw.sd); + table.setPartitionKeys(HBaseUtils.readFieldSchemaList(in)); + table.setParameters(HBaseUtils.readStrStrMap(in)); + table.setViewOriginalText(HBaseUtils.readStr(in)); + table.setViewExpandedText(HBaseUtils.readStr(in)); + table.setTableType(HBaseUtils.readStr(in)); + table.setPrivileges(HBaseUtils.readPrivileges(in)); + table.setTemporary(in.readBoolean()); + } +} diff --git metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index cf068e4..0b26e5f 100644 --- metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -751,5 +751,8 @@ public CurrentNotificationEventId getCurrentNotificationEventId() { return objectStore.getCurrentNotificationEventId(); } - + @Override + public void flushCache() { + objectStore.flushCache(); + } } diff --git metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index 5f28d73..44e69f6 100644 --- metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -769,6 +769,10 @@ public CurrentNotificationEventId getCurrentNotificationEventId() { } + public void flushCache() { + + } + } diff --git metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java 
metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java new file mode 100644 index 0000000..9da2a10 --- /dev/null +++ metastore/src/test/org/apache/hadoop/hive/metastore/hbase/MockUtils.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; + +/** + * Mock utilities for HBaseStore testing + */ +public class MockUtils { + + static HBaseStore init(Configuration conf, HTableInterface htable, + final SortedMap rows) throws IOException { + Mockito.when(htable.get(Mockito.any(Get.class))).thenAnswer(new Answer() { + @Override + public Result answer(InvocationOnMock invocation) throws Throwable { + Get get = (Get) invocation.getArguments()[0]; + Cell cell = rows.get(new String(get.getRow())); + if (cell == null) { + return new Result(); + } else { + return Result.create(new Cell[]{cell}); + } + } + }); + + Mockito.when(htable.getScanner(Mockito.any(Scan.class))).thenAnswer(new Answer() { + @Override + public ResultScanner answer(InvocationOnMock invocation) throws Throwable { + Scan scan = (Scan)invocation.getArguments()[0]; + List results = new ArrayList(); + SortedMap sub = + rows.subMap(new String(scan.getStartRow()), new String(scan.getStopRow())); + for (Map.Entry e : sub.entrySet()) { + results.add(Result.create(new Cell[]{e.getValue()})); + } + + final Iterator iter = results.iterator(); + + return new ResultScanner() { + @Override + public Result next() throws IOException { + return null; + } + + @Override + public Result[] next(int nbRows) throws IOException { + return new Result[0]; + } + + @Override + public void close() { + + } + + @Override + public Iterator iterator() { + return iter; + } + }; + } + }); + + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + Put put = (Put)invocation.getArguments()[0]; + rows.put(new String(put.getRow()), put.getFamilyCellMap().firstEntry().getValue().get(0)); + return null; + } + 
}).when(htable).put(Mockito.any(Put.class)); + + Mockito.when(htable.checkAndPut(Mockito.any(byte[].class), Mockito.any(byte[].class), + Mockito.any(byte[].class), Mockito.any(byte[].class), Mockito.any(Put.class))).thenAnswer( + new Answer() { + + @Override + public Boolean answer(InvocationOnMock invocation) throws Throwable { + // Always say it succeeded and overwrite + Put put = (Put)invocation.getArguments()[4]; + rows.put(new String(put.getRow()), + put.getFamilyCellMap().firstEntry().getValue().get(0)); + return true; + } + }); + + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + Delete del = (Delete)invocation.getArguments()[0]; + rows.remove(new String(del.getRow())); + return null; + } + }).when(htable).delete(Mockito.any(Delete.class)); + + Mockito.when(htable.checkAndDelete(Mockito.any(byte[].class), Mockito.any(byte[].class), + Mockito.any(byte[].class), Mockito.any(byte[].class), Mockito.any(Delete.class))).thenAnswer( + new Answer() { + + @Override + public Boolean answer(InvocationOnMock invocation) throws Throwable { + // Always say it succeeded + Delete del = (Delete)invocation.getArguments()[4]; + rows.remove(new String(del.getRow())); + return true; + } + }); + + // Mock connection + HConnection hconn = Mockito.mock(HConnection.class); + Mockito.when(hconn.getTable(Mockito.anyString())).thenReturn(htable); + HBaseReadWrite hbase = HBaseReadWrite.getInstance(conf); + hbase.setConnection(hconn); + HBaseStore store = new HBaseStore(); + store.setConf(conf); + return store; + } +} diff --git metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java new file mode 100644 index 0000000..76f0791 --- /dev/null +++ metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStore.java @@ -0,0 +1,690 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * + */ +public class TestHBaseStore { + private static final Log LOG = LogFactory.getLog(TestHBaseStore.class.getName()); + static Map emptyParameters = new HashMap(); + + @Rule public ExpectedException thrown = ExpectedException.none(); + @Mock HTableInterface htable; + SortedMap rows = new TreeMap(); + HBaseStore store; + + @Before + public void init() throws IOException { + MockitoAnnotations.initMocks(this); + HiveConf conf = new HiveConf(); + conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true); + store = MockUtils.init(conf, htable, rows); + } + + @Test + public void createDb() throws Exception { + String dbname = "mydb"; + Database db = new Database(dbname, "no description", "file:///tmp", emptyParameters); + store.createDatabase(db); + + Database d = store.getDatabase(dbname); + Assert.assertEquals(dbname, d.getName()); + Assert.assertEquals("no description", d.getDescription()); + Assert.assertEquals("file:///tmp", d.getLocationUri()); + } + + @Test + public void dropDb() throws Exception { + String dbname = "anotherdb"; + Database db = new Database(dbname, "no description", 
"file:///tmp", emptyParameters); + store.createDatabase(db); + + Database d = store.getDatabase(dbname); + Assert.assertNotNull(d); + + store.dropDatabase(dbname); + thrown.expect(NoSuchObjectException.class); + store.getDatabase(dbname); + } + + @Test + public void createTable() throws Exception { + String tableName = "mytable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + Table t = store.getTable("default", tableName); + Assert.assertEquals(1, t.getSd().getColsSize()); + Assert.assertEquals("col1", t.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", t.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", t.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp", t.getSd().getLocation()); + Assert.assertEquals("input", t.getSd().getInputFormat()); + Assert.assertEquals("output", t.getSd().getOutputFormat()); + Assert.assertEquals("me", t.getOwner()); + Assert.assertEquals("default", t.getDbName()); + Assert.assertEquals(tableName, t.getTableName()); + } + + @Test + public void alterTable() throws Exception { + String tableName = "alttable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + startTime += 10; + table.setLastAccessTime(startTime); + store.alterTable("default", tableName, table); + + Table t = store.getTable("default", tableName); + Assert.assertEquals(1, t.getSd().getColsSize()); + Assert.assertEquals("col1", t.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", t.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", t.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp", t.getSd().getLocation()); + Assert.assertEquals("input", t.getSd().getInputFormat()); + Assert.assertEquals("output", t.getSd().getOutputFormat()); + Assert.assertEquals("me", t.getOwner()); + Assert.assertEquals("default", t.getDbName()); + Assert.assertEquals(tableName, t.getTableName()); + Assert.assertEquals(startTime, t.getLastAccessTime()); + } + + @Test + public void dropTable() throws Exception { + String tableName = "dtable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", 
"input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + Table t = store.getTable("default", tableName); + Assert.assertNotNull(t); + + store.dropTable("default", tableName); + Assert.assertNull(store.getTable("default", tableName)); + } + + @Test + public void createPartition() throws Exception { + String dbName = "default"; + String tableName = "myparttable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List vals = Arrays.asList("fred"); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=fred"); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Partition p = store.getPartition(dbName, tableName, vals); + Assert.assertEquals(1, p.getSd().getColsSize()); + Assert.assertEquals("col1", p.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", p.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", p.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", p.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", p.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp/pc=fred", p.getSd().getLocation()); + Assert.assertEquals("input", p.getSd().getInputFormat()); + Assert.assertEquals("output", p.getSd().getOutputFormat()); + Assert.assertEquals(dbName, p.getDbName()); + Assert.assertEquals(tableName, p.getTableName()); + Assert.assertEquals(1, p.getValuesSize()); + Assert.assertEquals("fred", p.getValues().get(0)); + } + + @Test + public void getPartitions() throws Exception { + String dbName = "default"; + String tableName = "manyParts"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List partVals = Arrays.asList("alan", "bob", "carl", "doug", "ethan"); + for (String val : partVals) { + List vals = new ArrayList(); + vals.add(val); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=" + val); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Partition p = store.getPartition(dbName, tableName, vals); + Assert.assertEquals("file:/tmp/pc=" + val, p.getSd().getLocation()); + } + + List parts = store.getPartitions(dbName, tableName, -1); + 
Assert.assertEquals(5, parts.size()); + String[] pv = new String[5]; + for (int i = 0; i < 5; i++) pv[i] = parts.get(i).getValues().get(0); + Arrays.sort(pv); + Assert.assertArrayEquals(pv, partVals.toArray(new String[5])); + } + + @Test + public void listGetDropPartitionNames() throws Exception { + String dbName = "default"; + String tableName = "listParts"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + partCols.add(new FieldSchema("region", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + String[][] partVals = new String[][]{{"today", "north america"}, {"tomorrow", "europe"}}; + for (String[] pv : partVals) { + List vals = new ArrayList(); + for (String v : pv) vals.add(v); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=" + pv[0] + "/region=" + pv[1]); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + } + + List names = store.listPartitionNames(dbName, tableName, (short) -1); + Assert.assertEquals(2, names.size()); + String[] resultNames = names.toArray(new String[names.size()]); + Arrays.sort(resultNames); + Assert.assertArrayEquals(resultNames, new String[]{"pc=today/region=north america", + "pc=tomorrow/region=europe"}); + + List parts = store.getPartitionsByNames(dbName, tableName, names); + Assert.assertArrayEquals(partVals[0], parts.get(0).getValues().toArray(new String[2])); + Assert.assertArrayEquals(partVals[1], parts.get(1).getValues().toArray(new String[2])); + + store.dropPartitions(dbName, tableName, names); + List afterDropParts = store.getPartitions(dbName, tableName, -1); + Assert.assertEquals(0, afterDropParts.size()); + } + + + @Test + public void dropPartition() throws Exception { + String dbName = "default"; + String tableName = "myparttable2"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List vals = Arrays.asList("fred"); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=fred"); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Assert.assertNotNull(store.getPartition(dbName, tableName, vals)); + store.dropPartition(dbName, tableName, vals); + thrown.expect(NoSuchObjectException.class); + store.getPartition(dbName, tableName, vals); + } + + @Test + public void createRole() throws Exception { + int now = (int)(System.currentTimeMillis() / 1000); + String roleName = "myrole"; +
store.addRole(roleName, "me"); + + Role r = store.getRole(roleName); + Assert.assertEquals(roleName, r.getRoleName()); + Assert.assertEquals("me", r.getOwnerName()); + Assert.assertTrue(now <= r.getCreateTime()); + } + + @Test + public void dropRole() throws Exception { + String roleName = "anotherrole"; + store.addRole(roleName, "me"); + + Role role = store.getRole(roleName); + Assert.assertNotNull(role); + + store.removeRole(roleName); + thrown.expect(NoSuchObjectException.class); + store.getRole(roleName); + } + + // Due to the way our mock stuff works, we can only insert one column at a time, so we'll test + // each stat type separately. We'll test them together in the integration tests. + @Test + public void booleanTableStatistics() throws Exception { + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. + long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statstable"; + String boolcol = "boolcol"; + long trues = 37; + long falses = 12; + long booleanNulls = 2; + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); + desc.setTableName(tableName); + desc.setIsTblLevel(true); + stats.setStatsDesc(desc); + + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName(boolcol); + obj.setColType("boolean"); + ColumnStatisticsData data = new ColumnStatisticsData(); + BooleanColumnStatsData boolData = new BooleanColumnStatsData(); + boolData.setNumTrues(trues); + boolData.setNumFalses(falses); + boolData.setNumNulls(booleanNulls); + data.setBooleanStats(boolData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(boolcol)); + Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); + Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + + Assert.assertEquals(1, stats.getStatsObjSize()); + ColumnStatisticsData colData = obj.getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, colData.getSetField()); + boolData = colData.getBooleanStats(); + Assert.assertEquals(trues, boolData.getNumTrues()); + Assert.assertEquals(falses, boolData.getNumFalses()); + Assert.assertEquals(booleanNulls, boolData.getNumNulls()); + } + + @Test + public void longTableStatistics() throws Exception { + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it.
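+ // (Presumably because the MockUtils-backed HTableInterface keeps only one Cell per row key,
+ // so the table row and its column stats, which share a key, would overwrite each other.)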
+ long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statstable"; + String longcol = "longcol"; + long longHigh = 120938479124L; + long longLow = -12341243213412124L; + long longNulls = 23; + long longDVs = 213L; + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); + desc.setTableName(tableName); + desc.setIsTblLevel(true); + stats.setStatsDesc(desc); + + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName(longcol); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData longData = new LongColumnStatsData(); + longData.setHighValue(longHigh); + longData.setLowValue(longLow); + longData.setNumNulls(longNulls); + longData.setNumDVs(longDVs); + data.setLongStats(longData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(longcol)); + Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); + Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + + Assert.assertEquals(1, stats.getStatsObjSize()); + ColumnStatisticsData colData = obj.getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, colData.getSetField()); + longData = colData.getLongStats(); + Assert.assertEquals(longHigh, longData.getHighValue()); + Assert.assertEquals(longLow, longData.getLowValue()); + Assert.assertEquals(longNulls, longData.getNumNulls()); + Assert.assertEquals(longDVs, longData.getNumDVs()); + } + + @Test + public void doubleTableStatistics() throws Exception { + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statstable"; + String doublecol = "doublecol"; + double doubleHigh = 123423.23423; + double doubleLow = 0.00001234233; + long doubleNulls = 92; + long doubleDVs = 1234123421L; + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); + desc.setTableName(tableName); + desc.setIsTblLevel(true); + stats.setStatsDesc(desc); + + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName(doublecol); + obj.setColType("double"); + ColumnStatisticsData data = new ColumnStatisticsData(); + DoubleColumnStatsData doubleData = new DoubleColumnStatsData(); + doubleData.setHighValue(doubleHigh); + doubleData.setLowValue(doubleLow); + doubleData.setNumNulls(doubleNulls); + doubleData.setNumDVs(doubleDVs); + data.setDoubleStats(doubleData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(doublecol)); + Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); + Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + + Assert.assertEquals(1, stats.getStatsObjSize()); + ColumnStatisticsData colData = obj.getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, colData.getSetField()); + doubleData = colData.getDoubleStats(); + Assert.assertEquals(doubleHigh, doubleData.getHighValue(), 0.01); + Assert.assertEquals(doubleLow, doubleData.getLowValue(), 0.01); + Assert.assertEquals(doubleNulls, doubleData.getNumNulls()); + Assert.assertEquals(doubleDVs, doubleData.getNumDVs()); + } + + @Test + public void stringTableStatistics() throws Exception { + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statstable"; + String stringcol = "stringcol"; + long strMaxLen = 1234; + double strAvgLen = 32.3; + long strNulls = 987; + long strDVs = 906; + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); + desc.setTableName(tableName); + desc.setIsTblLevel(true); + stats.setStatsDesc(desc); + + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName(stringcol); + obj.setColType("string"); + ColumnStatisticsData data = new ColumnStatisticsData(); + StringColumnStatsData strData = new StringColumnStatsData(); + strData.setMaxColLen(strMaxLen); + strData.setAvgColLen(strAvgLen); + strData.setNumNulls(strNulls); + strData.setNumDVs(strDVs); + data.setStringStats(strData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(stringcol)); + Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); + Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + + Assert.assertEquals(1, stats.getStatsObjSize()); + ColumnStatisticsData colData = obj.getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, colData.getSetField()); + strData = colData.getStringStats(); + Assert.assertEquals(strMaxLen, strData.getMaxColLen()); + Assert.assertEquals(strAvgLen, strData.getAvgColLen(), 0.01); + Assert.assertEquals(strNulls, strData.getNumNulls()); + Assert.assertEquals(strDVs, strData.getNumDVs()); + } + + @Test + public void binaryTableStatistics() throws Exception { + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statstable"; + String binarycol = "bincol"; + long binMaxLen = 123412987L; + double binAvgLen = 76.98; + long binNulls = 976998797L; + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); + desc.setTableName(tableName); + desc.setIsTblLevel(true); + stats.setStatsDesc(desc); + + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName(binarycol); + obj.setColType("binary"); + ColumnStatisticsData data = new ColumnStatisticsData(); + BinaryColumnStatsData binData = new BinaryColumnStatsData(); + binData.setMaxColLen(binMaxLen); + binData.setAvgColLen(binAvgLen); + binData.setNumNulls(binNulls); + data.setBinaryStats(binData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(binarycol)); + Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); + Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + + Assert.assertEquals(1, stats.getStatsObjSize()); + ColumnStatisticsData colData = obj.getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.BINARY_STATS, colData.getSetField()); + binData = colData.getBinaryStats(); + Assert.assertEquals(binMaxLen, binData.getMaxColLen()); + Assert.assertEquals(binAvgLen, binData.getAvgColLen(), 0.01); + Assert.assertEquals(binNulls, binData.getNumNulls()); + } + + @Test + public void decimalTableStatistics() throws Exception { + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statstable"; + String decimalcol = "deccol"; + Decimal decHigh = new Decimal(); + decHigh.setScale((short)3); + decHigh.setUnscaled("3876".getBytes()); // I have not clue how this is translated, but it + // doesn't matter + Decimal decLow = new Decimal(); + decLow.setScale((short)3); + decLow.setUnscaled("38".getBytes()); + long decNulls = 13; + long decDVs = 923947293L; + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); + desc.setTableName(tableName); + desc.setIsTblLevel(true); + stats.setStatsDesc(desc); + + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName(decimalcol); + obj.setColType("decimal(5,3)"); + ColumnStatisticsData data = new ColumnStatisticsData(); + DecimalColumnStatsData decData = new DecimalColumnStatsData(); + decData.setHighValue(decHigh); + decData.setLowValue(decLow); + decData.setNumNulls(decNulls); + decData.setNumDVs(decDVs); + data.setDecimalStats(decData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(decimalcol)); + Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); + Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + + Assert.assertEquals(1, stats.getStatsObjSize()); + ColumnStatisticsData colData = obj.getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, colData.getSetField()); + decData = colData.getDecimalStats(); + Assert.assertEquals(decHigh, decData.getHighValue()); + Assert.assertEquals(decLow, decData.getLowValue()); + Assert.assertEquals(decNulls, decData.getNumNulls()); + Assert.assertEquals(decDVs, decData.getNumDVs()); + } +} diff --git metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreCached.java metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreCached.java new file mode 100644 index 0000000..7ccfdb4 --- /dev/null +++ metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreCached.java @@ -0,0 +1,378 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Role; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * + */ +public class TestHBaseStoreCached { + private static final Log LOG = LogFactory.getLog(TestHBaseStoreCached.class.getName()); + static Map emptyParameters = new HashMap(); + + @Rule public ExpectedException thrown = ExpectedException.none(); + @Mock HTableInterface htable; + SortedMap rows = new TreeMap(); + HBaseStore store; + + @Before + public void init() throws IOException { + MockitoAnnotations.initMocks(this); + HiveConf conf = new HiveConf(); + store = MockUtils.init(conf, htable, rows); + } + + @Test + public void createTable() throws Exception { + String tableName = "mytable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + 
Table t = store.getTable("default", tableName); + Assert.assertEquals(1, t.getSd().getColsSize()); + Assert.assertEquals("col1", t.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", t.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", t.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp", t.getSd().getLocation()); + Assert.assertEquals("input", t.getSd().getInputFormat()); + Assert.assertEquals("output", t.getSd().getOutputFormat()); + Assert.assertEquals("me", t.getOwner()); + Assert.assertEquals("default", t.getDbName()); + Assert.assertEquals(tableName, t.getTableName()); + } + + @Test + public void alterTable() throws Exception { + String tableName = "alttable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + startTime += 10; + table.setLastAccessTime(startTime); + store.alterTable("default", tableName, table); + + Table t = store.getTable("default", tableName); + Assert.assertEquals(1, t.getSd().getColsSize()); + Assert.assertEquals("col1", t.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", t.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", t.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp", t.getSd().getLocation()); + Assert.assertEquals("input", t.getSd().getInputFormat()); + Assert.assertEquals("output", t.getSd().getOutputFormat()); + Assert.assertEquals("me", t.getOwner()); + Assert.assertEquals("default", t.getDbName()); + Assert.assertEquals(tableName, t.getTableName()); + Assert.assertEquals(startTime, t.getLastAccessTime()); + } + + @Test + public void dropTable() throws Exception { + String tableName = "dtable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, + emptyParameters, null, null, null); + store.createTable(table); + + Table t = store.getTable("default", tableName); + Assert.assertNotNull(t); + + store.dropTable("default", tableName); + Assert.assertNull(store.getTable("default", tableName)); + } + + @Test + public void createPartition() throws Exception { + String dbName = "default"; + String tableName = "myparttable"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + 
serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List vals = Arrays.asList("fred"); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=fred"); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Partition p = store.getPartition(dbName, tableName, vals); + Assert.assertEquals(1, p.getSd().getColsSize()); + Assert.assertEquals("col1", p.getSd().getCols().get(0).getName()); + Assert.assertEquals("int", p.getSd().getCols().get(0).getType()); + Assert.assertEquals("nocomment", p.getSd().getCols().get(0).getComment()); + Assert.assertEquals("serde", p.getSd().getSerdeInfo().getName()); + Assert.assertEquals("seriallib", p.getSd().getSerdeInfo().getSerializationLib()); + Assert.assertEquals("file:/tmp/pc=fred", p.getSd().getLocation()); + Assert.assertEquals("input", p.getSd().getInputFormat()); + Assert.assertEquals("output", p.getSd().getOutputFormat()); + Assert.assertEquals(dbName, p.getDbName()); + Assert.assertEquals(tableName, p.getTableName()); + Assert.assertEquals(1, p.getValuesSize()); + Assert.assertEquals("fred", p.getValues().get(0)); + } + + @Test + public void getPartitions() throws Exception { + String dbName = "default"; + String tableName = "manyParts"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List partVals = Arrays.asList("alan", "bob", "carl", "doug", "ethan"); + for (String val : partVals) { + List vals = new ArrayList(); + vals.add(val); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=" + val); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Partition p = store.getPartition(dbName, tableName, vals); + Assert.assertEquals("file:/tmp/pc=" + val, p.getSd().getLocation()); + } + + List parts = store.getPartitions(dbName, tableName, -1); + Assert.assertEquals(5, parts.size()); + String[] pv = new String[5]; + for (int i = 0; i < 5; i++) pv[i] = parts.get(i).getValues().get(0); + Arrays.sort(pv); + Assert.assertArrayEquals(pv, partVals.toArray(new String[5])); + } + + @Test + public void listGetDropPartitionNames() throws Exception { + String dbName = "default"; + String tableName = "listParts"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + partCols.add(new FieldSchema("region", 
"string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + String[][] partVals = new String[][]{{"today", "north america"}, {"tomorrow", "europe"}}; + for (String[] pv : partVals) { + List vals = new ArrayList(); + for (String v : pv) vals.add(v); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=" + pv[0] + "/region=" + pv[1]); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + } + + List names = store.listPartitionNames(dbName, tableName, (short) -1); + Assert.assertEquals(2, names.size()); + String[] resultNames = names.toArray(new String[names.size()]); + Arrays.sort(resultNames); + Assert.assertArrayEquals(resultNames, new String[]{"pc=today/region=north america", + "pc=tomorrow/region=europe"}); + + List parts = store.getPartitionsByNames(dbName, tableName, names); + Assert.assertArrayEquals(partVals[0], parts.get(0).getValues().toArray(new String[2])); + Assert.assertArrayEquals(partVals[1], parts.get(1).getValues().toArray(new String[2])); + + store.dropPartitions(dbName, tableName, names); + List afterDropParts = store.getPartitions(dbName, tableName, -1); + Assert.assertEquals(0, afterDropParts.size()); + } + + + @Test + public void dropPartition() throws Exception { + String dbName = "default"; + String tableName = "myparttable2"; + int startTime = (int)(System.currentTimeMillis() / 1000); + List cols = new ArrayList(); + cols.add(new FieldSchema("col1", "int", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, emptyParameters); + List partCols = new ArrayList(); + partCols.add(new FieldSchema("pc", "string", "")); + Table table = new Table(tableName, dbName, "me", startTime, startTime, 0, sd, partCols, + emptyParameters, null, null, null); + store.createTable(table); + + List vals = Arrays.asList("fred"); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/pc=fred"); + Partition part = new Partition(vals, dbName, tableName, startTime, startTime, psd, + emptyParameters); + store.addPartition(part); + + Assert.assertNotNull(store.getPartition(dbName, tableName, vals)); + store.dropPartition(dbName, tableName, vals); + thrown.expect(NoSuchObjectException.class); + store.getPartition(dbName, tableName, vals); + } + + // Due to the way our mock stuff works, we can only insert one column at a time, so we'll test + // each stat type separately. We'll test them together in hte integration tests. + @Test + public void booleanTableStatistics() throws Exception { + // Because of the way our mock implementation works we actually need to not create the table + // before we set statistics on it. 
+ long now = System.currentTimeMillis(); + String dbname = "default"; + String tableName = "statstable"; + String boolcol = "boolcol"; + long trues = 37; + long falses = 12; + long booleanNulls = 2; + + ColumnStatistics stats = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(); + desc.setLastAnalyzed(now); + desc.setDbName(dbname); + desc.setTableName(tableName); + desc.setIsTblLevel(true); + stats.setStatsDesc(desc); + + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName(boolcol); + obj.setColType("boolean"); + ColumnStatisticsData data = new ColumnStatisticsData(); + BooleanColumnStatsData boolData = new BooleanColumnStatsData(); + boolData.setNumTrues(trues); + boolData.setNumFalses(falses); + boolData.setNumNulls(booleanNulls); + data.setBooleanStats(boolData); + obj.setStatsData(data); + stats.addToStatsObj(obj); + + store.updateTableColumnStatistics(stats); + + stats = store.getTableColumnStatistics(dbname, tableName, Arrays.asList(boolcol)); + Assert.assertEquals(now, stats.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbname, stats.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, stats.getStatsDesc().getTableName()); + Assert.assertTrue(stats.getStatsDesc().isIsTblLevel()); + + Assert.assertEquals(1, stats.getStatsObjSize()); + ColumnStatisticsData colData = obj.getStatsData(); + Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, colData.getSetField()); + boolData = colData.getBooleanStats(); + Assert.assertEquals(trues, boolData.getNumTrues()); + Assert.assertEquals(falses, boolData.getNumFalses()); + Assert.assertEquals(booleanNulls, boolData.getNumNulls()); + } + + +} diff --git metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestStatsCache.java metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestStatsCache.java new file mode 100644 index 0000000..1830360 --- /dev/null +++ metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestStatsCache.java @@ -0,0 +1,341 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hive.metastore.hbase; + + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +public class TestStatsCache { + private static final Log LOG = LogFactory.getLog(TestStatsCache.class.getName()); + + @Mock HTableInterface htable; + static Put[] puts = new Put[1]; + HBaseReadWrite hrw; + + @Before + public void before() throws IOException { + MockitoAnnotations.initMocks(this); + + // For reasons I don't understand we have to do the mockito setup here in before, so we allow + // each method to place one put in puts[], and then we return that. 
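+ // (A likely explanation: the @Mock htable field is only instantiated by
+ // MockitoAnnotations.initMocks(this) above, so stubbing it any earlier would NPE.)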
+ Mockito.when(htable.get(Mockito.any(Get.class))).thenAnswer(new Answer() { + @Override + public Result answer(InvocationOnMock invocation) throws Throwable { + List cells = new ArrayList(); + if (puts[0] == null) return null; + Iterator>> iter = + puts[0].getFamilyCellMap().entrySet().iterator(); + for (Cell cell : puts[0].getFamilyCellMap().firstEntry().getValue()) { + cells.add(cell); + } + return Result.create(cells); + } + }); + + HConnection hconn = Mockito.mock(HConnection.class); + Mockito.when(hconn.getTable(Mockito.anyString())).thenReturn(htable); + HiveConf conf = new HiveConf(); + hrw = HBaseReadWrite.getInstance(conf); + hrw.setConnection(hconn); + StatsCache.getInstance(conf).clear(); + puts[0] = null; + } + + @Test + public void tableAllHit() throws IOException { + String dbName = "default"; + String tableName = "mytable"; + long now = System.currentTimeMillis(); + + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tableName); + desc.setLastAnalyzed(now); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1"); + obj.setColType("boolean"); + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setBooleanStats(new BooleanColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + obj = new ColumnStatisticsObj(); + obj.setColName("col2"); + obj.setColType("long"); + data = new ColumnStatisticsData(); + data.setLongStats(new LongColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + hrw.updateStatistics(dbName, tableName, null, null, cs); + + cs = new ColumnStatistics(); + desc = new ColumnStatisticsDesc(true, dbName, tableName); + desc.setLastAnalyzed(now); + cs.setStatsDesc(desc); + obj = new ColumnStatisticsObj(); + obj.setColName("col3"); + obj.setColType("double"); + data = new ColumnStatisticsData(); + data.setDoubleStats(new DoubleColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + hrw.updateStatistics(dbName, tableName, null, null, cs); + + // Now, ask for all 3 of these. We should hit all on the cache. We'll know if we don't + // because we've mocked hbase and it will return null on the get. + cs = hrw.getTableStatistics(dbName, tableName, Arrays.asList("col1", "col2", "col3")); + + Assert.assertEquals(now, cs.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbName, cs.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, cs.getStatsDesc().getTableName()); + Assert.assertTrue(cs.getStatsDesc().isIsTblLevel()); + + // There's no need to check every last field in each obj, as the objects aren't de/serialized + // in the cache. Just make sure we found the objects we expected. 
+ Assert.assertEquals(3, cs.getStatsObjSize()); + for (ColumnStatisticsObj csobj : cs.getStatsObj()) { + if (csobj.getColName().equals("col1")) { + Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, + csobj.getStatsData().getSetField()); + } else if (csobj.getColName().equals("col2")) { + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, + csobj.getStatsData().getSetField()); + } else if (csobj.getColName().equals("col3")) { + Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, + csobj.getStatsData().getSetField()); + } else { + Assert.fail("Unknown column"); + } + } + } + + @Test + public void tableAllMiss() throws IOException { + String dbName = "default"; + String tableName = "misstable"; + long now = System.currentTimeMillis(); + + // Build a column stats object to return from mockito hbase + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tableName); + desc.setLastAnalyzed(now); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1"); + obj.setColType("boolean"); + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setBooleanStats(new BooleanColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + byte[] serialized = HBaseUtils.serializeStatsForOneColumn(cs, obj); + + // The easiest way to get this into hbase format is to shove it into a put and then pull out + // the result for mockito to return. + Put put = new Put(HBaseUtils.buildKey(dbName, tableName)); + put.add(HBaseReadWrite.STATS_CF, "col1".getBytes(HBaseUtils.ENCODING), serialized); + + obj = new ColumnStatisticsObj(); + obj.setColName("col2"); + obj.setColType("long"); + data = new ColumnStatisticsData(); + data.setLongStats(new LongColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + serialized = HBaseUtils.serializeStatsForOneColumn(cs, obj); + put.add(HBaseReadWrite.STATS_CF, "col2".getBytes(HBaseUtils.ENCODING), serialized); + puts[0] = put; + + // Now, ask for all 3 of these. We should miss all on the cache. + cs = hrw.getTableStatistics(dbName, tableName, Arrays.asList("col1", "col2", "col3")); + + Assert.assertEquals(now, cs.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbName, cs.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, cs.getStatsDesc().getTableName()); + Assert.assertTrue(cs.getStatsDesc().isIsTblLevel()); + + // There's no need to check every last field in each obj, as the objects aren't de/serialized + // in the cache. Just make sure we found the objects we expected. 
+ Assert.assertEquals(2, cs.getStatsObjSize()); + for (ColumnStatisticsObj csobj : cs.getStatsObj()) { + if (csobj.getColName().equals("col1")) { + Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, + csobj.getStatsData().getSetField()); + } else if (csobj.getColName().equals("col2")) { + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, + csobj.getStatsData().getSetField()); + } else { + Assert.fail("Unknown column"); + } + } + } + + @Test + public void tableSomeHit() throws IOException { + String dbName = "default"; + String tableName = "sometable"; + long now = System.currentTimeMillis(); + + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tableName); + desc.setLastAnalyzed(now); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1"); + obj.setColType("boolean"); + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setBooleanStats(new BooleanColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + obj = new ColumnStatisticsObj(); + obj.setColName("col2"); + obj.setColType("long"); + data = new ColumnStatisticsData(); + data.setLongStats(new LongColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + hrw.updateStatistics(dbName, tableName, null, null, cs); + + cs = new ColumnStatistics(); + desc = new ColumnStatisticsDesc(true, dbName, tableName); + desc.setLastAnalyzed(now); + cs.setStatsDesc(desc); + obj = new ColumnStatisticsObj(); + obj.setColName("col3"); + obj.setColType("double"); + data = new ColumnStatisticsData(); + data.setDoubleStats(new DoubleColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + Put put = new Put(HBaseUtils.buildKey(dbName, tableName)); + byte[] serialized = HBaseUtils.serializeStatsForOneColumn(cs, obj); + put.add(HBaseReadWrite.STATS_CF, "col3".getBytes(HBaseUtils.ENCODING), serialized); + puts[0] = put; + + // Now, ask for all 3 of these. We should hit the first two on the cache and the third from + // the get + cs = hrw.getTableStatistics(dbName, tableName, Arrays.asList("col1", "col2", "col3")); + + Assert.assertEquals(now, cs.getStatsDesc().getLastAnalyzed()); + Assert.assertEquals(dbName, cs.getStatsDesc().getDbName()); + Assert.assertEquals(tableName, cs.getStatsDesc().getTableName()); + Assert.assertTrue(cs.getStatsDesc().isIsTblLevel()); + + // There's no need to check every last field in each obj, as the objects aren't de/serialized + // in the cache. Just make sure we found the objects we expected. 
+ Assert.assertEquals(3, cs.getStatsObjSize()); + for (ColumnStatisticsObj csobj : cs.getStatsObj()) { + if (csobj.getColName().equals("col1")) { + Assert.assertEquals(ColumnStatisticsData._Fields.BOOLEAN_STATS, + csobj.getStatsData().getSetField()); + } else if (csobj.getColName().equals("col2")) { + Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, + csobj.getStatsData().getSetField()); + } else if (csobj.getColName().equals("col3")) { + Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, + csobj.getStatsData().getSetField()); + } else { + Assert.fail("Unknown column"); + } + } + } + + @Ignore + public void tableTimeout() throws Exception { + String dbName = "default"; + String tableName = "mytable"; + long now = System.currentTimeMillis(); + + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tableName); + desc.setLastAnalyzed(now); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1"); + obj.setColType("boolean"); + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setBooleanStats(new BooleanColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + obj = new ColumnStatisticsObj(); + obj.setColName("col2"); + obj.setColType("long"); + data = new ColumnStatisticsData(); + data.setLongStats(new LongColumnStatsData()); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + hrw.updateStatistics(dbName, tableName, null, null, cs); + + StatsCache.getInstance(null).makeWayOld(); + + // Now, ask for all 3 of these. We should hit all on the cache. We'll know if we don't + // because we've mocked hbase and it will return null on the get. + cs = hrw.getTableStatistics(dbName, tableName, Arrays.asList("col1", "col2", "col3")); + + Assert.assertNull(cs); + + + + } + + // TODO test table timed out + // TODO test part all hit + // TODO test part all miss + // TODO test part some hit + // TODO test part timed out + // TODO test cleaning + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index ef6db3a..9d9c7da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -408,6 +408,12 @@ public int compile(String command, boolean resetTaskIds) { getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class); + // Flush the metastore cache. This assures that we don't pick up objects from a previous + // query running in this same thread. This has to be done after we get our semantic + // analyzer (this is when the connection to the metastore is made) but before we analyze, + // because at that point we need access to the objects. 
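+ // (For client implementations that do not cache metastore objects, flushCache is
+ // presumably a no-op, so this extra call should be cheap on the default path.)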
+ Hive.get().getMSC().flushCache(); + // Do semantic analysis and plan generation if (saHooks != null) { HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl(); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 3fdcc53..a934bbc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2838,8 +2838,13 @@ public HiveMetaHook getHook( } } }; - return RetryingMetaStoreClient.getProxy(conf, hookLoader, - SessionHiveMetaStoreClient.class.getName()); + + if (conf.getBoolVar(ConfVars.METASTORE_FASTPATH)) { + return new SessionHiveMetaStoreClient(conf, hookLoader); + } else { + return RetryingMetaStoreClient.getProxy(conf, hookLoader, + SessionHiveMetaStoreClient.class.getName()); + } } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index f8007e1..8331a49 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.hbase.SharedStorageDescriptor; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; @@ -152,6 +153,10 @@ public Partition(Table tbl, Map partSpec, Path location) throws * Refactored into a method. */ public static StorageDescriptor cloneSd(Table tbl) throws HiveException { + if (tbl.getSd() instanceof SharedStorageDescriptor) { + return new SharedStorageDescriptor((SharedStorageDescriptor)tbl.getSd()); + } + // What is the point of this? Why not just use the copy constructor in StorageDescriptor? StorageDescriptor sd = new StorageDescriptor(); try { // replace with THRIFT-138 @@ -615,6 +620,7 @@ public boolean isStoredAsSubDirectories() { } public List getSkewedColNames() { + LOG.debug("sd is " + tPartition.getSd().getClass().getName()); return tPartition.getSd().getSkewedInfo().getSkewedColNames(); }
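As a rough illustration of how the fastpath toggle above is meant to be exercised, the sketch below sets ConfVars.METASTORE_FASTPATH before asking Hive for a metastore client. Only the identifiers that appear in this patch (ConfVars.METASTORE_FASTPATH, Hive.get(), getMSC(), flushCache(), SessionHiveMetaStoreClient, RetryingMetaStoreClient) are taken from the change; the class name, the use of main(), and the assumption of an embedded metastore are illustrative only.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Hive;

// Hypothetical smoke test, not part of the patch.
public class FastpathSmokeTest {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    // Take the direct-client branch added to Hive.createMetaStoreClient();
    // this assumes an embedded (non-remote) metastore is configured.
    conf.setBoolVar(HiveConf.ConfVars.METASTORE_FASTPATH, true);
    Hive db = Hive.get(conf);
    // With the flag set, getMSC() should hand back a SessionHiveMetaStoreClient directly
    // rather than the RetryingMetaStoreClient proxy.
    IMetaStoreClient msc = db.getMSC();
    msc.flushCache();  // the same call Driver.compile() now makes before semantic analysis
  }
}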