From c575b5aa07ab9c0702768f41a807c097c6b17548 Mon Sep 17 00:00:00 2001 From: Prabhu Joseph Date: Sun, 11 Aug 2019 19:32:41 +0530 Subject: [PATCH] YARN-9373. Configurable Pre Splits for Hbase tables created by HBaseTimelineSchemaCreator. --- .../storage/HBaseTimelineSchemaCreator.java | 34 +++++++++++++- .../storage/application/ApplicationTableRW.java | 8 ++-- .../storage/apptoflow/AppToFlowTableRW.java | 7 ++- .../common/TimelineHBaseSchemaConstants.java | 44 +++++++++++++----- .../storage/domain/DomainTableRW.java | 8 ++-- .../storage/entity/EntityTableRW.java | 8 ++-- .../subapplication/SubApplicationTableRW.java | 8 ++-- .../common/TestHBaseTimelineSchemaCreator.java | 52 ++++++++++++++++++++++ 8 files changed, 142 insertions(+), 27 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/test/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/TestHBaseTimelineSchemaCreator.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineSchemaCreator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineSchemaCreator.java index 4263fac..1bfe995 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineSchemaCreator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineSchemaCreator.java @@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.server.timelineservice.storage.application.ApplicationTableRW; import org.apache.hadoop.yarn.server.timelineservice.storage.apptoflow.AppToFlowTableRW; import org.apache.hadoop.yarn.server.timelineservice.storage.common.HBaseTimelineStorageUtils; +import org.apache.hadoop.yarn.server.timelineservice.storage.common.TimelineHBaseSchemaConstants; import org.apache.hadoop.yarn.server.timelineservice.storage.entity.EntityTableRW; import org.apache.hadoop.yarn.server.timelineservice.storage.flow.FlowActivityTableRW; import org.apache.hadoop.yarn.server.timelineservice.storage.flow.FlowRunTableRW; @@ -73,6 +74,8 @@ public HBaseTimelineSchemaCreator() { private static final String ENTITY_TABLE_NAME_SHORT = "e"; private static final String HELP_SHORT = "h"; private static final String CREATE_TABLES_SHORT = "c"; + private static final String USERNAME_PRE_SPLITS = "up"; + private static final String CLUSTERID_PRE_SPLITS = "cp"; public void createTimelineSchema(String[] args) throws Exception { @@ -141,6 +144,21 @@ public void createTimelineSchema(String[] args) throws Exception { new SubApplicationTableRW().setMetricsTTL(subAppMetricsTTL, hbaseConf); } + // Grab the userName PreSplits + String userNameSplits = commandLine.getOptionValue(USERNAME_PRE_SPLITS); + if (StringUtils.isNotBlank(userNameSplits)) { + hbaseConf.set(TimelineHBaseSchemaConstants.USERNAME_SPLITS_CONF_NAME, + userNameSplits); + } + + // Grab the clusterId PreSplits + String clusterIdSplits = + commandLine.getOptionValue(CLUSTERID_PRE_SPLITS); + if (StringUtils.isNotBlank(clusterIdSplits)) { + hbaseConf.set(TimelineHBaseSchemaConstants.CLUSTERID_SPLITS_CONF_NAME, + clusterIdSplits); + } + // create all table schemas in hbase final boolean skipExisting = commandLine.hasOption( SKIP_EXISTING_TABLE_OPTION_SHORT); @@ -221,6 +239,16 @@ private static CommandLine parseArgs(String[] args) throws ParseException { o.setRequired(false); options.addOption(o); + o = new Option(USERNAME_PRE_SPLITS, "userNameSplits", + true, "PreSplits for tables with RowKey starts with UserName"); + o.setRequired(false); + options.addOption(o); + + o = new Option(CLUSTERID_PRE_SPLITS, "clusterIdSplits", + true, "PreSplits for tables with RowKey starts with clusterId"); + o.setRequired(false); + options.addOption(o); + CommandLineParser parser = new PosixParser(); CommandLine commandLine = null; try { @@ -259,7 +287,11 @@ private static void printUsage() { + " ]" + " TTL for metrics in the SubApplication table\n") .append("[-skipExistingTable] Whether to skip existing" + - " hbase tables\n"); + " hbase tables\n") + .append("[-userNameSplits ] PreSplits" + + " for Tables with RowKey starts with userName\n") + .append("[-clusterIdSplits ] PreSplits" + + " for Tables with RowKey starts with clusterId\n"); System.out.println(usage.toString()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/application/ApplicationTableRW.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/application/ApplicationTableRW.java index 808994e..cbbf9e5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/application/ApplicationTableRW.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/application/ApplicationTableRW.java @@ -31,6 +31,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.yarn.server.timelineservice.storage.common.TimelineHBaseSchemaConstants.CLUSTERID_SPLITS_CONF_NAME; + /** * Create, read and write to the Application Table. */ @@ -118,9 +120,9 @@ public void createTable(Admin admin, Configuration hbaseConf) applicationTableDescp.setRegionSplitPolicyClassName( "org.apache.hadoop.hbase.regionserver.KeyPrefixRegionSplitPolicy"); applicationTableDescp.setValue("KeyPrefixRegionSplitPolicy.prefix_length", - TimelineHBaseSchemaConstants.USERNAME_SPLIT_KEY_PREFIX_LENGTH); - admin.createTable(applicationTableDescp, - TimelineHBaseSchemaConstants.getUsernameSplits()); + TimelineHBaseSchemaConstants.SPLIT_KEY_PREFIX_LENGTH); + admin.createTable(applicationTableDescp, TimelineHBaseSchemaConstants. + getPreSplits(hbaseConf.get(CLUSTERID_SPLITS_CONF_NAME))); LOG.info("Status of table creation for " + table.getNameAsString() + "=" + admin.tableExists(table)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/apptoflow/AppToFlowTableRW.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/apptoflow/AppToFlowTableRW.java index 05c4e57..d79e1e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/apptoflow/AppToFlowTableRW.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/apptoflow/AppToFlowTableRW.java @@ -32,6 +32,9 @@ import java.io.IOException; +import static org.apache.hadoop.yarn.server.timelineservice.storage.common.TimelineHBaseSchemaConstants.USERNAME_SPLITS_CONF_NAME; + + /** * Create, read and write to the AppToFlow Table. */ @@ -78,8 +81,8 @@ public void createTable(Admin admin, Configuration hbaseConf) new HColumnDescriptor(AppToFlowColumnFamily.MAPPING.getBytes()); mappCF.setBloomFilterType(BloomType.ROWCOL); appToFlowTableDescp.addFamily(mappCF); - - admin.createTable(appToFlowTableDescp); + admin.createTable(appToFlowTableDescp, TimelineHBaseSchemaConstants. + getPreSplits(hbaseConf.get(USERNAME_SPLITS_CONF_NAME))); LOG.info("Status of table creation for " + table.getNameAsString() + "=" + admin.tableExists(table)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/TimelineHBaseSchemaConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/TimelineHBaseSchemaConstants.java index 8e6c259..1884c7e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/TimelineHBaseSchemaConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/TimelineHBaseSchemaConstants.java @@ -20,6 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.yarn.conf.YarnConfiguration; /** * contains the constants used in the context of schema accesses for @@ -33,12 +34,10 @@ private TimelineHBaseSchemaConstants() { } /** - * Used to create a pre-split for tables starting with a username in the - * prefix. TODO: this may have to become a config variable (string with - * separators) so that different installations can presplit based on their own - * commonly occurring names. + * Used to create a pre-split for tables starting with a username + * / clusterId in the prefix. */ - private final static byte[][] USERNAME_SPLITS = { + private final static byte[][] DEFAULT_PRE_SPLITS = { Bytes.toBytes("a"), Bytes.toBytes("ad"), Bytes.toBytes("an"), Bytes.toBytes("b"), Bytes.toBytes("ca"), Bytes.toBytes("cl"), Bytes.toBytes("d"), Bytes.toBytes("e"), Bytes.toBytes("f"), @@ -54,18 +53,39 @@ private TimelineHBaseSchemaConstants() { /** * The length at which keys auto-split. */ - public static final String USERNAME_SPLIT_KEY_PREFIX_LENGTH = "4"; + public static final String SPLIT_KEY_PREFIX_LENGTH = "4"; + + public static final String USERNAME_SPLITS_CONF_NAME = + YarnConfiguration.TIMELINE_SERVICE_PREFIX + ".table.username.presplits"; + + public static final String CLUSTERID_SPLITS_CONF_NAME = + YarnConfiguration.TIMELINE_SERVICE_PREFIX + ".table.clusterid.presplits"; + + private static final String SPLITS_DELIMITER = ","; /** - * @return splits for splits where a user is a prefix. + * @return splits for splits where a user / clusterId is a prefix. */ - public static byte[][] getUsernameSplits() { - byte[][] kloon = USERNAME_SPLITS.clone(); + public static byte[][] getDefaultPreSplits() { + byte[][] kloon = DEFAULT_PRE_SPLITS.clone(); // Deep copy. - for (int row = 0; row < USERNAME_SPLITS.length; row++) { - kloon[row] = Bytes.copy(USERNAME_SPLITS[row]); + for (int row = 0; row < DEFAULT_PRE_SPLITS.length; row++) { + kloon[row] = Bytes.copy(DEFAULT_PRE_SPLITS[row]); } return kloon; } -} \ No newline at end of file + public static byte[][] getPreSplits(String splits) { + if (splits != null && splits.length() > 0) { + String[] splitPoints = splits.split(SPLITS_DELIMITER); + byte[][] preSplits = new byte[splitPoints.length][]; + int index = 0; + for (String split : splitPoints) { + preSplits[index++] = Bytes.toBytes(split); + } + return preSplits; + } + return getDefaultPreSplits(); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/domain/DomainTableRW.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/domain/DomainTableRW.java index d2ae18e..cff8765 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/domain/DomainTableRW.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/domain/DomainTableRW.java @@ -32,6 +32,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.yarn.server.timelineservice.storage.common.TimelineHBaseSchemaConstants.CLUSTERID_SPLITS_CONF_NAME; + /** * Create, read and write to the domain Table. */ @@ -83,9 +85,9 @@ public void createTable(Admin admin, Configuration hbaseConf) .setRegionSplitPolicyClassName( "org.apache.hadoop.hbase.regionserver.KeyPrefixRegionSplitPolicy"); domainTableDescp.setValue("KeyPrefixRegionSplitPolicy.prefix_length", - TimelineHBaseSchemaConstants.USERNAME_SPLIT_KEY_PREFIX_LENGTH); - admin.createTable(domainTableDescp, - TimelineHBaseSchemaConstants.getUsernameSplits()); + TimelineHBaseSchemaConstants.SPLIT_KEY_PREFIX_LENGTH); + admin.createTable(domainTableDescp, TimelineHBaseSchemaConstants. + getPreSplits(hbaseConf.get(CLUSTERID_SPLITS_CONF_NAME))); LOG.info("Status of table creation for " + table.getNameAsString() + "=" + admin.tableExists(table)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/entity/EntityTableRW.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/entity/EntityTableRW.java index 111ae71..d3c678d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/entity/EntityTableRW.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/entity/EntityTableRW.java @@ -31,6 +31,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.yarn.server.timelineservice.storage.common.TimelineHBaseSchemaConstants.USERNAME_SPLITS_CONF_NAME; + /** * Create, read and write to the Entity Table. */ @@ -117,9 +119,9 @@ public void createTable(Admin admin, Configuration hbaseConf) entityTableDescp.setRegionSplitPolicyClassName( "org.apache.hadoop.hbase.regionserver.KeyPrefixRegionSplitPolicy"); entityTableDescp.setValue("KeyPrefixRegionSplitPolicy.prefix_length", - TimelineHBaseSchemaConstants.USERNAME_SPLIT_KEY_PREFIX_LENGTH); - admin.createTable(entityTableDescp, - TimelineHBaseSchemaConstants.getUsernameSplits()); + TimelineHBaseSchemaConstants.SPLIT_KEY_PREFIX_LENGTH); + admin.createTable(entityTableDescp, TimelineHBaseSchemaConstants. + getPreSplits(hbaseConf.get(USERNAME_SPLITS_CONF_NAME))); LOG.info("Status of table creation for " + table.getNameAsString() + "=" + admin.tableExists(table)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/subapplication/SubApplicationTableRW.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/subapplication/SubApplicationTableRW.java index 256b24b..13fd839 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/subapplication/SubApplicationTableRW.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/subapplication/SubApplicationTableRW.java @@ -31,6 +31,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.yarn.server.timelineservice.storage.common.TimelineHBaseSchemaConstants.USERNAME_SPLITS_CONF_NAME; + /** * Create, read and write to the SubApplication table. */ @@ -118,9 +120,9 @@ public void createTable(Admin admin, Configuration hbaseConf) subAppTableDescp.setRegionSplitPolicyClassName( "org.apache.hadoop.hbase.regionserver.KeyPrefixRegionSplitPolicy"); subAppTableDescp.setValue("KeyPrefixRegionSplitPolicy.prefix_length", - TimelineHBaseSchemaConstants.USERNAME_SPLIT_KEY_PREFIX_LENGTH); - admin.createTable(subAppTableDescp, - TimelineHBaseSchemaConstants.getUsernameSplits()); + TimelineHBaseSchemaConstants.SPLIT_KEY_PREFIX_LENGTH); + admin.createTable(subAppTableDescp, TimelineHBaseSchemaConstants. + getPreSplits(hbaseConf.get(USERNAME_SPLITS_CONF_NAME))); LOG.info("Status of table creation for " + table.getNameAsString() + "=" + admin.tableExists(table)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/test/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/TestHBaseTimelineSchemaCreator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/test/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/TestHBaseTimelineSchemaCreator.java new file mode 100644 index 0000000..c04c2fb --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/test/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/TestHBaseTimelineSchemaCreator.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.timelineservice.storage.common; + +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; +import org.junit.Test; + +/** + * Test cases for {@link HBaseTimelineSchemaCreator}. + */ +public class TestHBaseTimelineSchemaCreator { + + @Test + public void testHbasePreSplits() throws Exception { + byte[][] expectedPreSplits = {Bytes.toBytes("1"), Bytes.toBytes("10"), + Bytes.toBytes("A"), Bytes.toBytes("Z"), Bytes.toBytes("a"), + Bytes.toBytes("z")}; + String splits = "1,10,A,Z,a,z"; + byte[][] preSplits = TimelineHBaseSchemaConstants.getPreSplits(splits); + Assert.assertEquals(expectedPreSplits.length, preSplits.length); + for (int i=0; i