From 5d618f77bd8a2ab514a34ae0176c1ebd82ed6dca Mon Sep 17 00:00:00 2001
From: sicheng
Date: Wed, 8 Jan 2020 18:51:49 +0800
Subject: [PATCH] HBASE-23656 [MERGETOOL] Support merging regions by pattern

---
 bin/hbase                                          |   3 +
 bin/hbase.cmd                                      |   1 +
 .../hadoop/hbase/util/OnlineMergeTool.java         | 510 ++++++++++++++++++
 .../hadoop/hbase/util/RegionSizeCalculator.java    | 127 +++++
 .../hadoop/hbase/util/TestOnlineMergeTool.java     | 236 ++++++++
 5 files changed, 877 insertions(+)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestOnlineMergeTool.java

diff --git a/bin/hbase b/bin/hbase
index 81778988f2..65c7a5fae8 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -122,6 +122,7 @@ if [ $# = 0 ]; then
   echo "  pre-upgrade     Run Pre-Upgrade validator tool"
   echo "  hbtop           Run HBTop tool"
   echo "  CLASSNAME       Run the class named CLASSNAME"
+  echo "  onlinemerge     Run the online region merge tool"
   exit 1
 fi
@@ -635,6 +636,8 @@ elif [ "$COMMAND" = "mapredcp" ] ; then
 elif [ "$COMMAND" = "classpath" ] ; then
   echo "$CLASSPATH"
   exit 0
+elif [ "$COMMAND" = "onlinemerge" ] ; then
+  CLASS='org.apache.hadoop.hbase.util.OnlineMergeTool'
 elif [ "$COMMAND" = "pe" ] ; then
   CLASS='org.apache.hadoop.hbase.PerformanceEvaluation'
   HBASE_OPTS="$HBASE_OPTS $HBASE_PE_OPTS"
diff --git a/bin/hbase.cmd b/bin/hbase.cmd
index fbeb1f8290..8a4f7eea57 100644
--- a/bin/hbase.cmd
+++ b/bin/hbase.cmd
@@ -466,4 +466,5 @@ goto :eof
   echo   mapredcp        Dump CLASSPATH entries required by mapreduce
   echo   version         Print the version
   echo   CLASSNAME       Run the class named CLASSNAME
+  echo   onlinemerge     Run the online region merge tool
   goto :eof
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
new file mode 100644
index 0000000000..d2447f93c7
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/OnlineMergeTool.java
@@ -0,0 +1,510 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.ClusterMetrics;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility that can merge any two regions in the same table: adjacent,
+ * overlapping or disjoint. It can also merge all the regions of a table, two by two.
+ */
+@InterfaceAudience.Private
+public class OnlineMergeTool extends Configured implements Tool {
+  static final Logger LOG = LoggerFactory.getLogger(OnlineMergeTool.class);
+  private static final int COMPACT_PAUSE_TIME = 180 * 1000;
+  private static final int DEFAULT_MERGE_PAUSE_TIME = 120 * 1000;
+  private static final String COMPACTION_ATTRIBUTE = "MAJOR";
+  private static final long GB = 1024L * 1024L * 1024L;
+  private final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
+  private final HBaseConfiguration conf;
+  private volatile boolean isMetaTable;
+  private volatile Connection connection;
+  private volatile Admin admin;
+  // Name of the table whose regions are merged
+  private String tableName = null;
+  // Name of the first region of the merge range (exclusive)
+  private String startRegion = null;
+  // Name of the last region of the merge range (exclusive)
+  private String stopRegion = null;
+  // Regions larger than this size, in bytes, are skipped
+  private Long maxRegionSize = 0L;
+  // Regions created after this time are skipped
+  private String maxRegionCreateTime = null;
+  // Maximum number of merge plans to submit per round
+  private String numMaxMergePlans = null;
+  // Stop merging once the table has this many regions
+  private Long targetRegionCount = 0L;
+  /**
+   * Print the execution plan instead of merging.
+   */
+  private boolean printExecutionPlan = true;
+  /**
+   * Configurable pause between merge rounds, in milliseconds.
+   */
+  private int mergePauseTime = 0;
+
+  /**
+   * Default constructor.
+   */
+  public OnlineMergeTool() throws IOException {
+    this(new HBaseConfiguration());
+  }
+
+  /**
+   * @param conf The current configuration.
+   * @throws IOException If IO problem encountered
+   */
+  public OnlineMergeTool(HBaseConfiguration conf) throws IOException {
+    super(conf);
+    this.conf = conf;
+    this.conf.setInt("hbase.client.retries.number", 3);
+    this.conf.setInt("hbase.client.pause", 1000);
+    this.connection = ConnectionFactory.createConnection(this.conf);
+    this.admin = connection.getAdmin();
+  }
+
+  /**
+   * Main program.
+   *
+   * @param args The command line parameters.
+   */
+  public static void main(String[] args) {
+    int status = 0;
+    try {
+      status = ToolRunner.run(new OnlineMergeTool(), args);
+    } catch (Exception e) {
+      LOG.error("exiting due to error", e);
+      status = -1;
+    }
+    System.exit(status);
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+
+    if (!doCommandLine(args)) {
+      return -1;
+    }
+
+    isMetaTable =
+      Bytes.compareTo(Bytes.toBytes(tableName), TableName.META_TABLE_NAME.getName()) == 0;
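+
+    // Overall flow: verify that HDFS and HBase are reachable, then either
+    // print the merge plans (--printExecutionPlan=true, the default) or
+    // execute them round by round until the target region count is reached.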
+    // Verify file system is up.
+    FileSystem fs = FileSystem.get(this.conf); // get DFS handle
+    LOG.info("Verifying that file system is available...");
+    try {
+      FSUtils.checkFileSystemAvailable(fs);
+    } catch (IOException e) {
+      LOG.error("File system is not available", e);
+      return -1;
+    }
+
+    // Verify HBase is up
+    LOG.info("Verifying that HBase is running...");
+    try {
+      admin.getClusterMetrics(EnumSet.of(ClusterMetrics.Option.HBASE_VERSION));
+    } catch (MasterNotRunningException e) {
+      LOG.error("HBase cluster must be on-line.");
+      return -1;
+    }
+
+    List<RegionInfo> hRegionInfoList = admin.getRegions(TableName.valueOf(tableName));
+    try {
+      if (isMetaTable) {
+        throw new Exception("Can't merge meta tables online");
+      } else if (hRegionInfoList.size() <= targetRegionCount) {
+        throw new Exception("Can't merge table because regionCount=" + hRegionInfoList.size()
+          + " is not greater than targetRegionCount=" + targetRegionCount);
+      } else if (printExecutionPlan) {
+        executionPlan();
+      } else {
+        mergeRegions();
+      }
+      return 0;
+    } catch (Exception e) {
+      LOG.error("Merge failed", e);
+      return -1;
+    }
+  }
+
+  private boolean doCommandLine(final String[] args) {
+    if (args.length < 1) {
+      printUsage(null);
+      return false;
+    }
+    try {
+      for (int i = 0; i < args.length; i++) {
+        String cmd = args[i];
+        if (cmd.equals("-h") || cmd.startsWith("--h")) {
+          printUsage(null);
+          return false;
+        }
+
+        final String tableNameKey = "--tableName=";
+        if (cmd.startsWith(tableNameKey)) {
+          tableName = cmd.substring(tableNameKey.length());
+          continue;
+        }
+
+        final String startRegionKey = "--startRegion=";
+        if (cmd.startsWith(startRegionKey)) {
+          startRegion = cmd.substring(startRegionKey.length());
+          continue;
+        }
+
+        final String stopRegionKey = "--stopRegion=";
+        if (cmd.startsWith(stopRegionKey)) {
+          stopRegion = cmd.substring(stopRegionKey.length());
+          continue;
+        }
+
+        final String maxRegionSizeKey = "--maxRegionSize=";
+        if (cmd.startsWith(maxRegionSizeKey)) {
+          maxRegionSize = Long.parseLong(cmd.substring(maxRegionSizeKey.length())) * GB;
+          continue;
+        }
+
+        final String maxRegionCreateTimeKey = "--maxRegionCreateTime=";
+        if (cmd.startsWith(maxRegionCreateTimeKey)) {
+          maxRegionCreateTime = cmd.substring(maxRegionCreateTimeKey.length());
+          continue;
+        }
+
+        final String numMaxMergePlansKey = "--numMaxMergePlans=";
+        if (cmd.startsWith(numMaxMergePlansKey)) {
+          numMaxMergePlans = cmd.substring(numMaxMergePlansKey.length());
+          continue;
+        }
+
+        final String targetRegionCountKey = "--targetRegionCount=";
+        if (cmd.startsWith(targetRegionCountKey)) {
+          targetRegionCount = Long.parseLong(cmd.substring(targetRegionCountKey.length()));
+          continue;
+        }
+
+        final String printExecutionPlanKey = "--printExecutionPlan=";
+        if (cmd.startsWith(printExecutionPlanKey)) {
+          printExecutionPlan = Boolean.parseBoolean(cmd.substring(printExecutionPlanKey.length()));
+          continue;
+        }
+
+        final String mergePauseTimeKey = "--configMergePauseTime=";
+        if (cmd.startsWith(mergePauseTimeKey)) {
+          mergePauseTime = Integer.parseInt(cmd.substring(mergePauseTimeKey.length()));
+          continue;
+        }
+      }
+
+      if (null == tableName || tableName.isEmpty()) {
+        printUsage("table name must not be null or empty");
+        return false;
+      }
+
+      if (maxRegionSize < 0) {
+        printUsage("maxRegionSize must not be negative");
+        return false;
+      }
+
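+      // startRegion and stopRegion form an open interval: both endpoint
+      // regions are excluded from merging, and they must be supplied together.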
+      if (startRegion != null && stopRegion != null) {
+        if (notInTable(Bytes.toBytes(tableName), Bytes.toBytes(startRegion))
+          || notInTable(Bytes.toBytes(tableName), Bytes.toBytes(stopRegion))) {
+          LOG.error("Can't merge: region is null or not in table; startRegion=" + startRegion
+            + " stopRegion=" + stopRegion);
+          return false;
+        } else if (startRegion.equals(stopRegion)) {
+          LOG.error("Can't merge a region with itself");
+          return false;
+        }
+      }
+      if (startRegion != null && null == stopRegion) {
+        printUsage(
+          "startRegion and stopRegion must be used in pairs; stopRegion=" + stopRegion);
+        return false;
+      }
+      if (stopRegion != null && null == startRegion) {
+        printUsage(
+          "startRegion and stopRegion must be used in pairs; startRegion=" + startRegion);
+        return false;
+      }
+
+    } catch (Exception e) {
+      LOG.error("Parsing the command line failed", e);
+      printUsage("Can't start because " + e.getMessage());
+      return false;
+    }
+    return true;
+  }
+
+  private List<Pair<byte[], byte[]>> executionPlan() throws IOException, ParseException {
+    List<RegionInfo> hris = getListRegionInfo(tableName, startRegion, stopRegion);
+    if (hris.size() < 2) {
+      throw new IOException(
+        "The table doesn't have two or more mergeable regions; region count=" + hris.size());
+    }
+    RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(tableName));
+    RegionSizeCalculator regionSizeCalculator = new RegionSizeCalculator(regionLocator, admin);
+    List<Pair<byte[], byte[]>> mergePlans = new ArrayList<>();
+    // Pair up neighbours in the candidate list: (0,1), (2,3), ... Each pair
+    // that passes the filters below becomes one merge plan.
+    for (int i = 0; i < hris.size() - 1; i += 2) {
+      RegionInfo regionInfo1 = hris.get(i);
+      RegionInfo regionInfo2 = hris.get(i + 1);
+      if (regionInfo1.isOffline() || regionInfo1.isSplit() || regionInfo2.isOffline()
+        || regionInfo2.isSplit()) {
+        LOG.info("Skipping split or offline regions: region1="
+          + regionInfo1.getRegionNameAsString() + " region2="
+          + regionInfo2.getRegionNameAsString());
+        continue;
+      }
+      if (null != maxRegionCreateTime) {
+        long maxCreateTimestamp = DATE_FORMAT.parse(maxRegionCreateTime).getTime();
+        if (regionInfo1.getRegionId() > maxCreateTimestamp
+          || regionInfo2.getRegionId() > maxCreateTimestamp) {
+          StringBuilder mesg = new StringBuilder();
+          mesg.append("Skipping regions on timestamp: region1=");
+          mesg.append(Bytes.toString(regionInfo1.getEncodedNameAsBytes()));
+          mesg.append(" region1Timestamp=").append(regionInfo1.getRegionId());
+          mesg.append(" > maxRegionCreateTime=").append(maxCreateTimestamp);
+          mesg.append(" or region2=").append(Bytes.toString(regionInfo2.getEncodedNameAsBytes()));
+          mesg.append(" region2Timestamp=").append(regionInfo2.getRegionId());
+          mesg.append(" > maxRegionCreateTime=").append(maxCreateTimestamp);
+          LOG.info(mesg.toString());
+          continue;
+        }
+      }
+      long regionSize1 = regionSizeCalculator.getRegionSize(regionInfo1.getRegionName());
+      long regionSize2 = regionSizeCalculator.getRegionSize(regionInfo2.getRegionName());
+      if (regionSize1 > maxRegionSize || regionSize2 > maxRegionSize) {
+        StringBuilder mesg = new StringBuilder();
+        mesg.append("Skipping regions on size: region1=");
+        mesg.append(Bytes.toString(regionInfo1.getEncodedNameAsBytes()));
+        mesg.append(" region1Size=").append(regionSize1);
+        mesg.append(" > maxRegionSize=").append(maxRegionSize);
+        mesg.append(" or region2=").append(Bytes.toString(regionInfo2.getEncodedNameAsBytes()));
+        mesg.append(" region2Size=").append(regionSize2);
+        mesg.append(" > maxRegionSize=").append(maxRegionSize);
+        LOG.info(mesg.toString());
+        continue;
+      }
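+      // Both regions passed every filter; record them as one merge plan.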
+      Pair<byte[], byte[]> pair = new Pair<>();
+      pair.setFirst(regionInfo1.getEncodedNameAsBytes());
+      pair.setSecond(regionInfo2.getEncodedNameAsBytes());
+      mergePlans.add(pair);
+      StringBuilder mesg = new StringBuilder();
+      mesg.append("Merge plan: region1=");
+      mesg.append(Bytes.toString(regionInfo1.getEncodedNameAsBytes()));
+      mesg.append(" region1Size=").append(regionSize1 / GB).append("G region2=");
+      mesg.append(Bytes.toString(regionInfo2.getEncodedNameAsBytes()));
+      mesg.append(" region2Size=").append(regionSize2 / GB).append("G");
+      LOG.info(mesg.toString());
+    }
+    return mergePlans;
+  }
+
+  /*
+   * Merges the planned region pairs of a user table, round by round, until
+   * the target region count is reached or no mergeable pair is left.
+   */
+  private void mergeRegions() throws IOException, InterruptedException, ParseException {
+    List<Pair<byte[], byte[]>> mergePlans = executionPlan();
+    if (mergePlans.isEmpty()) {
+      printCompletedMesg();
+      return;
+    }
+    if (null == numMaxMergePlans) {
+      for (Pair<byte[], byte[]> region : mergePlans) {
+        admin.mergeRegionsAsync(region.getFirst(), region.getSecond(), false);
+        LOG.info("Merging regions " + Bytes.toString(region.getFirst()) + " and "
+          + Bytes.toString(region.getSecond()) + " in table " + tableName);
+      }
+      pauseAndCompact();
+    } else {
+      long numMaxMergePlans2Long = Long.parseLong(numMaxMergePlans);
+      for (int i = 0; i < mergePlans.size(); i++) {
+        admin.mergeRegionsAsync(mergePlans.get(i).getFirst(), mergePlans.get(i).getSecond(),
+          false);
+        LOG.info("Merging regions " + Bytes.toString(mergePlans.get(i).getFirst()) + " and "
+          + Bytes.toString(mergePlans.get(i).getSecond()) + " in table " + tableName);
+        // Pause after every batch of numMaxMergePlans merges.
+        if ((i + 1) % numMaxMergePlans2Long == 0) {
+          pauseAndCompact();
+        }
+      }
+      pauseAndCompact();
+    }
+    List<RegionInfo> hRegionInfoList = admin.getRegions(TableName.valueOf(tableName));
+    if (hRegionInfoList.size() <= targetRegionCount
+      || getListRegionInfo(tableName, startRegion, stopRegion).size() < 2) {
+      printCompletedMesg();
+    } else {
+      mergeRegions();
+    }
+  }
+
+  /**
+   * Pause to let in-flight merges settle, then trigger a compaction and wait
+   * for it to finish.
+   */
+  private void pauseAndCompact() throws IOException, InterruptedException {
+    Thread.sleep(mergePauseTime > 0 ? mergePauseTime : DEFAULT_MERGE_PAUSE_TIME);
+    if (maxRegionSize != 0) {
+      admin.compact(TableName.valueOf(tableName));
+      LOG.info("Table=" + tableName + " is running compaction");
+      runCompaction(tableName);
+    }
+  }
+
+  /**
+   * Print the merge-completed message.
+   */
+  private void printCompletedMesg() {
+    StringBuilder mesg = new StringBuilder();
+    mesg.append("Merge completed table=");
+    mesg.append(tableName);
+    mesg.append(" startRegion=");
+    mesg.append(startRegion);
+    mesg.append(" stopRegion=");
+    mesg.append(stopRegion);
+    mesg.append(" maxRegionSize=");
+    mesg.append(maxRegionSize / GB).append("G");
+    mesg.append(" maxRegionCreateTime=").append(maxRegionCreateTime);
+    mesg.append(" numMaxMergePlans=");
+    mesg.append(numMaxMergePlans);
+    mesg.append(" targetRegionCount=");
+    mesg.append(targetRegionCount);
+    LOG.info(mesg.toString());
+  }
+
+  /**
+   * Get the RegionInfos of a table that fall strictly between startRegion and
+   * stopRegion (open interval). If both are null, all regions are returned.
+   *
+   * @return list of RegionInfos
+   * @throws IOException If IO problem encountered
+   */
+  List<RegionInfo> getListRegionInfo(String tableName, String startRegion, String stopRegion)
+    throws IOException {
+    boolean isAdd = false;
+    List<RegionInfo> hris = new ArrayList<>();
+    List<RegionInfo> tableRegions = this.admin.getRegions(TableName.valueOf(tableName));
+    for (RegionInfo hri : tableRegions) {
+      if (null == startRegion && null == stopRegion) {
+        hris.add(hri);
+        LOG.info("Adding in-range region=" + hri.getRegionNameAsString());
+      } else if (null != startRegion && null != stopRegion) {
+        if (hri.getRegionNameAsString().equals(startRegion)) {
+          LOG.info("Open interval startRegion=" + hri.getRegionNameAsString());
+          isAdd = true;
+          continue;
+        }
+        if (hri.getRegionNameAsString().equals(stopRegion)) {
+          LOG.info("Open interval stopRegion=" + hri.getRegionNameAsString());
+          isAdd = false;
+          break;
+        }
+        if (isAdd) {
+          hris.add(hri);
+          LOG.info("Adding in-range region=" + hri.getRegionNameAsString());
+        }
+      }
+    }
+    return hris;
+  }
+
+  /**
+   * Wait for the running compaction to complete.
+   *
+   * @param tableName table name
+   * @throws IOException If IO problem encountered
+   * @throws InterruptedException If Interrupted problem encountered
+   */
+  private void runCompaction(String tableName) throws IOException, InterruptedException {
+    long startTime = System.currentTimeMillis();
+    while (true) {
+      String compactionState =
+        this.admin.getCompactionState(TableName.valueOf(tableName)).toString();
+      if (!COMPACTION_ATTRIBUTE.equals(compactionState)) {
+        LOG.info("Table=" + tableName + " compactionState=" + compactionState
+          + " compaction complete");
+        break;
+      }
+      Thread.sleep(COMPACT_PAUSE_TIME);
+      long waitTime = (System.currentTimeMillis() - startTime) / 1000;
+      LOG.info("Table=" + tableName + " compactionState=" + compactionState + " waited "
+        + waitTime + " seconds");
+    }
+  }
+
+  private boolean notInTable(final byte[] tn, final byte[] rn) {
+    if (WritableComparator.compareBytes(tn, 0, tn.length, rn, 0, tn.length) != 0) {
+      LOG.error("Region " + Bytes.toString(rn) + " does not belong to table "
+        + Bytes.toString(tn));
+      return true;
+    }
+    return false;
+  }
+
+  /*
+   * @param errorMsg Error message. Can be null.
+   */
+  private void printUsage(final String errorMsg) {
+    if (errorMsg != null && errorMsg.length() > 0) {
+      System.err.println("ERROR: " + errorMsg);
+    }
+    System.err.println("Usage: bin/hbase onlinemerge [--tableName=<tableName>] "
+      + "[--startRegion=<startRegionName>] [--stopRegion=<stopRegionName>] "
+      + "[--maxRegionSize=<maxRegionSize>] [--maxRegionCreateTime=<maxRegionCreateTime>] "
+      + "[--numMaxMergePlans=<numMaxMergePlans>] [--targetRegionCount=<targetRegionCount>] "
+      + "[--printExecutionPlan=<true|false>] [--configMergePauseTime=<mergePauseTime>]\n");
+    System.err.println("Options:");
+    System.err.println("  -h or --h                print this help");
+    System.err.println("  --tableName              table name, required");
+    System.err.println("  --startRegion            first region of the merge range, exclusive");
+    System.err.println("  --stopRegion             last region of the merge range, exclusive");
+    System.err.println("  --maxRegionSize          max region size to merge, in GB");
+    System.err.println("  --maxRegionCreateTime    skip regions created after this time,"
+      + " format yyyy/MM/dd HH:mm:ss");
+    System.err.println("  --numMaxMergePlans       max number of merge plans per round");
+    System.err.println("  --targetRegionCount      stop once the table has this many regions");
+    System.err.println("  --configMergePauseTime   pause between merge rounds, in milliseconds");
+    System.err.println("  --printExecutionPlan     default true: only print the merge plans;"
+      + " false: execute the merges\n");
+    System.err.println("Examples:");
+    System.err.println("bin/hbase onlinemerge --tableName=test:test1 "
+      + "--startRegion=test:test1,,1576835912332.01d0d6c2b41e204104524d9aec6074fb. "
+      + "--stopRegion=test:test1,bbbbbbbb,1573044786980.0c9b5bd93f3b19eb9bd1a1011ddff66f. "
+      + "--maxRegionSize=0 --maxRegionCreateTime=yyyy/MM/dd HH:mm:ss "
+      + "--numMaxMergePlans=2 --targetRegionCount=4 --printExecutionPlan=false");
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
new file mode 100644
index 0000000000..250164e1a1
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSizeCalculator.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.RegionMetrics;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.Size;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.RegionLocator;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
+
+/**
+ * Computes the size of each region for the given table and column families.
+ * The value is used by MapReduce for better scheduling.
+ */
+@InterfaceAudience.Private
+public class RegionSizeCalculator {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RegionSizeCalculator.class);
+
+  /**
+   * Maps each region to its size in bytes.
+   */
+  private final Map<byte[], Long> sizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
+
+  static final String ENABLE_REGIONSIZECALCULATOR = "hbase.regionsizecalculator.enable";
+  private static final long MEGABYTE = 1024L * 1024L;
+
+  /**
+   * Computes the size of each region for the given table and column families.
+   * Sizes come from the store file sizes reported by each region server, so
+   * they can lag slightly behind the live data size.
+   */
+  public RegionSizeCalculator(RegionLocator regionLocator, Admin admin) throws IOException {
+    init(regionLocator, admin);
+  }
+
+  private void init(RegionLocator regionLocator, Admin admin) throws IOException {
+    if (!enabled(admin.getConfiguration())) {
+      LOG.info("Region size calculation disabled.");
+      return;
+    }
+
+    if (regionLocator.getName().isSystemTable()) {
+      LOG.info("Region size calculation disabled for system tables.");
+      return;
+    }
+
+    LOG.info("Calculating region sizes for table \"" + regionLocator.getName() + "\".");
+
+    // Get the servers which host regions of the table
+    Set<ServerName> tableServers = getRegionServersOfTable(regionLocator);
+
+    for (ServerName tableServerName : tableServers) {
+      for (RegionMetrics regionLoad : admin.getRegionMetrics(tableServerName,
+        regionLocator.getName())) {
+
+        byte[] regionId = regionLoad.getRegionName();
+        long regionSizeBytes =
+          ((long) regionLoad.getStoreFileSize().get(Size.Unit.MEGABYTE)) * MEGABYTE;
+
+        sizeMap.put(regionId, regionSizeBytes);
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Region " + regionLoad.getNameAsString() + " has size " + regionSizeBytes);
+        }
+      }
+    }
+    LOG.debug("Region sizes calculated");
+  }
+
+  private Set<ServerName> getRegionServersOfTable(RegionLocator regionLocator)
+    throws IOException {
+    Set<ServerName> tableServers = Sets.newHashSet();
+    for (HRegionLocation regionLocation : regionLocator.getAllRegionLocations()) {
+      tableServers.add(regionLocation.getServerName());
+    }
+    return tableServers;
+  }
+
+  boolean enabled(Configuration configuration) {
+    return configuration.getBoolean(ENABLE_REGIONSIZECALCULATOR, true);
+  }
+
+  /**
+   * Returns the size of the given region in bytes, or 0 if the region was not found.
+   */
+  public long getRegionSize(byte[] regionId) {
+    Long size = sizeMap.get(regionId);
+    if (size == null) {
+      LOG.debug("Unknown region: " + Arrays.toString(regionId));
+      return 0;
+    } else {
+      return size;
+    }
+  }
+
+  public Map<byte[], Long> getRegionSizeMap() {
+    return Collections.unmodifiableMap(sizeMap);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestOnlineMergeTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestOnlineMergeTool.java
new file mode 100644
index 0000000000..3a554d42c5
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestOnlineMergeTool.java
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.MiscTests;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests merging a normal table's regions online.
+ */
+@Category({ MiscTests.class, MediumTests.class })
+public class TestOnlineMergeTool {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestOnlineMergeTool.class);
+
+  static final Logger LOG = LoggerFactory.getLogger(TestOnlineMergeTool.class);
+  private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+  private final byte[] COLUMN_NAME = Bytes.toBytes("f1");
+  private static final byte[] VALUE;
+  private Connection connection;
+  private byte[][] splitKeys;
+
+  static {
+    // We will use the same value for the rows as that is not really important here
+    String partialValue = String.valueOf(System.currentTimeMillis());
+    StringBuilder val = new StringBuilder();
+    while (val.length() < 100) {
+      val.append(partialValue);
+    }
+    VALUE = Bytes.toBytes(val.toString());
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    splitKeys =
+      new byte[][] { Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c"),
+        Bytes.toBytes("e"), Bytes.toBytes("f"), Bytes.toBytes("h"), Bytes.toBytes("i") };
+
+    // Set maximum regionsize down.
+    UTIL.getConfiguration().setLong(HConstants.HREGION_MAX_FILESIZE, 64L * 1024L * 1024L);
+    // Make it so we don't split.
+    UTIL.getConfiguration().setInt("hbase.regionserver.regionSplitLimit", 0);
+    // Start up HDFS; it is in here we'll be putting our manually made regions.
+    UTIL.startMiniDFSCluster(1);
+    // Create hdfs hbase rootdir.
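+    // createRootDir() creates the HBase root directory on the mini DFS and
+    // points hbase.rootdir at it; any leftover directory from a previous run
+    // is deleted below before the cluster starts.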
+    Path rootdir = UTIL.createRootDir();
+    FileSystem fs = FileSystem.get(UTIL.getConfiguration());
+    if (fs.exists(rootdir)) {
+      if (fs.delete(rootdir, true)) {
+        LOG.info("Cleaned up existing " + rootdir);
+      }
+    }
+    // Now create the root and meta regions and insert the data regions
+    LOG.info("Starting mini zk cluster");
+    UTIL.startMiniZKCluster();
+    LOG.info("Starting mini hbase cluster");
+    UTIL.startMiniHBaseCluster(1, 1);
+    Configuration c = new Configuration(UTIL.getConfiguration());
+    connection = ConnectionFactory.createConnection(c);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+    LOG.info("After cluster shutdown");
+  }
+
+  /**
+   * Test that printing the help does not throw.
+   */
+  @Test
+  public void testMergeHelp() throws IOException {
+    String[] args = new String[] { "-h" };
+    HBaseConfiguration hBaseConfiguration = new HBaseConfiguration(UTIL.getConfiguration());
+    OnlineMergeTool onlineMergeTool = new OnlineMergeTool(hBaseConfiguration);
+    try {
+      onlineMergeTool.run(args);
+    } catch (Exception e) {
+      Assert.fail("Printing help should not throw: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Test merging regions selected by size.
+   *
+   * @throws Exception exception
+   */
+  @Test(timeout = 500000)
+  public void testMergeSizeBasedRegion() throws Exception {
+    final String tableName = "test";
+    // Table we are manually creating offline.
+    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(Bytes.toBytes(tableName)));
+    desc.addFamily(new HColumnDescriptor(COLUMN_NAME));
+    Admin admin = connection.getAdmin();
+    admin.createTable(desc, splitKeys);
+    String[] args =
+      new String[] { "--tableName=" + tableName, "--maxRegionSize=0", "--targetRegionCount=2",
+        "--printExecutionPlan=false", "--configMergePauseTime=10000" };
+    HBaseConfiguration hBaseConfiguration = new HBaseConfiguration(UTIL.getConfiguration());
+    OnlineMergeTool onlineMergeTool = new OnlineMergeTool(hBaseConfiguration);
+    onlineMergeTool.run(args);
+    List<String> merge2regionName = getListRegionName(admin, tableName);
+    Assert.assertEquals(Integer.parseInt(args[2].substring("--targetRegionCount=".length())),
+      merge2regionName.size());
+  }
+
+  /**
+   * Test merging a range of regions bounded by startRegion and stopRegion.
+   *
+   * @throws Exception exception
+   */
+  @Test(timeout = 500000)
+  public void testMergeRangeRegion() throws Exception {
+    final String tableName = "testRangeRegion";
+    // Table we are manually creating offline.
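+    // The seven split keys from setUp() yield eight initial regions.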
+    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(Bytes.toBytes(tableName)));
+    desc.addFamily(new HColumnDescriptor(COLUMN_NAME));
+    Admin admin = connection.getAdmin();
+    admin.createTable(desc, splitKeys);
+    List<String> regionName = getListRegionName(admin, tableName);
+    String[] args = new String[7];
+    args[0] = "--tableName=" + tableName;
+    args[1] = "--startRegion=" + regionName.get(0);
+    args[2] = "--stopRegion=" + regionName.get(3);
+    args[3] = "--maxRegionSize=" + 0;
+    args[4] = "--targetRegionCount=" + 2;
+    args[5] = "--printExecutionPlan=false";
+    args[6] = "--configMergePauseTime=10000";
+
+    HBaseConfiguration hBaseConfiguration = new HBaseConfiguration(UTIL.getConfiguration());
+    OnlineMergeTool onlineMergeTool = new OnlineMergeTool(hBaseConfiguration);
+    onlineMergeTool.run(args);
+    List<String> merge2regionName = getListRegionName(admin, tableName);
+    Assert.assertEquals(regionName.size() - 1, merge2regionName.size());
+  }
+
+  /**
+   * Test merging regions filtered by creation time.
+   *
+   * @throws Exception exception
+   */
+  @Test(timeout = 500000)
+  public void testMergeTimeBasedRegion() throws Exception {
+    final String tableName = "testTimeBasedRegion";
+    // Table we are manually creating offline.
+    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(Bytes.toBytes(tableName)));
+    desc.addFamily(new HColumnDescriptor(COLUMN_NAME));
+    Admin admin = connection.getAdmin();
+    admin.createTable(desc, splitKeys);
+    String[] args = new String[6];
+    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
+    String format = dateFormat.format(System.currentTimeMillis() + 30 * 1000);
+    args[0] = "--tableName=" + tableName;
+    args[1] = "--maxRegionSize=" + 0;
+    args[2] = "--maxRegionCreateTime=" + format;
+    args[3] = "--targetRegionCount=" + 2;
+    args[4] = "--printExecutionPlan=false";
+    args[5] = "--configMergePauseTime=10000";
+    HBaseConfiguration hBaseConfiguration = new HBaseConfiguration(UTIL.getConfiguration());
+    OnlineMergeTool onlineMergeTool = new OnlineMergeTool(hBaseConfiguration);
+    onlineMergeTool.run(args);
+    List<String> merge2regionName = getListRegionName(admin, tableName);
+    Assert.assertEquals(Integer.parseInt(args[3].substring("--targetRegionCount=".length())),
+      merge2regionName.size());
+  }
+
+  private List<String> getListRegionName(Admin admin, String tableName) throws IOException {
+    List<RegionInfo> tableRegions = admin.getRegions(TableName.valueOf(tableName));
+    List<String> list = new ArrayList<>();
+    for (RegionInfo hri : tableRegions) {
+      list.add(hri.getRegionNameAsString());
+    }
+    return list;
+  }
+
+}
-- 
2.19.0