Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (working copy) @@ -161,6 +161,21 @@ putsToMetaTable(catalogTracker, puts); LOG.info("Added " + puts.size() + " regions in META"); } + + /** + * Adds a META row for the specified new region and updates its location. + * @param catalogTracker CatalogTracker + * @param regionInfo region information + * @param ServerName location of region-server which host the new region + * @throws IOException if problem connecting or updating meta + */ + public static void addRegionAndLocationToMeta(final CatalogTracker catalogTracker, + final HRegionInfo regionInfo, final ServerName sn) throws IOException { + final Put put = makePutFromRegionInfo(regionInfo); + addLocation(put, sn); + putToMetaTable(catalogTracker, put); + LOG.info("Added region " + regionInfo.getRegionNameAsString() + " to META"); + } /** * Offline parent in meta. Index: src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (working copy) @@ -24,6 +24,7 @@ import java.io.InterruptedIOException; import java.lang.reflect.UndeclaredThrowableException; import java.net.SocketTimeoutException; +import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -1181,7 +1182,7 @@ this.connection.getHRegionConnection(sn.getHostname(), sn.getPort()); rs.flushRegion(hri); } - + /** * Compact a table or an individual region. * Asynchronous operation. 
@@ -1463,6 +1464,64 @@ throw RemoteExceptionHandler.decodeRemoteException(re); } } + + /** + * Delete the specified region, more IRB friendly version. The input format + * of the region name would look like + * TestTable,0094429456,1289497600452.527db22f95c8a9e0116f0cc13c680396. + * Asynchronous operation. + * + * @param regionName name of the region to be deleted + * @throws IOException if a remote or network exception occurs + */ + public void deleteRegion(final String regionName) + throws IOException { + List regionList = new ArrayList(); + regionList.add(Bytes.toBytes(regionName)); + deleteRegion(regionList); + } + + /** + * Delete the specified region, more IRB friendly version. + * Asynchronous operation. + * + * @param regionNameList name of the region to be deleted + * @throws IOException if a remote or network exception occurs + */ + public void deleteRegion(final List regionNameList) + throws IOException { + CatalogTracker ct = getCatalogTracker(); + try { + if (isRegionName(regionNameList.get(0), ct)) { + Pair pair = + MetaReader.getRegion(ct, regionNameList.get(0)); + if (pair == null || pair.getSecond() == null) { + LOG.info("No server in .META. for " + + Bytes.toStringBinary(regionNameList.get(0)) + "; pair=" + pair); + } else { + final HRegionInterface rs = + this.connection.getHRegionConnection(pair.getSecond().getHostname(), + pair.getSecond().getPort()); + int regionsCount = rs.getRegionsCount(regionNameList.get(0)); + if (regionsCount < 2) { + throw new IOException("Region server serves only one region per table." 
+ + " This operation is not allowed."); + } + List encodedRegionNameList = + new ArrayList(regionNameList.size()); + encodedRegionNameList.add(pair.getFirst().getEncodedNameAsBytes()); + getMaster().deleteRegion(encodedRegionNameList); + } + } else { + LOG.info("Region name " + + Bytes.toStringBinary(regionNameList.get(0)) + " does not exist" ); + } + } catch (RemoteException re) { + throw RemoteExceptionHandler.decodeRemoteException(re); + } finally { + cleanupCatalogTracker(ct); + } + } /** * @param tableNameOrRegionName Name of a table or name of a region. Index: src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (working copy) @@ -141,6 +141,20 @@ */ public void modifyTable(byte[] tableName, HTableDescriptor htd) throws IOException; + + /** + * Passing a list of regions gives us flexibility in dealing with the + * following requests: + * 1. only one region is requested to be deleted + * 2. a continuous range of regions are requested to be deleted + * 3. sparse regions (where start and end keys may not be adjacent) + * can be deleted in one call to the master + * + * @param encodedRegionNameList list of regions to be deleted + * @throws IOException e + */ + public void deleteRegion(List encodedRegionNameList) + throws IOException; /** * Shutdown an HBase cluster. 
Index: src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (working copy) @@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.WritableByteArrayComparable; import org.apache.hadoop.hbase.io.hfile.BlockCacheColumnFamilySummary; +import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.RegionOpeningState; import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; import org.apache.hadoop.hbase.regionserver.wal.HLog; @@ -361,7 +362,7 @@ * @throws IOException e */ public List getOnlineRegions() throws IOException; - + /** * Method used when a master is taking the place of another failed one. * @return This servers {@link HServerInfo}; it has RegionServer POV on the @@ -530,7 +531,7 @@ */ void compactRegion(HRegionInfo regionInfo, boolean major) throws NotServingRegionException, IOException; - + /** * Replicates the given entries. The guarantee is that the given entries * will be durable on the slave cluster if this method returns without @@ -623,4 +624,13 @@ @Override public void stop(String why); + + /** + * Gets the count of online regions of the table in a region server. + * This method looks at the in-memory onlineRegions. 
+ * @param regionName + * @return int regions count + * @throws IOException + */ + public int getRegionsCount(byte[] regionName) throws IOException; } Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -71,6 +71,7 @@ import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator; import org.apache.hadoop.hbase.master.handler.CreateTableHandler; +import org.apache.hadoop.hbase.master.handler.DeleteRegionHandler; import org.apache.hadoop.hbase.master.handler.DeleteTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; @@ -95,6 +96,7 @@ import org.apache.hadoop.hbase.util.VersionInfo; import org.apache.hadoop.hbase.zookeeper.ClusterId; import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; +import org.apache.hadoop.hbase.zookeeper.MasterDeleteRegionTracker; import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker; import org.apache.hadoop.hbase.zookeeper.MasterSchemaChangeTracker; import org.apache.hadoop.hbase.zookeeper.RegionServerTracker; @@ -169,6 +171,9 @@ // Schema change tracker private MasterSchemaChangeTracker schemaChangeTracker; + + //Delete-region change tracker + private MasterDeleteRegionTracker deleteRegionTracker; // buffer for "fatal error" notices from region servers // in the cluster. 
This is only used for assisting @@ -425,6 +430,11 @@ this, this, conf.getInt("hbase.instant.schema.alter.timeout", 60000)); this.schemaChangeTracker.start(); + + // initialize delete region tracker + this.deleteRegionTracker = new MasterDeleteRegionTracker(getZooKeeper(), + this,this, conf.getInt("hbase.delete.region.timeout", 1800000)); + this.deleteRegionTracker.start(); LOG.info("Server active/primary master; " + this.serverName + ", sessionid=0x" + @@ -707,6 +717,11 @@ public MasterSchemaChangeTracker getSchemaChangeTracker() { return this.schemaChangeTracker; } + + @Override + public MasterDeleteRegionTracker getDeleteRegionTracker() { + return this.deleteRegionTracker; + } public RegionServerTracker getRegionServerTracker() { return this.regionServerTracker; @@ -833,6 +848,25 @@ }; return Threads.setDaemonThreadRunning(chore.getThread()); } + + /** + * Start the delete-region janitor. This Janitor will periodically sweep + * the failed/expired delete-region requests.. + * @param master + * @return Thread + */ + private Thread getAndStartDeleteRegionJanitorChore(final HMaster master) { + String name = master.getServerName() + "-DeleteRegionJanitorChore"; + int deleteRegionJanitorPeriod = + master.getConfiguration().getInt("hbase.delete.region.janitor.period", 120000); + Chore chore = new Chore(name, deleteRegionJanitorPeriod, master) { + @Override + protected void chore() { + master.getDeleteRegionTracker().handleFailedOrExpiredDeleteRegionRequests(); + } + }; + return Threads.setDaemonThreadRunning(chore.getThread()); + } private Thread getAndStartBalancerChore(final HMaster master) { @@ -972,6 +1006,11 @@ "it to complete before running the load balancer."); return false; } + if (deleteRegionTracker.isDeleteRegionInProgress()) { + LOG.debug("Delete region operation is in progress. 
Waiting for " + + "it to complete before running the load balancer."); + return false; + } loadBalancerRunning = true; if (this.cpHost != null) { try { @@ -1224,8 +1263,47 @@ return new Pair(0,0); } } + + /** + * Used by the client to get the status of delete region + * + * @param regionName + * @return int (0 - nothing to process, -1 failure, 1 in-process, 2 success + * @throws IOException + */ + private int getDeleteRegionStatusFromDeleteRegionTracker(byte[] regionName) + throws IOException { + MasterDeleteRegionTracker.RegionDeletionStatus drs = null; + try { + drs = + this.deleteRegionTracker.getDeleteRegionStatus(Bytes.toString(regionName)); + } catch (KeeperException ke) { + LOG.error("KeeperException while getting delete-region status for region = " + + Bytes.toString(regionName), ke); + } + if (drs != null) { + LOG.debug("Getting DeleteRegionStatus from DeleteRegionTracker for region = " + + Bytes.toString(regionName) + ". DeleteRegionStatus = " + + drs.toString()); + if (drs.getRegionDeletionStatus() + == MasterDeleteRegionTracker.RegionDeletionStatus.RegionDeletionState.FAILURE) { + return -1; + } else if (drs.getRegionDeletionStatus() + == MasterDeleteRegionTracker.RegionDeletionStatus.RegionDeletionState.INPROCESS) { + return 1; + } else if (drs.getRegionDeletionStatus() + == MasterDeleteRegionTracker.RegionDeletionStatus.RegionDeletionState.SUCCESS) { + return 2; + } else { + return -1; + } + } else { + LOG.debug("DeleteRegionStatus is NULL for region = " + + Bytes.toString(regionName)); + return -1; + } + } - public void addColumn(byte [] tableName, HColumnDescriptor column) throws IOException { if (cpHost != null) { @@ -1339,6 +1417,16 @@ cpHost.postModifyTable(tableName, htd); } } + + @Override + public void deleteRegion(final List encodedRegionNameList) + throws IOException { + if (encodedRegionNameList != null && encodedRegionNameList.size() > 1) { + throw new IOException("Only one region name is supported"); + } + this.executorService.submit( + 
new DeleteRegionHandler(encodedRegionNameList.get(0), this, this, this)); + } private boolean isOnlineSchemaChangeAllowed() { return conf.getBoolean( Index: src/main/java/org/apache/hadoop/hbase/master/MasterServices.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterServices.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/master/MasterServices.java (working copy) @@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.ExecutorService; +import org.apache.hadoop.hbase.zookeeper.MasterDeleteRegionTracker; import org.apache.hadoop.hbase.zookeeper.MasterSchemaChangeTracker; import org.apache.hadoop.hbase.zookeeper.RegionServerTracker; @@ -91,5 +92,11 @@ * @return RegionServerTracker */ public RegionServerTracker getRegionServerTracker(); + + /** + * Get Master delete region tracker + * @return MasterDeleteRegionTracker + */ + public MasterDeleteRegionTracker getDeleteRegionTracker(); } Index: src/main/java/org/apache/hadoop/hbase/master/handler/DeleteRegionHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/DeleteRegionHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/handler/DeleteRegionHandler.java (revision 0) @@ -0,0 +1,151 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.executor.EventHandler; +import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.ipc.HMasterInterface; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.zookeeper.MasterDeleteRegionTracker; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.zookeeper.KeeperException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Handles deleting a region. 
+ */ +public class DeleteRegionHandler extends EventHandler { + + private static final Log LOG = LogFactory.getLog(DeleteRegionHandler.class); + private final byte[] encodedRegionName; + private HMasterInterface master = null; + private MasterServices masterServices = null; + + public DeleteRegionHandler(final byte [] encodedRegionName, + final Server server, final HMasterInterface masterInterface, + final MasterServices masterServices) throws IOException { + super(server, EventType.RS_ZK_REGION_CLOSED); + this.encodedRegionName = encodedRegionName; + this.master = masterInterface; + this.masterServices = masterServices; + } + + @Override + public void process() { + LOG.debug("Handling DELETE_REGION event for " + encodedRegionName); + MonitoredTask status = TaskMonitor.get().createStatus( + "Handling delete-region request for region = " + + new String(encodedRegionName)); + boolean prevBalanceSwitch = false; + String regionNameStr = Bytes.toString(encodedRegionName); + try { + // turn off load balancer synchronously + prevBalanceSwitch = master.synchronousBalanceSwitch(false); + waitForInflightSplit(encodedRegionName, status); + MasterDeleteRegionTracker deleteRegionTracker = + this.masterServices.getDeleteRegionTracker(); + deleteRegionTracker.createDeleteRegionNode(regionNameStr); + + while(!deleteRegionTracker.doesDeleteRegionNodeExist( + regionNameStr)) { + try { + Thread.sleep(50); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + status.markComplete("Created ZK node for handling the delete-region" + + " request for region = " + regionNameStr); + } catch (final KeeperException e) { + LOG.warn("Delete-region failed for region " + regionNameStr, e); + status.setStatus("Delete-region failed for region " + regionNameStr + + " Cause = " + e.getCause()); + } catch (final IOException ioe) { + LOG.warn("Delete-region failed for region " + regionNameStr, ioe); + status.setStatus("Delete-region failed for region " + regionNameStr + + " 
Cause = " + ioe.getCause()); + } finally { + master.synchronousBalanceSwitch(prevBalanceSwitch); + } + } + + /** + * Wait for region split transaction in progress (if any) + * @param regions + * @param status + */ + private void waitForInflightSplit(byte[] encodedRegionName, + MonitoredTask status) { + while (isSplitInProgress(encodedRegionName)) { + try { + status.setStatus("Delete-region is waiting for split region to complete."); + Thread.sleep(100); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + /** + * Check whether any of the regions from the list of regions is + * undergoing a split. We simply check whether there is a unassigned node + * for any of the region and if so we return as true. + * @param encodedRegionName + * @return true if split is in progress otherwise false + */ + private boolean isSplitInProgress(byte[] encodedRegionName) { + ZooKeeperWatcher zkw = this.masterServices.getZooKeeper(); + final String node = ZKAssign.getNodeName(zkw, Bytes.toString(encodedRegionName)); + try { + if (ZKUtil.checkExists(zkw, node) != -1) { + LOG.debug("Region " + Bytes.toString(encodedRegionName) + + " is unassigned. 
Assuming that it is undergoing a split"); + return true; + } + } catch (final KeeperException ke) { + LOG.debug("KeeperException while determining splits in progress.", ke); + return false; + } + return false; + } + + @Override + public String toString() { + String name = "UnknownServerName"; + if(server != null && server.getServerName() != null) { + name = server.getServerName().toString(); + } + return getClass().getSimpleName() + "-" + name + "-" + getSeqid() + "-" + + new String(encodedRegionName); + } +} Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -3712,7 +3712,7 @@ deleteRegion(fs, HRegion.getRegionDir(rootdir, info)); } - private static void deleteRegion(FileSystem fs, Path regiondir) + static void deleteRegion(FileSystem fs, Path regiondir) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("DELETING region " + regiondir.toString()); @@ -3879,12 +3879,46 @@ " into new region " + newRegionInfo.toString() + " with start key <" + Bytes.toStringBinary(startKey) + "> and end key <" + Bytes.toStringBinary(endKey) + ">"); - // Move HStoreFiles under new region directory Map> byFamily = new TreeMap>(Bytes.BYTES_COMPARATOR); byFamily = filesByFamily(byFamily, a.close()); byFamily = filesByFamily(byFamily, b.close()); + moveStoreFilesToNewRegionDir(byFamily, fs, tableDir, newRegionInfo); + if (LOG.isDebugEnabled()) { + LOG.debug("Files for new region"); + listPaths(fs, newRegionDir); + } + HRegion dstRegion = HRegion.newHRegion(tableDir, log, fs, conf, + newRegionInfo, a.getTableDesc(), null); + dstRegion.readRequestsCount.set(a.readRequestsCount.get() + b.readRequestsCount.get()); + dstRegion.writeRequestsCount.set(a.writeRequestsCount.get() + b.writeRequestsCount.get()); + 
dstRegion.initialize(); + dstRegion.compactStores(); + if (LOG.isDebugEnabled()) { + LOG.debug("Files for new region"); + listPaths(fs, dstRegion.getRegionDir()); + } + deleteRegion(fs, a.getRegionDir()); + deleteRegion(fs, b.getRegionDir()); + + LOG.info("merge completed. New region is " + dstRegion); + + return dstRegion; + } + + /** + * Move HStoreFiles under new region directory + * @param byFamily + * @param fs + * @param tableDir + * @param newRegionInfo + * @throws IOException + */ + static void moveStoreFilesToNewRegionDir( + final Map> byFamily, + final FileSystem fs, Path tableDir, + final HRegionInfo newRegionInfo) throws IOException{ for (Map.Entry> es : byFamily.entrySet()) { byte [] colFamily = es.getKey(); makeColumnFamilyDirs(fs, tableDir, newRegionInfo, colFamily); @@ -3907,26 +3941,6 @@ newRegionInfo.getEncodedName(), colFamily))); } } - if (LOG.isDebugEnabled()) { - LOG.debug("Files for new region"); - listPaths(fs, newRegionDir); - } - HRegion dstRegion = HRegion.newHRegion(tableDir, log, fs, conf, - newRegionInfo, a.getTableDesc(), null); - dstRegion.readRequestsCount.set(a.readRequestsCount.get() + b.readRequestsCount.get()); - dstRegion.writeRequestsCount.set(a.writeRequestsCount.get() + b.writeRequestsCount.get()); - dstRegion.initialize(); - dstRegion.compactStores(); - if (LOG.isDebugEnabled()) { - LOG.debug("Files for new region"); - listPaths(fs, dstRegion.getRegionDir()); - } - deleteRegion(fs, a.getRegionDir()); - deleteRegion(fs, b.getRegionDir()); - - LOG.info("merge completed. 
New region is " + dstRegion); - - return dstRegion; } /* @@ -3936,7 +3950,7 @@ * @param family * @return Returns byFamily */ - private static Map> filesByFamily( + static Map> filesByFamily( Map> byFamily, List storeFiles) { for (StoreFile src: storeFiles) { byte [] family = src.getFamily(); @@ -3970,7 +3984,7 @@ * @param dir * @throws IOException */ - private static void listPaths(FileSystem fs, Path dir) throws IOException { + public static void listPaths(FileSystem fs, Path dir) throws IOException { if (LOG.isDebugEnabled()) { FileStatus[] stats = FSUtils.listStatus(fs, dir, null); if (stats == null || stats.length == 0) { Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -38,6 +38,8 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.NavigableMap; +import java.util.TreeSet; import java.util.Map.Entry; import java.util.Random; import java.util.Set; @@ -55,6 +57,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.ClockOutOfSyncException; import org.apache.hadoop.hbase.DoNotRetryIOException; @@ -66,6 +69,7 @@ import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HServerLoad; +import org.apache.hadoop.hbase.HServerLoad.RegionLoad; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterAddressTracker; @@ -83,6 +87,8 @@ import org.apache.hadoop.hbase.catalog.RootLocationEditor; import org.apache.hadoop.hbase.client.Action; import 
org.apache.hadoop.hbase.client.Append; +import org.apache.hadoop.hbase.client.MetaScanner; +import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.RowMutation; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; @@ -116,6 +122,7 @@ import org.apache.hadoop.hbase.ipc.ProtocolSignature; import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; +import org.apache.hadoop.hbase.mapreduce.TableSplit; import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; import org.apache.hadoop.hbase.regionserver.handler.CloseMetaHandler; @@ -142,11 +149,14 @@ import org.apache.hadoop.hbase.util.Sleeper; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.VersionInfo; +import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker; +import org.apache.hadoop.hbase.zookeeper.RegionServerDeleteRegionTracker; import org.apache.hadoop.hbase.zookeeper.SchemaChangeTracker; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.hadoop.hbase.zookeeper.RegionServerDeleteRegionTracker.RegionDeletionStatus; import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.ipc.RemoteException; @@ -288,6 +298,9 @@ // Schema change Tracker private SchemaChangeTracker schemaChangeTracker; + + //Delete region Tracker + private RegionServerDeleteRegionTracker deleteRegionTracker; // Log Splitting Worker private SplitLogWorker splitLogWorker; @@ -586,6 +599,11 @@ this.schemaChangeTracker = new SchemaChangeTracker(this.zooKeeper, this, this); this.schemaChangeTracker.start(); + + // Delete region tracker + 
this.deleteRegionTracker = new RegionServerDeleteRegionTracker(this.zooKeeper, + this, this); + this.deleteRegionTracker.start(); } /** @@ -825,8 +843,9 @@ Collection regions = getOnlineRegionsLocalContext(); TreeMap regionLoads = new TreeMap(Bytes.BYTES_COMPARATOR); + RegionLoad regionLoad = null; for (HRegion region: regions) { - regionLoads.put(region.getRegionName(), createRegionLoad(region)); + regionLoads.put(region.getRegionName(), createRegionLoad(region)); } MemoryUsage memory = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage(); @@ -986,6 +1005,11 @@ private void deleteMyEphemeralNode() throws KeeperException { ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath()); } + + private void watchDeleteRegionZNode() throws KeeperException { + ZKUtil.watchAndCheckExists(this.zooKeeper, this.zooKeeper.deleteRegionZNode); + } + public RegionServerAccounting getRegionServerAccounting() { return regionServerAccounting; @@ -1041,7 +1065,8 @@ totalStaticIndexSizeKB, totalStaticBloomSizeKB, (int) r.readRequestsCount.get(), (int) r.writeRequestsCount.get(), totalCompactingKVs, currentCompactedKVs, - r.getCoprocessorHost().getCoprocessors()); + (r.getCoprocessorHost() != null ? + r.getCoprocessorHost().getCoprocessors() : new TreeSet())); } /** @@ -2852,11 +2877,28 @@ } } } + + /** + * Wait for mid-flight delete-region requests. (if any) + * @param regionName region to be deleted. 
+ */ + private void waitForDeleteRegion(String regionName) { + while (deleteRegionTracker.isDeleteRegionInProgress(regionName)) { + try { + LOG.debug("Delete region is inprogress for region = " + regionName + + " Waiting for delete region to complete"); + Thread.sleep(100); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } @Override public void splitRegion(HRegionInfo regionInfo, byte[] splitPoint) throws NotServingRegionException, IOException { waitForSchemaChange(Bytes.toString(regionInfo.getTableName())); + waitForDeleteRegion(regionInfo.getEncodedName()); checkOpen(); HRegion region = getRegion(regionInfo.getRegionName()); region.flushcache(); @@ -3633,7 +3675,311 @@ } } } - + + /** + * Move data from adjacent region to newly created region. + * @param adjRegion + * @param startKey + * @param endKey + * @return HRegionInfo + * @throws IOException + */ + private HRegionInfo moveDataFromAdjacentRegionToNewRegion( + final HRegion adjRegion, byte[] startKey, byte[] endKey) + throws IOException{ + LOG.info("Found adjacent region " + adjRegion + ". StartKey=" + + adjRegion.getStartKey() + ", EndKey=" + adjRegion.getEndKey()); + adjRegion.flushcache(); + adjRegion.compactStores(true); + final HRegionInfo newRegionInfo = + new HRegionInfo(adjRegion.getTableDesc().getName(), startKey, endKey); + if (LOG.isDebugEnabled()) { + LOG.info("Creating new region " + newRegionInfo.toString()); + } + String encodedName = newRegionInfo.getEncodedName(); + final Path newRegionDir = HRegion.getRegionDir(adjRegion.getTableDir(), + encodedName); + if (fs.exists(newRegionDir)) { + FileStatus[] status = FSUtils.listStatus(fs, newRegionDir, null); + // we are fine with empty directory + if (status != null && status.length > 0) { + throw new IOException("Cannot merge; target file collision at " + + newRegionDir + ", startKey=" + startKey + ", endKey=" + endKey + + ". 
Please validate and cleanup the " + newRegionDir + " directory"); + } + } else { + if (!fs.mkdirs(newRegionDir)) { + throw new IOException("Could not create a new region directory " + + newRegionDir ); + } + } + // Move HStoreFiles of adjRegion to new region directory + if (LOG.isDebugEnabled()) { + LOG.debug("Moving HStoreFiles of adjRegion to new region dir " + + newRegionDir); + } + final Map> byFamily = + new TreeMap>(Bytes.BYTES_COMPARATOR); + HRegion.filesByFamily(byFamily, adjRegion.close()); + HRegion.moveStoreFilesToNewRegionDir(byFamily, + adjRegion.getFilesystem(), adjRegion.getTableDir(),newRegionInfo); + return newRegionInfo; + } + + /** + * Find adjacent region, span the start/end key of adjacent region, + * create new region, move data from adjacent region to new region, + * remove adjacent region from META or file-system and remove + * destination region from META or file-system. + * @param hRegion destination region + * @return HRegion newly created region + * @throws IOException + * @throws KeeperException + */ + private HRegion createNewRegionFromAdjacentRegion( + final HRegion hRegion) throws IOException, KeeperException { + final Pair keys = getStartEndKeys( + hRegion.getTableDesc().getName(), hRegion.getConf()); + if (keys == null || keys.getFirst() == null) { + throw new IOException("Expecting at least one region."); + } + byte[] startKey = hRegion.getStartKey(); + byte[] endKey = hRegion.getEndKey(); + HRegionInfo adjRegionInfo = null; + HRegion newRegion = null; + final String regionName = hRegion.getRegionInfo().getEncodedName(); + //Find start/end keys so that adjacent region can span + //the region to be deleted + for (int i = 0; i < keys.getFirst().length; i++) { + if (LOG.isDebugEnabled()) { + LOG.debug("i="+i+", hRegion.getStartKey()="+hRegion.getStartKey() + +", keys.getFirst()["+i+"]="+ keys.getFirst()[i] + +", hRegion.getEndKey()="+ hRegion.getEndKey() + + ", keys.getSecond()[" + i + "]=" + keys.getSecond()[i] ); + } + // Check 
both start and end key + if (Bytes.equals(hRegion.getStartKey(), keys.getFirst()[i]) + && Bytes.equals(hRegion.getEndKey(), keys.getSecond()[i])) { + final NavigableMap hriMap = + MetaScanner.allTableRegions(hRegion.getConf(), + hRegion.getTableDesc().getName(), false); + final HRegionInfo[] hriArray = + (HRegionInfo[]) hriMap.keySet().toArray(new HRegionInfo[0]); + if (i == 0 && keys.getFirst().length > 1) { + //first region, start-key is null, so fall to next region + endKey = keys.getSecond()[i+1]; + adjRegionInfo = (HRegionInfo) hriArray[i+1]; + } else if (i > 0 && keys.getFirst().length > 1) { + //last region or middle, so fall to previous region + startKey = keys.getFirst()[i-1]; + adjRegionInfo = (HRegionInfo) hriArray[i-1]; + } + if (adjRegionInfo == null) { + throw new IOException("Could not find the adjacent region " + + "to bridge the hole in .META."); + } + if (LOG.isDebugEnabled()) { + LOG.debug("To bridge the hole in META: New startKey=" + + Bytes.toString(startKey) + ", endKey=" + Bytes.toString(endKey)); + } + // Find adj region, get the data from adj region to new region + HRegion adjRegion = HRegion.newHRegion(hRegion.getTableDir(), + hRegion.getLog(), hRegion.getFilesystem(), hRegion.getConf(), + adjRegionInfo, hRegion.getTableDesc(), null); + // using new start/end key, move the data from adj region to new region + HRegionInfo newRegionInfo = moveDataFromAdjacentRegionToNewRegion( + adjRegion, startKey, endKey); + // deploy the new region + newRegion = createNewRegion(hRegion, newRegionInfo); + // delete the dest region from META + deleteRegionFromMeta(regionName, + hRegion.getRegionInfo(), + RegionDeletionStatus.RegionDeletionState.DEST_REGION_REMOVED_FROM_META, + RegionDeletionStatus.RegionDeletionState.DEST_REGION_REMOVE_FROM_META_FAILED); + // delete the dest region in FS + deleteRegionFromFs(regionName, + hRegion, + RegionDeletionStatus.RegionDeletionState.DEST_REGION_DELETION_FROM_FS, + 
RegionDeletionStatus.RegionDeletionState.DEST_REGION_DELETION_FROM_FS_FAILED); + // delete the adj region from META + deleteRegionFromMeta(regionName, + adjRegion.getRegionInfo(), + RegionDeletionStatus.RegionDeletionState.ADJ_REGION_REMOVED_FROM_META, + RegionDeletionStatus.RegionDeletionState.ADJ_REGION_REMOVE_FROM_META_FAILED); + // delete the adj region in FS + deleteRegionFromFs(regionName, + adjRegion, + RegionDeletionStatus.RegionDeletionState.ADJ_REGION_DELETION_FROM_FS, + RegionDeletionStatus.RegionDeletionState.ADJ_REGION_DELETION_FROM_FS_FAILED); + removeFromOnlineRegions(adjRegion.getRegionInfo().getEncodedName()); + break; + } + } + return newRegion; + } + + private HRegion createNewRegion(final HRegion destRegion, + final HRegionInfo newRegionInfo) throws IOException{ + final HRegion newRegion = HRegion.newHRegion(destRegion.getTableDir(), + destRegion.getLog(), destRegion.getFilesystem(), + destRegion.getConf(), newRegionInfo, destRegion.getTableDesc(), null); + newRegion.readRequestsCount.set(destRegion.readRequestsCount.get()); + newRegion.writeRequestsCount.set(destRegion.writeRequestsCount.get()); + newRegion.initialize(); + newRegion.compactStores(); + if (LOG.isDebugEnabled()) { + LOG.debug("Created new region in file system " + + newRegion.getRegionDir() + "Files "); + HRegion.listPaths(fs, newRegion.getRegionDir()); + } + return newRegion; + } + + private void deleteRegionFromMeta(final String regionName, + final HRegionInfo hRegionInfo, + RegionDeletionStatus.RegionDeletionState successState, + RegionDeletionStatus.RegionDeletionState failureState) + throws KeeperException, IOException{ + RegionDeletionStatus drs = this.deleteRegionTracker + .getDeleteRegionStatus(regionName); + if (drs.getRegionDeletionStatus() != successState) { + try { + MetaEditor.deleteRegion(this.getCatalogTracker(), hRegionInfo); + drs.update(successState); + } catch (IOException ioe) { + drs.update(failureState, ioe.getCause().getMessage()); + throw ioe; + } finally 
{ + this.deleteRegionTracker.updateDeleteRegionStatus( + regionName, drs); + } + } + } + + private void deleteRegionFromFs(final String regionName, + final HRegion hRegion, + RegionDeletionStatus.RegionDeletionState successState, + RegionDeletionStatus.RegionDeletionState failureState) + throws KeeperException, IOException{ + RegionDeletionStatus drs = this.deleteRegionTracker + .getDeleteRegionStatus(regionName); + if (drs.getRegionDeletionStatus() != successState ){ + try { + HRegion.deleteRegion(hRegion.getFilesystem(), + hRegion.getRegionDir()); + drs.update(successState); + } catch (IOException ioe) { + drs.update(failureState, ioe.getCause().getMessage()); + throw ioe; + } finally { + this.deleteRegionTracker.updateDeleteRegionStatus( + regionName, drs); + } + } + } + + private void addNewRegionInMeta(final String regionName, + final HRegion hRegion, + RegionDeletionStatus.RegionDeletionState successState, + RegionDeletionStatus.RegionDeletionState failureState) + throws KeeperException, IOException{ + RegionDeletionStatus drs = this.deleteRegionTracker + .getDeleteRegionStatus(regionName); + if (drs.getRegionDeletionStatus() != successState ){ + try { + MetaEditor.addRegionAndLocationToMeta( + this.getCatalogTracker(), hRegion.getRegionInfo(), + this.serverNameFromMasterPOV); + drs.update(successState); + addToOnlineRegions(hRegion); + } catch (IOException ioe) { + drs.update(failureState, ioe.getCause().getMessage()); + throw ioe; + } finally { + this.deleteRegionTracker.updateDeleteRegionStatus( + regionName, drs); + } + } + } + + /** + * Process the delete-region request. + * @param hRegion HRegion instance to be deleted. + * @throws IOException + * @throws KeeperException + */ + private void processDeleteRegion(final HRegion hRegion) throws IOException, + KeeperException{ + if (LOG.isDebugEnabled()) { + LOG.debug("Trying to bridge the hole for region " + hRegion +" in META." 
+ + " StartKey=" + Bytes.toString(hRegion.getStartKey()) + + ", EndKey=" + Bytes.toString(hRegion.getEndKey())); + } + HRegion newRegion = createNewRegionFromAdjacentRegion(hRegion); + addNewRegionInMeta(hRegion.getRegionInfo().getEncodedName(), + newRegion, + RegionDeletionStatus.RegionDeletionState.NEW_REGION_ADDED_IN_META, + RegionDeletionStatus.RegionDeletionState.NEW_REGION_ADD_IN_META_FAILED); + removeFromOnlineRegions(hRegion.getRegionInfo().getEncodedName()); + } + + /** + * Delete the specified region. + * @param encodedRegionName region to delete + * @throws IOException + */ + public void deleteRegion(final String encodedRegionName) + throws IOException, KeeperException { + if (encodedRegionName != null) { + HRegion hRegion = this.onlineRegions.get(encodedRegionName); + if (hRegion != null) { + LOG.info("Delete region process is started for region " + + encodedRegionName); + hRegion.flushcache(); + hRegion.compactStores(); + hRegion.close(); + processDeleteRegion(hRegion); + } else { + throw new IOException("Region does not exist"); + } + } + } + + /** + * Gets the starting and ending row keys for every region in the currently + * open table. 
+ * @return Pair of arrays of region starting and ending row keys + * @throws IOException if a remote or network exception occurs + */ + private Pair getStartEndKeys(final byte[] tableName, + Configuration conf) throws IOException { + final List startKeyList = new ArrayList(); + final List endKeyList = new ArrayList(); + MetaScannerVisitor visitor = new MetaScannerVisitor() { + public boolean processRow(Result rowResult) throws IOException { + byte [] bytes = rowResult.getValue(HConstants.CATALOG_FAMILY, + HConstants.REGIONINFO_QUALIFIER); + if (bytes == null) { + LOG.warn("Null " + HConstants.REGIONINFO_QUALIFIER + " cell in " + + rowResult); + return true; + } + HRegionInfo info = Writables.getHRegionInfo(bytes); + if (Bytes.equals(info.getTableName(), tableName)) { + if (!(info.isOffline() || info.isSplit())) { + startKeyList.add(info.getStartKey()); + endKeyList.add(info.getEndKey()); + } + } + return true; + } + }; + MetaScanner.metaScan(conf, visitor,tableName); + return new Pair( + startKeyList.toArray(new byte[startKeyList.size()][]), + endKeyList.toArray(new byte[endKeyList.size()][])); + } + /** * Gets the online regions of the specified table. * This method looks at the in-memory onlineRegions. It does not go to .META.. @@ -3642,6 +3988,7 @@ * So, the returned list may not necessarily be ALL regions in this table, its * all the ONLINE regions in the table. * @param tableName + * @throws IOException * @return Online regions from tableName */ public List getOnlineRegions(byte[] tableName) { @@ -3656,10 +4003,26 @@ } return tableRegions; } + + /** + * Gets the count of online regions of the table in a region server. + * This method looks at the in-memory onlineRegions. 
+ * @param regionName + * @return int regions count + * @throws IOException + */ + public int getRegionsCount(byte[] regionName) throws IOException { + return getOnlineRegions(getRegionInfo(regionName).getTableName()) + .size(); + } public SchemaChangeTracker getSchemaChangeTracker() { return this.schemaChangeTracker; } + + public RegionServerDeleteRegionTracker getDeleteRegionTracker() { + return this.deleteRegionTracker; + } // used by org/apache/hbase/tmpl/regionserver/RSStatusTmpl.jamon (HBASE-4070). public String[] getCoprocessors() { Index: src/main/java/org/apache/hadoop/hbase/regionserver/OnlineRegions.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/OnlineRegions.java (revision 1236386) +++ src/main/java/org/apache/hadoop/hbase/regionserver/OnlineRegions.java (working copy) @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.List; +import org.apache.zookeeper.KeeperException; /** * Interface to Map of online regions. In the Map, the key is the region's @@ -65,5 +66,14 @@ * @param hRegion */ public void refreshRegion(HRegion hRegion) throws IOException; + + /** + * Delete the specified region + * + * @param regionName + * @throws IOException + * @throws KeeperException + */ + public void deleteRegion(String regionName) throws IOException, KeeperException; } Index: src/main/java/org/apache/hadoop/hbase/zookeeper/MasterDeleteRegionTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/MasterDeleteRegionTracker.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/MasterDeleteRegionTracker.java (revision 0) @@ -0,0 +1,502 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.zookeeper; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.HashMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.VersionedWritable; +import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.io.Writable; +import org.apache.zookeeper.KeeperException; + +/** + * Tracks the list of regions to be deleted via ZK. + * + *

This class is responsible for watching for changes to the + * region name list. It handles additions/deletions in the region name + * list and watches each node. + */ +public class MasterDeleteRegionTracker extends ZooKeeperNodeTracker { + + public static final Log LOG = + LogFactory.getLog(MasterDeleteRegionTracker.class); + private final MasterServices masterServices; + // Used by tests only. Do not change this. + private volatile int sleepTimeMillis = 0; + // delete-region request pending more than this time will be timed out. + private long deleteRegionTimeoutMillis = 30000; + + private Map monitoredTaskMap = null; + + /** + * Constructs a new ZK node tracker. + *

+ *

After construction, use {@link #start} to kick off tracking. + * + * @param watcher + * @param abortable + * @param masterServices + * @param deleteRegionTimeoutMillis + */ + public MasterDeleteRegionTracker(ZooKeeperWatcher watcher, + Abortable abortable, MasterServices masterServices, + long deleteRegionTimeoutMillis) { + super(watcher, watcher.deleteRegionZNode, abortable); + this.masterServices = masterServices; + this.deleteRegionTimeoutMillis = deleteRegionTimeoutMillis; + this.monitoredTaskMap = new HashMap(); + } + + @Override + public void start() { + try { + watcher.registerListener(this); + List encodedRegionNames = + ZKUtil.listChildrenNoWatch(watcher, watcher.deleteRegionZNode); + processDeletedRegions(encodedRegionNames); + } catch (final KeeperException e) { + LOG.error("MasterDeleteRegionTracker startup failed.", e); + abortable.abort("MasterDeleteRegionTracker startup failed", e); + } + } + + private List getCurrentRegions() throws KeeperException { + return + ZKUtil.listChildrenNoWatch(watcher, watcher.deleteRegionZNode); + } + + /** + * When a primary master crashes and the secondary master takes over + * mid-flight during a delete-region process, the secondary should clean up + * any completed delete-request not handled by the previous master. + * @param encodedRegionNames list of encoded region names + * @throws KeeperException + */ + private void processDeletedRegions(List encodedRegionNames) + throws KeeperException { + if (encodedRegionNames == null || encodedRegionNames.isEmpty()) { + String msg = "No current delete region(s) in progress. Skipping cleanup"; + LOG.debug(msg); + return; + } + String msg = "Master seeing following regions undergoing delete " + + "process. 
Regions = " + encodedRegionNames; + setMonitoredTaskStatus("master", msg, false, false); + LOG.debug(msg); + for (String region : encodedRegionNames) { + LOG.debug("Processing region = "+ region); + setMonitoredTaskStatus(region, "Processing region = "+ region, + false, false); + try { + processRegionNode(region); + } catch (IOException e) { + String errmsg = "IOException while processing completed delete-region." + + " Cause = " + e.getCause(); + LOG.error(errmsg, e); + setMonitoredTaskStatus(region, errmsg, false, false); + } + } + } + + /** + * Get current delete-region request for a region. + * @param regionName + * @return DeleteRegionStatus + * @throws KeeperException + * @throws IOException + */ + public RegionDeletionStatus getDeleteRegionStatus(String regionName) + throws KeeperException, IOException { + String path = getDeleteRegionNodePath(regionName); + byte[] state = ZKUtil.getData(watcher, path); + if (state == null || state.length <= 0) { + return null; + } + RegionDeletionStatus drs = new RegionDeletionStatus(regionName); + Writables.getWritable(state, drs); + return drs; + } + + + /** + * If delete-region request is handled for this region, + * then delete the ZK node created for this region. + * @param regionName + * @throws KeeperException + */ + private void processRegionNode(final String regionName) throws KeeperException, + IOException { + LOG.debug("processRegionNode. RegionName = " + regionName); + RegionDeletionStatus drs = getDeleteRegionStatus(regionName); + if (drs == null) { + LOG.debug("DeleteRegionStatus is NULL. 
region = " + regionName); + return; + } + processDeleteRegionStatus(drs, regionName); + } + + private void setMonitoredTaskStatus(String regionName, String msg, + boolean completed, boolean aborted) { + if (monitoredTaskMap.get(regionName) == null) { + monitoredTaskMap.put(regionName, TaskMonitor.get().createStatus(msg)); + } else { + monitoredTaskMap.get(regionName).setStatus(msg); + } + if (completed) { + monitoredTaskMap.get(regionName).markComplete(msg); + } else if (aborted) { + monitoredTaskMap.get(regionName).abort(msg); + } + if (completed || aborted) { + monitoredTaskMap.remove(regionName); + } + } + + /** + * Evaluate the master delete-region status and determine the current status. + * @param deleteStatus current delete-region status + * @param regionName + */ + private void processDeleteRegionStatus(RegionDeletionStatus deleteStatus, + String regionName) throws KeeperException { + + if (deleteStatus.getRegionDeletionStatus() + == RegionDeletionStatus.RegionDeletionState.SUCCESS ){ + // delete region completed. + String msg = "Region server has successfully processed the " + + "delete-region request for region = " + regionName; + setMonitoredTaskStatus(regionName, msg, true, false); + LOG.debug(msg); + cleanupDeleteRegionNode(getDeleteRegionNodePath(regionName)); + } else { + if (deleteStatus.getErrorCause() != null + && deleteStatus.getErrorCause().trim().length() > 0) { + String msg = "Delete-region request failed " + + "for region = " + regionName + + ", Error Cause = " + deleteStatus.getErrorCause(); + setMonitoredTaskStatus(regionName, msg, false, true); + LOG.debug(msg); + } else { + String msg = "Delete-region request is in process " + + deleteStatus.getRegionDeletionStatus(); + LOG.debug(msg); + } + } + } + + /** + * Check whether an in-flight delete-region request has expired. + * @param regionName + * @return true if the delete-region request expired. 
+ * @throws IOException + */ + private boolean hasDeleteRegionExpired(String regionName) + throws IOException, KeeperException { + RegionDeletionStatus drs = getDeleteRegionStatus(regionName); + long createdTimeStamp = drs.getTimestamp(); + long duration = System.currentTimeMillis() - createdTimeStamp; + LOG.debug("Created TimeStamp = " + createdTimeStamp + + " duration = " + duration + " Region = " + regionName + + " Master Delete Region Status = " + drs); + return (duration > deleteRegionTimeoutMillis); + } + + /** + * Check whether there are any delete-region requests that are in progress now. + * We simply assume that a delete-region is in progress if we see + * a ZK delete-region for any region. + * @return true if it is in process + */ + public boolean isDeleteRegionInProgress() { + try { + int deleteRegionCount = + ZKUtil.getNumberOfChildren(this.watcher, watcher.deleteRegionZNode); + return deleteRegionCount > 0; + } catch (final KeeperException ke) { + LOG.debug("KeeperException while getting current delete region progress.", ke); + } + return false; + } + + /** + * Handle failed and expired delete-region requests. We simply delete all the + * expired/failed delete-region attempts. Why we should do this ? + * 1) Keeping the failed/expired delete-region nodes longer prohibits any + * future delete-region for the regions. + * 2) Any lingering expired/failed delete-region requests will prohibit the + * load balancer from running. + */ + public void handleFailedOrExpiredDeleteRegionRequests() { + try { + List regionsList = getCurrentRegions(); + for (String namedRegion : regionsList) { + String statmsg = "Cleaning failed or expired delete-region requests. " + + "Current regions undergoing " + + "delete-region process = " + regionsList; + setMonitoredTaskStatus(namedRegion, statmsg, false, false); + LOG.debug(statmsg); + if (hasDeleteRegionExpired(namedRegion)) { + // time out.. currently, we abandon the in-flight delete-region due to + // time out. 
+ String msg = "Delete-region for region = " + namedRegion + " has expired." + + " Delete-region for this region has been in progress for " + + + deleteRegionTimeoutMillis + + ". Deleting the node now."; + LOG.debug(msg); + ZKUtil.deleteNode(this.watcher, + getDeleteRegionNodePath(namedRegion)); + } else { + String msg = "Delete-region request is in progress for " + + "region = " + namedRegion; + LOG.debug(msg); + setMonitoredTaskStatus(namedRegion, msg, false, false); + } + } + } catch (final IOException e) { + String msg = "IOException during handleFailedOrExpiredDeleteRegionRequests. " + + e.getCause(); + LOG.error(msg, e); + setMonitoredTaskStatus("master", msg, false, false); + } catch (final KeeperException ke) { + String msg = "KeeperException during handleFailedOrExpiredDeleteRegionRequests. " + + ke.getCause(); + LOG.error(msg, ke); + setMonitoredTaskStatus("master", msg, false, false); + } + } + + /** + * Clean the nodes of completed delete-region. + * @param path + * @throws KeeperException + */ + private void cleanupDeleteRegionNode(String path) + throws KeeperException { + ZKUtil.deleteNode(this.watcher, path); + LOG.debug("Deleted a node for path " + path); + } + + private int getZKNodeVersion(String nodePath) throws KeeperException { + return ZKUtil.checkExists(this.watcher, nodePath); + } + + /** + * Create a new delete-region ZK node. + * @param regionName region name that is getting deleted + * @throws KeeperException + */ + public void createDeleteRegionNode(String regionName) + throws KeeperException, IOException { + String msg = "Creating delete-region node for region = " + regionName; + setMonitoredTaskStatus(regionName, msg, false, false); + LOG.debug(msg + ". 
Path = " + + getDeleteRegionNodePath(regionName)); + if (doesDeleteRegionNodeExist(regionName)) { + LOG.debug("Delete-region node already exists for region = " + regionName + + " Deleting the delete-region node."); + while(doesDeleteRegionNodeExist(regionName)) { + try { + Thread.sleep(50); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + RegionDeletionStatus drs = new RegionDeletionStatus(regionName); + LOG.debug("Master creating the master delete-region status = " + drs); + ZKUtil.createSetData(this.watcher, + getDeleteRegionNodePath(regionName), Writables.getBytes(drs)); + setMonitoredTaskStatus(regionName, "Created the ZK node for delete region." + + " Current delete region status = " + drs.toString(), true, false); + ZKUtil.watchAndCheckExists(this.watcher, + getDeleteRegionNodePath(regionName)); + } + + /** + * Check whether encodedRegionName node exist under + * /delete-region + * @param regionName + * @throws KeeperException + */ + public boolean doesDeleteRegionNodeExist(String regionName) + throws KeeperException { + return ZKUtil.checkExists(watcher, + getDeleteRegionNodePath(regionName)) != -1; + } + + /** + * We get notified as and when the RS cloud updates their ZK nodes with + * progress information. The path will be of the format + * /delete-region/ + * @param path + */ + @Override + public void nodeDataChanged(String path) { + String regionName = null; + if (path.startsWith(watcher.deleteRegionZNode) && + !path.equals(watcher.deleteRegionZNode)) { + try { + LOG.debug("NodeDataChanged Path = " + path); + String[] paths = path.split("/"); + regionName = paths[3]; + processRegionNode(regionName); + } catch (final KeeperException e) { + setMonitoredTaskStatus(regionName, + "MasterDeleteRegionTracker: ZK exception while processing " + + " nodeDataChanged() event for region = " + regionName + + ". 
Cause = " + e.getCause(), false, false); + LOG.error("MasterDeleteRegionTracker: Unexpected zk exception getting" + + " delete-region change nodes", e); + } catch(final IOException ioe) { + setMonitoredTaskStatus(regionName, + "MasterDeleteRegionTracker: IOException while processing " + + "nodeDataChanged() event for region = " + regionName + + ". Cause = " + ioe.getCause(), false, false); + LOG.error("MasterDeleteRegionTracker: Unexpected IOException getting" + + " delete-region change nodes", ioe); + } + } + } + + public String getDeleteRegionNodePath(String regionName) { + return ZKUtil.joinZNode(watcher.deleteRegionZNode, regionName); + } + + public void setSleepTimeMillis(int sleepTimeMillis) { + this.sleepTimeMillis = sleepTimeMillis; + } + + /** + * Holds the current state for a region to be deleted. DeleteRegionState + * includes the current delete status (INPROCESS, FAILURE or SUCCESS), + * timestamp of delete request. + * + * Master keeps track of delete regions requests using the delete-region status + * and periodically updates the region-delete status based on RS processing. + */ + public static class RegionDeletionStatus extends VersionedWritable + implements Writable { + + public enum RegionDeletionState { + INPROCESS, // delete-region request is in process + SUCCESS, // completed delete-region request + DEST_REGION_REMOVED_FROM_META, // current region is removed from META + DEST_REGION_REMOVE_FROM_META_FAILED,// failed to remove destination region from .META. 
+ ADJ_REGION_REMOVED_FROM_META, // adjacent region is removed from META + ADJ_REGION_REMOVE_FROM_META_FAILED, // adjacent region remove request from META is failed + NEW_REGION_ADDED_IN_META, // new region is added in META + NEW_REGION_ADD_IN_META_FAILED, // new region add request is failed in META + DEST_REGION_DELETION_FROM_FS, // dest region is deleted from file-system + DEST_REGION_DELETION_FROM_FS_FAILED, // dest region delete request from file-system is failed + ADJ_REGION_DELETION_FROM_FS, // adjacent region is deleted from file-system + ADJ_REGION_DELETION_FROM_FS_FAILED, // adjacent region delete request from file-system is failed + FAILURE // failed request, master will do clean-up after the request + // exceeds the deleteRegionTimeoutMillis value + } + + private static final byte VERSION = 0; + private RegionDeletionState regionDeletionStatus; + private long timestamp; + private String errorCause = ""; + private String regionName = ""; + + public RegionDeletionStatus(String regionName) { + this.timestamp = System.currentTimeMillis(); + this.regionDeletionStatus = RegionDeletionState.INPROCESS; + this.regionName = regionName; + } + + public RegionDeletionState getRegionDeletionStatus() { + return regionDeletionStatus; + } + + public void setRegionDeletionStatus(RegionDeletionState deleteRegionState) { + this.regionDeletionStatus = deleteRegionState; + } + + public long getTimestamp() { + return timestamp; + } + + public void setTimestamp(long stamp) { + this.timestamp = stamp; + } + + public String getErrorCause() { + return errorCause; + } + + public void setErrorCause(String errorCause) { + if (errorCause == null) { + return; + } + this.errorCause = errorCause; + } + + /** @return the object version number */ + @Override + public byte getVersion(){ + return VERSION; + } + + @Override + public void readFields(DataInput in) throws IOException { + byte version = in.readByte(); + if (version > VERSION) { + throw new IOException("Version mismatch; " + 
version); + } + regionDeletionStatus = RegionDeletionState.valueOf(in.readUTF()); + timestamp = in.readLong(); + regionName = Bytes.toString(Bytes.readByteArray(in)); + errorCause = Bytes.toString(Bytes.readByteArray(in)); + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + out.writeUTF(regionDeletionStatus.name()); + out.writeLong(timestamp); + Bytes.writeByteArray(out, Bytes.toBytes(regionName)); + Bytes.writeByteArray(out, Bytes.toBytes(errorCause)); + } + + @Override + public String toString() { + return + "regionName= " + regionName + + ", status= " + regionDeletionStatus + + ", ts= " + timestamp + + ", errorCause = " + errorCause; + } + } +} Index: src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerDeleteRegionTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerDeleteRegionTracker.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerDeleteRegionTracker.java (revision 0) @@ -0,0 +1,402 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.zookeeper; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.RegionServerServices; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.VersionedWritable; +import org.apache.hadoop.hbase.util.Writables; +import org.apache.zookeeper.KeeperException; + +import org.apache.hadoop.io.Writable; + +import java.io.*; +import java.util.List; + +/** + * Region server delete-region tracker. RS uses this tracker to keep track of + * delete-region requests from master and updates the status once + * the delete-region is complete. + */ +public class RegionServerDeleteRegionTracker extends ZooKeeperNodeTracker { + + public static final Log LOG = + LogFactory.getLog(RegionServerDeleteRegionTracker.class); + private RegionServerServices regionServer = null; + private volatile int sleepTimeMillis = 0; + + /** + * Constructs a new ZK node tracker. + *

+ *

After construction, use {@link #start} to kick off tracking. + * + * @param watcher + * @param node + * @param abortable + */ + public RegionServerDeleteRegionTracker(ZooKeeperWatcher watcher, + Abortable abortable, + RegionServerServices regionServer) { + super(watcher, watcher.deleteRegionZNode, abortable); + this.regionServer = regionServer; + } + + @Override + public void start() { + try { + watcher.registerListener(this); + ZKUtil.listChildrenAndWatchThem(watcher, node); + } catch (final KeeperException e) { + LOG.error("RegionServerDeleteRegionTracker startup failed with " + + "KeeperException.", e); + } + } + + /** + * Check whether there are any delete-region requests that are in progress now + * for the given region. We simply assume that a delete-region is in progress + * if we see a ZK delete-region node. + * @return true if it is progress otherwise false + */ + public boolean isDeleteRegionInProgress(String regionName) { + try { + List regionList = ZKUtil.listChildrenAndWatchThem(this.watcher, + watcher.deleteRegionZNode); + if (regionList != null) { + for (String region : regionList) { + if (region.equals(regionName)) { + return true; + } + } + return false; + } + } catch (final KeeperException ke) { + LOG.debug("isDeleteRegionInProgress. " + + "KeeperException while getting current delete-region progress."); + return false; + } + return false; + } + + + /** + * This event will be triggered whenever new delete-region request + * is processed by the master. The path will be of the format + * /delete-region/region-name + * @param path full path of the node whose children have changed + */ + @Override + public void nodeChildrenChanged(String path) { + LOG.debug("NodeChildrenChanged. 
Path = " + path); + if (path.equals(watcher.deleteRegionZNode)) { + try { + List regionsList = + ZKUtil.listChildrenAndWatchThem(watcher, watcher.deleteRegionZNode); + LOG.debug("RegionServerDeleteRegionTracker: " + + "Current list of regions with delete-region request= " + regionsList); + if (regionsList != null && regionsList.size() > 0) { + handleDeleteRegion(regionsList); + } else { + LOG.error("No region found for delete-region event." + + " Skipping delete-region"); + } + } catch (final KeeperException ke) { + String errmsg = "KeeperException while handling nodeChildrenChanged for path = " + + path + " Cause = " + ke.getCause(); + LOG.error(errmsg, ke); + TaskMonitor.get().createStatus(errmsg); + } + } + } + + private void handleDeleteRegion(List regionsList) { + for (String regionName : regionsList) { + if (regionName != null) { + LOG.debug("Processing delete-region with status for region = " + + regionName); + handleDeleteRegion(regionName); + } + } + } + + private void reportAndLogDeleteRegionError(String regionName, + Throwable exception, MonitoredTask status) { + try { + String errmsg = "Region Server " + + regionServer.getServerName().getServerName() + + " failed during delete-region process. 
Cause = " + + exception.getCause(); + RegionDeletionStatus drs = getDeleteRegionStatus(regionName); + drs.update(RegionDeletionStatus.RegionDeletionState.FAILURE, errmsg); + String nodePath = getDeleteRegionNodePath(regionName); + ZKUtil + .updateExistingNodeData(this.watcher, nodePath, + Writables.getBytes(drs), getZKNodeVersion(nodePath)); + LOG.debug("reportAndLogDeleteRegionError() " + + " Updated child ZKNode with DeleteRegionStatus = " + + drs + " for region = " + regionName); + if (status == null) { + status = TaskMonitor.get().createStatus(errmsg); + } else { + status.setStatus(errmsg); + } + } catch (final KeeperException e) { + String errmsg = "KeeperException while updating the delete-region node with " + + "error status for region=" + regionName + ", server=" + + regionServer.getServerName().getServerName() + + ", Cause=" + e.getCause(); + LOG.error(errmsg, e); + TaskMonitor.get().createStatus(errmsg); + } catch (final IOException ioe) { + String errmsg = "IOException while updating the delete-region node with " + + "server name for region=" + regionName + ", server=" + + regionServer.getServerName().getServerName() + + ", Cause=" + ioe.getCause(); + TaskMonitor.get().createStatus(errmsg); + LOG.error(errmsg, ioe); + } + } + + private void handleDeleteRegion(final String regionName) { + MonitoredTask status = null; + try { + if (regionServer.getFromOnlineRegions(regionName) == null) { + LOG.debug("Region " + regionName + " does not exist"); + return; + } + status = TaskMonitor.get().createStatus("Region server " + + regionServer.getServerName().getServerName() + + " handling delete region for region = " + regionName); + regionServer.deleteRegion(regionName); + RegionDeletionStatus drs = getDeleteRegionStatus(regionName); + drs.update(RegionDeletionStatus.RegionDeletionState.SUCCESS); + updateDeleteRegionStatus(regionName, drs); + String msg = "Delete-region request completed for region = " + + regionName + " on server = " + + 
regionServer.getServerName().getServerName(); + LOG.info(msg); + status.setStatus(msg); + } catch (final IOException ioe) { + reportAndLogDeleteRegionError(regionName, ioe, status); + } catch (final KeeperException ke) { + reportAndLogDeleteRegionError(regionName, ke, status); + } + } + + private int getZKNodeVersion(String nodePath) throws KeeperException { + return ZKUtil.checkExists(this.watcher, nodePath); + } + + /** + * Get the data from /delete-region/ + * znode and build RegionDeletionStatus object + * @param regionName + * @return RegionDeletionStatus + * @throws KeeperException + * @throws IOException + */ + public RegionDeletionStatus getDeleteRegionStatus(String regionName) + throws KeeperException, IOException { + byte[] statusBytes = ZKUtil.getData(this.watcher, + getDeleteRegionNodePath(regionName.trim())); + if (statusBytes == null || statusBytes.length <= 0) { + LOG.info("Could not find the znode for region " + regionName); + return null; + } + RegionDeletionStatus drs = new RegionDeletionStatus(); + Writables.getWritable(statusBytes, drs); + return drs; + } + + public void updateDeleteRegionStatus(String regionName, + RegionDeletionStatus deleteRegionStatus) + throws KeeperException, IOException { + try { + if(sleepTimeMillis > 0) { + try { + LOG.debug("RegionServerDeleteRegionTracker sleeping for " + + sleepTimeMillis); + Thread.sleep(sleepTimeMillis); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + ZKUtil.updateExistingNodeData(this.watcher, + getDeleteRegionNodePath(regionName), + Writables.getBytes(deleteRegionStatus), -1); + String msg = "RegionServerDeleteRegionTracker completed for region=" + + regionName + ", status=" + deleteRegionStatus; + LOG.debug(msg); + TaskMonitor.get().createStatus(msg); + } catch (final KeeperException.NoNodeException e) { + String errmsg = "KeeperException.NoNodeException while updating the" + + " delete-region node with server name for region=" + regionName + + ", server=" + 
regionServer.getServerName().getServerName() + + ", Cause=" + e.getCause(); + TaskMonitor.get().createStatus(errmsg); + LOG.error(errmsg, e); + } catch (final KeeperException e) { + String errmsg = "KeeperException while updating the delete-region node" + + " with server name for region=" + regionName + ", server=" + + regionServer.getServerName().getServerName() + + ", Cause=" + e.getCause(); + LOG.error(errmsg, e); + TaskMonitor.get().createStatus(errmsg); + } catch (final IOException ioe) { + String errmsg = "IOException while updating the delete-region node with " + + "server name for region=" + regionName + ", server=" + + regionServer.getServerName().getServerName() + + ", Cause=" + ioe.getCause(); + LOG.error(errmsg, ioe); + TaskMonitor.get().createStatus(errmsg); + } + } + + private String getDeleteRegionNodePath(String regionName) { + return ZKUtil.joinZNode(watcher.deleteRegionZNode, regionName); + } + + public int getSleepTimeMillis() { + return sleepTimeMillis; + } + + /** + * Set a sleep time in millis before this RS can update its progress status. + * Used only for test cases to test complex test scenarios such as RS failures + * and RS exception handling. + * @param sleepTimeMillis + */ + public void setSleepTimeMillis(int sleepTimeMillis) { + if (sleepTimeMillis > 0) { + this.sleepTimeMillis = sleepTimeMillis; + } + } + + + /** + * Holds the current delete-region state for a region. Delete region state + * includes the current delete state, timestamp of delete request, + * and an errorCause in case the RS failed during the delete-region process. + */ + public static class RegionDeletionStatus extends VersionedWritable + implements Writable { + + public enum RegionDeletionState { + INPROCESS, // delete-region request is in process + SUCCESS, // completed delete-region request + DEST_REGION_REMOVED_FROM_META, // current region is removed from META + DEST_REGION_REMOVE_FROM_META_FAILED,// failed to remove destination region from .META. 
+ ADJ_REGION_REMOVED_FROM_META, // adjacent region is removed from META + ADJ_REGION_REMOVE_FROM_META_FAILED, // adjacent region remove request from META is failed + NEW_REGION_ADDED_IN_META, // new region is added in META + NEW_REGION_ADD_IN_META_FAILED, // new region add request is failed in META + DEST_REGION_DELETION_FROM_FS, // dest region is deleted from file-system + DEST_REGION_DELETION_FROM_FS_FAILED, // dest region delete request from file-system is failed + ADJ_REGION_DELETION_FROM_FS, // adjacent region is deleted from file-system + ADJ_REGION_DELETION_FROM_FS_FAILED, // adjacent region delete request from file-system is failed + FAILURE // failed request, master will do clean-up after the request + // exceeds the deleteRegionTimeoutMillis value + } + + private static final byte VERSION = 0; + private RegionDeletionState regionDeletionStatus; + private long timestamp; + private String errorCause = ""; + private String regionName= ""; + + public RegionDeletionStatus() { + } + + public RegionDeletionStatus(String regionName) { + this.timestamp = System.currentTimeMillis(); + this.regionDeletionStatus = RegionDeletionState.INPROCESS; + this.regionName = regionName; + } + + public RegionDeletionState getRegionDeletionStatus() { + return regionDeletionStatus; + } + + public void setRegionDeletionStatus(RegionDeletionState drs) { + this.regionDeletionStatus = drs; + } + + public String getErrorCause() { + return errorCause; + } + + public void setErrorCause(String errorCause) { + if (errorCause == null) { + return; + } + this.errorCause = errorCause; + } + + public void update(RegionDeletionState state, String errorCause) { + this.regionDeletionStatus = state; + this.errorCause = errorCause; + } + + public void update(RegionDeletionState status) { + this.regionDeletionStatus = status; + } + + /** @return the object version number */ + @Override + public byte getVersion(){ + return VERSION; + } + + @Override + public void readFields(DataInput in) throws 
IOException { + byte version = in.readByte(); + if (version > VERSION) { + throw new IOException("Version mismatch; " + version); + } + regionDeletionStatus = RegionDeletionState.valueOf(in.readUTF()); + timestamp = in.readLong(); + regionName = Bytes.toString(Bytes.readByteArray(in)); + errorCause = Bytes.toString(Bytes.readByteArray(in)); + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + out.writeUTF(regionDeletionStatus.name()); + out.writeLong(timestamp); + Bytes.writeByteArray(out, Bytes.toBytes(regionName)); + Bytes.writeByteArray(out, Bytes.toBytes(errorCause)); + } + + @Override + public String toString() { + return + " regionName= " + regionName + + " status= " + regionDeletionStatus + + ", ts= " + timestamp + + ", errorCause = " + errorCause; + } + } +} Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (revision 1241350) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (working copy) @@ -102,6 +102,8 @@ public String splitLogZNode; // znode used to record table schema changes public String schemaZNode; + //znode used to record delete region + public String deleteRegionZNode; // Certain ZooKeeper nodes need to be world-readable public static final ArrayList CREATOR_ALL_AND_WORLD_READABLE = @@ -165,6 +167,7 @@ ZKUtil.createAndFailSilent(this, tableZNode); ZKUtil.createAndFailSilent(this, splitLogZNode); ZKUtil.createAndFailSilent(this, schemaZNode); + ZKUtil.createAndFailSilent(this, deleteRegionZNode); } catch (KeeperException e) { throw new ZooKeeperConnectionException( prefix("Unexpected KeeperException creating base node"), e); @@ -216,6 +219,8 @@ conf.get("zookeeper.znode.splitlog", HConstants.SPLIT_LOGDIR_NAME)); schemaZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.schema", "schema")); + 
deleteRegionZNode = ZKUtil.joinZNode(baseZNode, + conf.get("zookeeper.znode.deleteRegion", "delete-region")); } /** Index: src/main/ruby/hbase/admin.rb =================================================================== --- src/main/ruby/hbase/admin.rb (revision 1236386) +++ src/main/ruby/hbase/admin.rb (working copy) @@ -62,6 +62,12 @@ end #---------------------------------------------------------------------------------------------- + # Requests a region delete + def delete_region(region_name) + @admin.deleteRegion(region_name) + end + + #---------------------------------------------------------------------------------------------- # Requests a regionserver's HLog roll def hlog_roll(server_name) @admin.rollHLogWriter(server_name) Index: src/main/ruby/shell.rb =================================================================== --- src/main/ruby/shell.rb (revision 1236386) +++ src/main/ruby/shell.rb (working copy) @@ -270,6 +270,7 @@ unassign zk_dump hlog_roll + delete_region ] ) Index: src/main/ruby/shell/commands/delete_region.rb =================================================================== --- src/main/ruby/shell/commands/delete_region.rb (revision 0) +++ src/main/ruby/shell/commands/delete_region.rb (revision 0) @@ -0,0 +1,37 @@ +# +# Copyright 2010 The Apache Software Foundation +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +module Shell + module Commands + class DeleteRegion < Command + def help + return <<-EOF +Delete the named region +EOF + end + + def command(region_name) + format_simple_command do + admin.delete_region(region_name) + end + end + end + end +end Index: src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 1236386) +++ src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (working copy) @@ -1245,12 +1245,39 @@ return null; } LOG.debug("Found " + metaRows.size() + " rows for table " + - Bytes.toString(tableName)); - byte [] firstrow = metaRows.get(0); - LOG.debug("FirstRow=" + Bytes.toString(firstrow)); - int index = hbaseCluster.getServerWith(firstrow); + Bytes.toString(tableName)); + return getRSForRegionInTable(metaRows, 0); + } + + public HRegionServer getRSForSecondRegionInTable(byte[] tableName) + throws IOException { + List<byte[]> metaRows = getMetaTableRows(tableName); + if (metaRows == null || metaRows.size() < 2) { + return null; + } + LOG.debug("Found " + metaRows.size() + " rows for table " + + Bytes.toString(tableName)); + return getRSForRegionInTable(metaRows, 1); + } + + public HRegionServer getRSForLastRegionInTable(byte[] tableName) + throws IOException { + List<byte[]> metaRows = getMetaTableRows(tableName); + if (metaRows == null || metaRows.size() < 2) { + return null; + } + LOG.debug("Found " + metaRows.size() + " rows for table " + + Bytes.toString(tableName)); + return 
getRSForRegionInTable(metaRows, metaRows.size()-1); + } + + public HRegionServer getRSForRegionInTable(List<byte[]> metaRows, int regionIndex){ + byte [] row = metaRows.get(regionIndex); + int index = hbaseCluster.getServerWith(row); return hbaseCluster.getRegionServerThreads().get(index).getRegionServer(); } + + + /** * Starts a MiniMRCluster with a default number of Index: src/test/java/org/apache/hadoop/hbase/client/TestDeleteRegion.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/client/TestDeleteRegion.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/client/TestDeleteRegion.java (revision 0) @@ -0,0 +1,364 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.zookeeper.MasterDeleteRegionTracker; +import org.apache.zookeeper.KeeperException; +import org.junit.*; +import org.junit.experimental.categories.Category; + +/** + * Class to test delete-region using HBaseAdmin. + * Spins up the minicluster once at test start and then takes it down afterward. 
+ */ +@Category(LargeTests.class) +public class TestDeleteRegion { + + final Log LOG = LogFactory.getLog(getClass()); + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private HBaseAdmin admin; + private static MiniHBaseCluster miniHBaseCluster = null; + private static MasterDeleteRegionTracker mdrt = null; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 100); + TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 250); + TEST_UTIL.getConfiguration().setInt("hbase.client.retries.number", 6); + TEST_UTIL.getConfiguration().setBoolean("hbase.delete.region.enabled", true); + miniHBaseCluster = TEST_UTIL.startMiniCluster(2,5); + mdrt = TEST_UTIL.getHBaseCluster().getMaster().getDeleteRegionTracker(); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void setUp() throws Exception { + this.admin = TEST_UTIL.getHBaseAdmin(); + } + + @After + public void tearDown() throws Exception { + } + + @Test + public void testDeleteFirstRegion() throws IOException, + KeeperException, InterruptedException { + + String tableName = "TestHBADeleteRegionFirst"; + createTable(tableName); + String regionName = getRegionName(tableName, 0); + admin.deleteRegion(regionName); + waitForDeleteRegionProcess(regionName); + assertFalse(mdrt.doesDeleteRegionNodeExist(regionName)); + } + + @Test + public void testDeleteMiddleRegion() throws IOException, + KeeperException, InterruptedException { + String tableName = "TestHBADeleteRegionMiddle"; + createTable(tableName); + String regionName = getRegionName(tableName, 1); + admin.deleteRegion(regionName); + waitForDeleteRegionProcess(regionName); + assertFalse(mdrt.doesDeleteRegionNodeExist(regionName)); + } + + @Test + public void testDeleteLastRegion() throws IOException, + KeeperException, InterruptedException { + String 
tableName = "TestHBADeleteRegionLast"; + createTable(tableName); + String regionName = getRegionName(tableName, 2); + admin.deleteRegion(regionName); + waitForDeleteRegionProcess(regionName); + assertFalse(mdrt.doesDeleteRegionNodeExist(regionName)); + } + + private String getRegionName(String tableName, int regionIndex){ + String regionName = ""; + HRegionServer rs = null; + try { + if (regionIndex == 0) { + rs = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName)); + } else if (regionIndex == 1) { + rs = TEST_UTIL.getRSForSecondRegionInTable(Bytes.toBytes(tableName)); + } else { + rs = TEST_UTIL.getRSForLastRegionInTable(Bytes.toBytes(tableName)); + } + + List<HRegionInfo> onlineRegions = rs.getOnlineRegions(); + for (HRegionInfo regionInfo : onlineRegions) { + if (!regionInfo.isMetaTable()) { + if (regionInfo.getRegionNameAsString().contains(tableName)) { + regionName = regionInfo.getEncodedName(); + break; + } + } + } + } catch (Exception e) { + LOG.error(e.getMessage(), e); + } + return regionName; + } + + private void createTable(String name) throws IOException{ + byte[] TABLENAME = Bytes.toBytes(name); + HTableDescriptor htd = new HTableDescriptor(TABLENAME); + HColumnDescriptor hcd = new HColumnDescriptor("value"); + htd.addFamily(hcd); + admin.createTable(htd, "00000000".getBytes(), "zzzzzzzz".getBytes(), 10); + } + + private void waitForDeleteRegionProcess(final String regionName) + throws KeeperException, InterruptedException { + waitForDeleteRegionProcess(regionName, 90000); + } + + private void waitForDeleteRegionProcess(final String regionName, + final long waitTimeMills) throws KeeperException, InterruptedException { + LOG.info("Waiting for ZK node creation for region = " + regionName); + final MasterDeleteRegionTracker mdrt = + TEST_UTIL.getHBaseCluster().getMaster().getDeleteRegionTracker(); + + final Runnable r = new Runnable() { + public void run() { + try { + while(!mdrt.doesDeleteRegionNodeExist(regionName)) { + try { + Thread.sleep(50); + } catch 
(InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } catch (KeeperException ke) { + LOG.error(ke.getMessage(), ke); + ke.printStackTrace(); + } + LOG.info("Waiting for ZK node deletion for region = " + regionName); + try { + while(mdrt.doesDeleteRegionNodeExist(regionName)) { + try { + Thread.sleep(50); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } catch (KeeperException ke) { + LOG.error(ke.getMessage(), ke); + ke.printStackTrace(); + } + } + }; + Thread t = new Thread(r); + t.start(); + if (waitTimeMills > 0) { + t.join(waitTimeMills); + } else { + t.join(10000); + } + } + + /** + * The delete-region request blocks while a LB run is in progress. This + * test validates this behavior. + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + */ + @Test + public void testConcurrentDeleteRegionAndLoadBalancerRun() throws IOException, + InterruptedException, KeeperException { + + final String tableName = "testDeleteRegionWithLoadBalancerRunning"; + createTable(tableName); + LOG.info("Start testConcurrentDeleteRegionAndLoadBalancerRun()"); + final MasterDeleteRegionTracker msct = + TEST_UTIL.getHBaseCluster().getMaster().getDeleteRegionTracker(); + + Runnable balancer = new Runnable() { + public void run() { + // run the balancer now. + miniHBaseCluster.getMaster().balance(); + } + }; + + Runnable deleteRegion = new Runnable() { + public void run() { + try { + admin.deleteRegion(getRegionName(tableName, 0)); + } catch (IOException e) { + + } + } + }; + + balancer.run(); + deleteRegion.run(); + waitForDeleteRegionProcess(getRegionName(tableName, 0), 40000); + assertFalse(msct.doesDeleteRegionNodeExist(getRegionName(tableName, 0))); + + LOG.info("End testConcurrentDeleteRegionAndLoadBalancerRun() "); + } + + + /** + * This test validates two things. One is that the LoadBalancer does not run + * when a delete-region process is in progress. 
The second thing is that + * it also checks that failed/expired delete-region(s) are expired + * to unblock the load balancer run. + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + */ + @Test (timeout=70000) + public void testLoadBalancerBlocksDuringDeleteRegionRequests() + throws KeeperException, IOException, InterruptedException { + LOG.info("Start testLoadBalancerBlocksDuringDeleteRegionRequests() "); + final MasterDeleteRegionTracker mdrt = + TEST_UTIL.getHBaseCluster().getMaster().getDeleteRegionTracker(); + // Test that the load balancer does not run while an in-flight delete-region + // operation is in progress.Simulate a new delete-region request. + mdrt.createDeleteRegionNode("testLoadBalancerBlocks"); + // The delete-region node is created. + assertTrue(mdrt.doesDeleteRegionNodeExist("testLoadBalancerBlocks")); + // Now, request an explicit LB run. + + Runnable balancer1 = new Runnable() { + public void run() { + // run the balancer now. + miniHBaseCluster.getMaster().balance(); + } + }; + balancer1.run(); + + // Load balancer should not run now. + assertTrue(miniHBaseCluster.getMaster().isLoadBalancerRunning() == false); + LOG.info("End testLoadBalancerBlocksDuringDeleteRegionRequests() "); + } + + /** + * Test that delete-region blocks while LB is running. + * @throws KeeperException + * @throws IOException + * @throws InterruptedException + */ + @Test (timeout=10000) + public void testDeleteRegionBlocksDuringLoadBalancerRun() + throws KeeperException, IOException, InterruptedException { + final MasterDeleteRegionTracker mdrt = + TEST_UTIL.getHBaseCluster().getMaster().getDeleteRegionTracker(); + final String tableName = "testDeleteRegionBlocksDuringLoadBalancerRun"; + createTable(tableName); + // Test that the delete-region request does not run while an in-flight + // LB run is in progress. First, request an explicit LB run. + Runnable balancer1 = new Runnable() { + public void run() { + // run the balancer now. 
+ miniHBaseCluster.getMaster().balance(); + } + }; + + Runnable deleteRegion = new Runnable() { + public void run() { + try { + admin.deleteRegion(getRegionName(tableName, 0)); + } catch (IOException e) { + + } + } + }; + + Thread t1 = new Thread(balancer1); + Thread t2 = new Thread(deleteRegion); + t1.start(); + t2.start(); + + // check that they both happen concurrently + Runnable balancerCheck = new Runnable() { + public void run() { + // check whether balancer is running. + while((miniHBaseCluster != null) && + (miniHBaseCluster.getMaster() != null) && + (!miniHBaseCluster.getMaster().isLoadBalancerRunning())) { + try { + Thread.sleep(10); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + try { + assertFalse( + mdrt.doesDeleteRegionNodeExist(getRegionName(tableName, 0))); + } catch (KeeperException ke) { + LOG.error(ke.getMessage(), ke); + ke.printStackTrace(); + } + LOG.debug("Load Balancer is now running or skipped"); + while(miniHBaseCluster.getMaster().isLoadBalancerRunning()) { + try { + Thread.sleep(10); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + assertTrue( + miniHBaseCluster.getMaster().isLoadBalancerRunning() == false); + try { + assertTrue( + mdrt.doesDeleteRegionNodeExist(getRegionName(tableName, 0))); + } catch (KeeperException ke) { + LOG.error(ke.getMessage(), ke); + } + + } + }; + + Thread t = new Thread(balancerCheck); + t.start(); + t.join(1000); + LOG.info("End testDeleteRegionBlocksDuringLoadBalancerRun()"); + } + + @org.junit.Rule + public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = + new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); +} Index: src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (revision 1236386) +++ src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (working 
copy) @@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.hbase.zookeeper.MasterDeleteRegionTracker; import org.apache.hadoop.hbase.zookeeper.MasterSchemaChangeTracker; import org.apache.hadoop.hbase.zookeeper.RegionServerTracker; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; @@ -260,6 +261,11 @@ public RegionServerTracker getRegionServerTracker() { return null; } + + @Override + public MasterDeleteRegionTracker getDeleteRegionTracker() { + return null; + } } @Test Index: src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (revision 1236386) +++ src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (working copy) @@ -147,5 +147,10 @@ public boolean isAborted() { return false; } + + @Override + public void deleteRegion(String regionName) throws IOException, KeeperException { + //no-op + } }