Index: hbase-protocol/src/main/protobuf/hbase.proto =================================================================== --- hbase-protocol/src/main/protobuf/hbase.proto (revision 1446711) +++ hbase-protocol/src/main/protobuf/hbase.proto (working copy) @@ -267,15 +267,15 @@ * Description of the snapshot to take */ message SnapshotDescription { - required string name = 1; - optional string table = 2; // not needed for delete, but checked for in taking snapshot - optional int64 creationTime = 3 [default = 0]; - enum Type { - DISABLED = 0; - FLUSH = 1; - } - optional Type type = 4 [default = FLUSH]; - optional int32 version = 5; + required string name = 1; + optional string table = 2; // not needed for delete, but checked for in taking snapshot + optional int64 creationTime = 3 [default = 0]; + enum Type { + DISABLED = 0; + FLUSH = 1; + } + optional Type type = 4 [default = FLUSH]; + optional int32 version = 5; } message EmptyMsg { Index: hbase-protocol/src/main/protobuf/ErrorHandling.proto =================================================================== --- hbase-protocol/src/main/protobuf/ErrorHandling.proto (revision 1446711) +++ hbase-protocol/src/main/protobuf/ErrorHandling.proto (working copy) @@ -16,7 +16,7 @@ * limitations under the License. */ -// This file contains protocol buffers that used to error handling +// This file contains protocol buffers that are used for error handling option java_package = "org.apache.hadoop.hbase.protobuf.generated"; option java_outer_classname = "ErrorHandlingProtos"; Index: hbase-protocol/src/main/protobuf/MasterAdmin.proto =================================================================== --- hbase-protocol/src/main/protobuf/MasterAdmin.proto (revision 1446711) +++ hbase-protocol/src/main/protobuf/MasterAdmin.proto (working copy) @@ -330,8 +330,8 @@ rpc execMasterService(CoprocessorServiceRequest) returns(CoprocessorServiceResponse); - /** - * Create a snapshot for the given table. + /** + * Create a snapshot for the given table. * @param snapshot description of the snapshot to take */ rpc snapshot(TakeSnapshotRequest) returns(TakeSnapshotResponse); @@ -343,7 +343,7 @@ rpc listSnapshots(ListSnapshotRequest) returns(ListSnapshotResponse); /** - * Delete an existing snapshot. This method can also be used to clean up a aborted snapshot. + * Delete an existing snapshot. This method can also be used to clean up an aborted snapshot. 
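For reference, a minimal sketch of how a client might populate the SnapshotDescription message above, assuming the protobuf-generated HBaseProtos classes; because of the declared default, callers that omit the type get a FLUSH snapshot:

    import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;

    // Sketch only: the builder methods come from the protobuf code generated for the message above.
    SnapshotDescription snap = SnapshotDescription.newBuilder()
        .setName("mysnap")                        // required; snapshots are unique by name
        .setTable("mytable")                      // not needed for delete, checked when taking a snapshot
        .setType(SnapshotDescription.Type.FLUSH)  // matches the declared default
        .build();                                 // creationTime and version can be left to their defaults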
* @param snapshotName snapshot to delete */ rpc deleteSnapshot(DeleteSnapshotRequest) returns(DeleteSnapshotResponse); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java (working copy) @@ -44,7 +44,8 @@ public class ForeignExceptionDispatcher implements ForeignExceptionListener, ForeignExceptionSnare { public static final Log LOG = LogFactory.getLog(ForeignExceptionDispatcher.class); protected final String name; - protected final List listeners = new ArrayList(); + protected final List listeners = + new ArrayList(); private ForeignException exception; public ForeignExceptionDispatcher(String name) { @@ -69,7 +70,7 @@ if (e != null) { exception = e; } else { - exception = new ForeignException(name, e); + exception = new ForeignException(name, ""); } // notify all the listeners @@ -77,16 +78,16 @@ } @Override - public void rethrowException() throws ForeignException { + public synchronized void rethrowException() throws ForeignException { if (exception != null) { // This gets the stack where this is caused, (instead of where it was deserialized). - // This which is much more useful for debugging + // This is much more useful for debugging throw new ForeignException(exception.getSource(), exception.getCause()); } } @Override - public boolean hasException() { + public synchronized boolean hasException() { return exception != null; } @@ -102,7 +103,6 @@ */ private void dispatch(ForeignException e) { // update all the listeners with the passed error - LOG.debug(name + " Recieved error, notifying listeners..."); for (ForeignExceptionListener l: listeners) { l.receive(e); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java (working copy) @@ -21,7 +21,7 @@ import org.apache.hadoop.classification.InterfaceStability; /** - * Exception for a timeout of a task. + * Exception for timeout of a task. * @see TimeoutExceptionInjector */ @InterfaceAudience.Public Index: hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java (working copy) @@ -52,20 +52,6 @@ private final String source; /** - * Create a new ForeignException that can be serialized. It is assumed that this came from a - * remote source. - * @param source - * @param cause - */ - private ForeignException(String source, String clazz, ProxyThrowable cause) { - super(cause); - assert source != null; - assert cause != null; - assert clazz != null; - this.source = source; - } - - /** * Create a new ForeignException that can be serialized. It is assumed that this came form a * local source. * @param source @@ -114,7 +100,7 @@ /** * Convert a stack trace to list of {@link StackTraceElement}. 
- * @param stackTrace the stack trace to convert to protobuf message + * @param trace the stack trace to convert to protobuf message * @return null if the passed stack is null. */ private static List toStackTraceElementMessages( @@ -146,10 +132,10 @@ } /** - * Converts an ForeignException to a array of bytes. + * Converts a ForeignException to an array of bytes. * @param source the name of the external exception source * @param t the "local" external exception (local) - * @return protobuf serialized version of ForeignThreadException + * @return protobuf serialized version of ForeignException */ public static byte[] serialize(String source, Throwable t) { GenericExceptionMessage.Builder gemBuilder = GenericExceptionMessage.newBuilder(); @@ -158,7 +144,8 @@ gemBuilder.setMessage(t.getMessage()); } // set the stack trace, if there is one - List stack = ForeignException.toStackTraceElementMessages(t.getStackTrace()); + List stack = + ForeignException.toStackTraceElementMessages(t.getStackTrace()); if (stack != null) { gemBuilder.addAllTrace(stack); } @@ -172,16 +159,16 @@ /** * Takes a series of bytes and tries to generate an ForeignException instance for it. * @param bytes - * @return the ExternalExcpetion instance + * @return the ForeignException instance * @throws InvalidProtocolBufferException if there was deserialization problem this is thrown. */ public static ForeignException deserialize(byte[] bytes) throws InvalidProtocolBufferException { // figure out the data we need to pass ForeignExceptionMessage eem = ForeignExceptionMessage.parseFrom(bytes); GenericExceptionMessage gem = eem.getGenericException(); - StackTraceElement [] trace = ForeignException.toStack(gem.getTraceList()); + StackTraceElement [] trace = ForeignException.toStackTrace(gem.getTraceList()); ProxyThrowable dfe = new ProxyThrowable(gem.getMessage(), trace); - ForeignException e = new ForeignException(eem.getSource(), gem.getClassName(), dfe); + ForeignException e = new ForeignException(eem.getSource(), dfe); return e; } @@ -192,7 +179,7 @@ * @return the deserialized list or null if it couldn't be unwound (e.g. wasn't set on * the sender).
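Taken together, the serialize and deserialize hunks above form the round trip below; a short sketch assuming the patched ForeignException API shown in this file:

    // A "local" throwable is flattened into a protobuf message on the sending side...
    byte[] wire = ForeignException.serialize("member1", new IllegalStateException("boom"));
    // ...and rebuilt on the receiving side with the proxied stack trace as its cause.
    ForeignException remote = ForeignException.deserialize(wire); // throws InvalidProtocolBufferException on bad bytes
    assert "member1".equals(remote.getSource());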
*/ - private static StackTraceElement[] toStack(List traceList) { + private static StackTraceElement[] toStackTrace(List traceList) { if (traceList == null || traceList.size() == 0) { return new StackTraceElement[0]; // empty array } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java (working copy) @@ -54,8 +54,8 @@ private final SubprocedureFactory builder; private final ProcedureMemberRpcs rpcs; -// private final WeakValueMapping subprocs = new WeakValueMapping(); - private final ConcurrentMap subprocs = new MapMaker().concurrencyLevel(4).weakValues().makeMap(); + private final ConcurrentMap subprocs = + new MapMaker().concurrencyLevel(4).weakValues().makeMap(); private final ExecutorService pool; /** @@ -167,7 +167,8 @@ public void receivedReachedGlobalBarrier(String procName) { Subprocedure subproc = subprocs.get(procName); if (subproc == null) { - LOG.warn("Unexpected reached glabal barrier message for Procedure '" + procName + "'"); + LOG.warn("Unexpected reached global barrier message for Sub-Procedure '" + procName + "'"); + return; } subproc.receiveReachedGlobalBarrier(); } @@ -187,7 +188,7 @@ * @return true if successfully, false if bailed due to timeout. * @throws InterruptedException */ - public boolean closeAndWait(long timeoutMs) throws InterruptedException { + boolean closeAndWait(long timeoutMs) throws InterruptedException { pool.shutdown(); return pool.awaitTermination(timeoutMs, TimeUnit.MILLISECONDS); } @@ -204,9 +205,9 @@ */ public void controllerConnectionFailure(final String message, final IOException cause) { Collection toNotify = subprocs.values(); + LOG.error(message, cause); for (Subprocedure sub : toNotify) { // TODO notify the elements, if they aren't null sub.cancel(message, cause); } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Subprocedure.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Subprocedure.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Subprocedure.java (working copy) @@ -181,7 +181,7 @@ insideBarrier(); LOG.debug("Subprocedure '" + barrierName + "' locally completed"); - // Ack that the member has executed and relased local barrier + // Ack that the member has executed and released local barrier rpcs.sendMemberCompleted(this); LOG.debug("Subprocedure '" + barrierName + "' has notified controller of completion"); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java (working copy) @@ -165,8 +165,7 @@ } /** - * Returns a copy of the procedure members still trying to enter the barrier.
- * @return + * @return a String listing both the procedure members still trying to enter the barrier and those already in it */ public String getStatus() { String waiting, done; synchronized (joinBarrierLock) { waiting = acquiringMembers.toString(); done = inBarrierMembers.toString(); } return "Procedure " + procName + " { waiting=" + waiting + " done="+ done + " }"; } - + /** - * Get the ExternalErrorDispatcher + * Get the ForeignExceptionDispatcher * @return the Procedure's monitor. */ public ForeignExceptionDispatcher getErrorMonitor() { @@ -306,7 +305,7 @@ LOG.debug("Waiting on: " + acquiredBarrierLatch + " remaining members to acquire global barrier"); } else { LOG.warn("Member " + member + " joined barrier, but we weren't waiting on it to join." + - " Continuting on."); + " Continuing on."); } } @@ -345,7 +344,7 @@ } /** - * A callback that handles incoming ExternalExceptions. + * A callback that handles incoming ForeignExceptions. */ @Override public void receive(ForeignException e) { @@ -371,7 +370,9 @@ if (monitor != null) { monitor.rethrowException(); } - ForeignExceptionDispatcher.LOG.debug("Waiting for '" + latchDescription + "' latch. (sleep:" + wakeFrequency + " ms)"); + /* + ForeignExceptionDispatcher.LOG.debug("Waiting for '" + latchDescription + "' latch. (sleep:" + + wakeFrequency + " ms)"); */ released = latch.await(wakeFrequency, TimeUnit.MILLISECONDS); } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java (working copy) @@ -66,6 +66,9 @@ * appear, first acquire to relevant listener or sets watch waiting for notification of * the acquire node * + * @param proc the Procedure + * @param info data to be stored in the acquire node + * @param nodeNames children of the acquire phase * @throws IOException if any failure occurs. */ @Override @@ -79,12 +82,10 @@ if (ZKUtil.watchAndCheckExists(zkProc.getWatcher(), abortNode)) { abort(abortNode); } - // If we get an abort node watch triggered here, we'll go complete creating the acquired // znode but then handle the acquire znode and bail out - } catch (KeeperException e) { - LOG.error("Failed to create abort", e); + LOG.error("Failed to watch abort", e); throw new IOException("Failed while watching abort node:" + abortNode, e); } @@ -155,11 +156,12 @@ * Start monitoring znodes in ZK - subclass hook to start monitoring znodes they are about. * @return true if succeed, false if encountered initialization errors.
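The acquire/reached/abort handshake that Procedure and these coordinator RPCs drive through ZooKeeper is essentially a two-phase barrier. A plain java.util.concurrent illustration of the same flow (not the HBase API, just the pattern):

    import java.util.concurrent.CountDownLatch;

    public class TwoPhaseBarrierDemo {
      public static void main(String[] args) throws InterruptedException {
        final int members = 3;
        final CountDownLatch acquired = new CountDownLatch(members); // like the acquired znodes
        final CountDownLatch inBarrier = new CountDownLatch(1);      // like the coordinator's reached signal
        final CountDownLatch done = new CountDownLatch(members);     // like the member completion acks
        for (int i = 0; i < members; i++) {
          final int id = i;
          new Thread(new Runnable() {
            public void run() {
              acquired.countDown();            // analogous to sendMemberAcquired
              try {
                inBarrier.await();             // analogous to receivedReachedGlobalBarrier
                System.out.println("member " + id + " doing in-barrier work");
              } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
              }
              done.countDown();                // analogous to sendMemberCompleted
            }
          }).start();
        }
        acquired.await();      // coordinator: all members joined the barrier
        inBarrier.countDown(); // release the global barrier
        done.await();          // wait for every member to finish
      }
    }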
*/ - final public boolean start(final ProcedureCoordinator listener) { + final public boolean start(final ProcedureCoordinator coordinator) { if (this.coordinator != null) { - throw new IllegalStateException("ZKProcedureCoordinator already started and already has listener installed"); + throw new IllegalStateException( + "ZKProcedureCoordinator already started and already has listener installed"); } - this.coordinator = listener; + this.coordinator = coordinator; try { this.zkProc = new ZKProcedureUtil(watcher, procedureType, coordName) { @@ -170,15 +172,15 @@ logZKTree(this.baseZNode); if (isAcquiredPathNode(path)) { // node wasn't present when we created the watch so zk event triggers acquire - listener.memberAcquiredBarrier(ZKUtil.getNodeName(ZKUtil.getParent(path)), ZKUtil.getNodeName(path)); - } - if (isReachedPathNode(path)) { - // node wasn't present when we created the watch so zk event triggers the finished barrier. + coordinator.memberAcquiredBarrier(ZKUtil.getNodeName(ZKUtil.getParent(path)), + ZKUtil.getNodeName(path)); + } else if (isReachedPathNode(path)) { + // node was absent when we created the watch so zk event triggers the finished barrier. - // TODO Nothing enforces that acquire and reached znodes from showing up in the wrong order. - listener.memberFinishedBarrier(ZKUtil.getNodeName(ZKUtil.getParent(path)), ZKUtil.getNodeName(path)); - } - if (isAbortPathNode(path)) { + // TODO Nothing prevents acquire and reached znodes from showing up in the wrong order. + coordinator.memberFinishedBarrier(ZKUtil.getNodeName(ZKUtil.getParent(path)), + ZKUtil.getNodeName(path)); + } else if (isAbortPathNode(path)) { abort(path); } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java (working copy) @@ -234,7 +234,7 @@ * @param root name of the root directory in zk to print * @throws KeeperException */ - public void logZKTree(String root) { + void logZKTree(String root) { if (!LOG.isDebugEnabled()) return; LOG.debug("Current zk system:"); String prefix = "|-"; Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java (working copy) @@ -112,7 +112,7 @@ } String procName = proc.getName(); - // make sure we aren't already running an procedure of that name + // make sure we aren't already running a procedure of that name synchronized (procedures) { Procedure oldProc = procedures.get(procName); if (oldProc != null) { @@ -129,9 +129,9 @@ // kick off the procedure's execution in a separate thread Future f = null; try { - f = this.pool.submit(proc); - // if everything got started properly, we can add it known running procedures synchronized (procedures) { + f = this.pool.submit(proc); + // if everything got started properly, we can add it to the known running procedures this.procedures.put(procName, proc); } return true; Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java =================================================================== ---
hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java (working copy) @@ -62,7 +62,7 @@ private ZKProcedureUtil zkController; /** - * Must call {@link #start(ProcedureMember)} before this is can be used. + * Must call {@link #start(ProcedureMember)} before this can be used. * @param watcher {@link ZooKeeperWatcher} to be owned by this. Closed via * {@link #close()}. * @param procType name of the znode describing the procedure type @@ -120,12 +120,6 @@ return zkController; } - public void start() { - LOG.debug("Starting the procedure member"); - watchForAbortedProcedures(); - waitForNewProcedures(); - } - @Override public String getMemberName() { return memberName; @@ -145,7 +139,8 @@ LOG.debug("Checking for aborted procedures on node: '" + zkController.getAbortZnode() + "'"); try { // this is the list of the currently aborted procedues - for (String node : ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), zkController.getAbortZnode())) { + for (String node : ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), + zkController.getAbortZnode())) { String abortNode = ZKUtil.joinZNode(zkController.getAbortZnode(), node); abort(abortNode); } @@ -157,11 +152,12 @@ private void waitForNewProcedures() { // watch for new procedues that we need to start subprocedures for - LOG.debug("Looking for new procedures under znode: '" + zkController.getAcquiredBarrier() + "'"); - List runningProcedure = null; + LOG.debug("Looking for new procedures under znode:'" + zkController.getAcquiredBarrier() + "'"); + List runningProcedures = null; try { - runningProcedure = ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), zkController.getAcquiredBarrier()); - if (runningProcedure == null) { + runningProcedures = ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), + zkController.getAcquiredBarrier()); + if (runningProcedures == null) { LOG.debug("No running procedures."); return; } @@ -169,7 +165,11 @@ member.controllerConnectionFailure("General failure when watching for new procedures", new IOException(e)); } - for (String procName : runningProcedure) { + if (runningProcedures == null) { + LOG.debug("No running procedures."); + return; + } + for (String procName : runningProcedures) { // then read in the procedure information String path = ZKUtil.joinZNode(zkController.getAcquiredBarrier(), procName); startNewSubprocedure(path); @@ -177,7 +177,7 @@ } /** - * Kick off a new procedure on the listener with the data stored in the passed znode. + * Kick off a new sub-procedure on the listener with the data stored in the passed znode. *

* Will attempt to create the same procedure multiple times if an procedure znode with the same * name is created. It is left up the coordinator to ensure this doesn't occur. @@ -238,7 +238,8 @@ try { LOG.debug("Member: '" + memberName + "' joining acquired barrier for procedure (" + procName + ") in zk"); - String acquiredZNode = ZKUtil.joinZNode(ZKProcedureUtil.getAcquireBarrierNode(zkController, procName), memberName); + String acquiredZNode = ZKUtil.joinZNode(ZKProcedureUtil.getAcquireBarrierNode( + zkController, procName), memberName); ZKUtil.createAndFailSilent(zkController.getWatcher(), acquiredZNode); // watch for the complete node for this snapshot @@ -254,7 +255,7 @@ } /** - * This acts as the ack for a completed + * This acts as the ack for a completed snapshot */ @Override public void sendMemberCompleted(Subprocedure sub) throws IOException { @@ -278,12 +279,12 @@ public void sendMemberAborted(Subprocedure sub, ForeignException ee) { if (sub == null) { LOG.error("Failed due to null subprocedure", ee); + return; } String procName = sub.getName(); LOG.debug("Aborting procedure (" + procName + ") in zk"); String procAbortZNode = zkController.getAbortZNode(procName); try { - LOG.debug("Creating abort znode:" + procAbortZNode); String source = (ee.getSource() == null) ? memberName: ee.getSource(); byte[] errorInfo = ProtobufUtil.prependPBMagic(ForeignException.serialize(source, ee)); ZKUtil.createAndFailSilent(zkController.getWatcher(), procAbortZNode, errorInfo); @@ -316,9 +317,10 @@ LOG.error(msg); // we got a remote exception, but we can't describe it so just return exn from here ee = new ForeignException(getMemberName(), new IllegalArgumentException(msg)); + } else { + data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); + ee = ForeignException.deserialize(data); } - data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); - ee = ForeignException.deserialize(data); } catch (InvalidProtocolBufferException e) { LOG.warn("Got an error notification for op:" + opName + " but we can't read the information. Killing the procedure."); @@ -336,7 +338,8 @@ public void start(ProcedureMember listener) { LOG.debug("Starting procedure member '" + this.memberName + "'"); this.member = listener; - this.start(); + watchForAbortedProcedures(); + waitForNewProcedures(); } @Override Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java (working copy) @@ -362,7 +362,8 @@ * Extract the list of files (HFiles/HLogs) to copy using Map-Reduce. 
* @return list of files referenced by the snapshot (pair of path and size) */ - private List> getSnapshotFiles(final FileSystem fs, final Path snapshotDir) throws IOException { + private List> getSnapshotFiles(final FileSystem fs, final Path snapshotDir) + throws IOException { SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); final List> files = new ArrayList>(); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (working copy) @@ -95,7 +95,7 @@ */ @InterfaceAudience.Private public class RestoreSnapshotHelper { - private static final Log LOG = LogFactory.getLog(RestoreSnapshotHelper.class); + private static final Log LOG = LogFactory.getLog(RestoreSnapshotHelper.class); private final Map regionsMap = new TreeMap(Bytes.BYTES_COMPARATOR); @@ -290,7 +290,7 @@ } /** - * Restore region by removing files not it in the snapshot + * Restore region by removing files not in the snapshot * and adding the missing ones from the snapshot. */ private void restoreRegion(HRegionInfo regionInfo) throws IOException { Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/UnexpectedSnapshotException.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/UnexpectedSnapshotException.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/UnexpectedSnapshotException.java (working copy) @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.snapshot; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; - -/** - * General exception when an unexpected error occurs while running a snapshot. 
- */ -@SuppressWarnings("serial") -@InterfaceAudience.Public -@InterfaceStability.Evolving -public class UnexpectedSnapshotException extends HBaseSnapshotException { - - /** - * General exception for some cause - * @param msg reason why the snapshot couldn't be completed - * @param cause root cause of the failure - * @param snapshot description of the snapshot attempted - */ - public UnexpectedSnapshotException(String msg, Exception cause, SnapshotDescription snapshot) { - super(msg, cause, snapshot); - } - -} Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/TablePartiallyOpenException.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/TablePartiallyOpenException.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/TablePartiallyOpenException.java (working copy) @@ -24,7 +24,7 @@ import org.apache.hadoop.hbase.util.Bytes; /** - * Thrown if a table should be online/offline but is partial open + * Thrown if a table should be online/offline but is partially open */ @InterfaceAudience.Public @InterfaceStability.Evolving Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotTask.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotTask.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotTask.java (working copy) @@ -19,6 +19,7 @@ import java.util.concurrent.Callable; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.errorhandling.ForeignException; import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare; import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; @@ -27,6 +28,7 @@ /** * General snapshot operation taken on a regionserver */ +@InterfaceAudience.Private public abstract class SnapshotTask implements ForeignExceptionSnare, Callable{ protected final SnapshotDescription snapshot; Index: hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (working copy) @@ -949,7 +949,7 @@ /** * A {@link PathFilter} that returns only regular files. */ - public static class FileFilter implements PathFilter { + static class FileFilter implements PathFilter { private final FileSystem fs; public FileFilter(final FileSystem fs) { @@ -995,21 +995,6 @@ } /** - * Filter out paths that are hidden (start with '.') and are not directories. 
- */ - public static class VisibleDirectory extends DirFilter { - - public VisibleDirectory(FileSystem fs) { - super(fs); - } - - @Override - public boolean accept(Path file) { - return super.accept(file) && !file.getName().startsWith("."); - } - } - - /** * Heuristic to determine whether is safe or not to open a file for append * Looks both for dfs.support.append and use reflection to search * for SequenceFile.Writer.syncFs() or FSDataOutputStream.hflush() Index: hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSVisitor.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSVisitor.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSVisitor.java (working copy) @@ -118,7 +118,7 @@ } /** - * Iterate over each region in the table the table and inform about recovered.edits + * Iterate over each region in the table and inform about recovered.edits * * @param fs {@link FileSystem} * @param tableDir {@link Path} to the table directory Index: hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (working copy) @@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; +import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.regionserver.HRegion; @@ -187,26 +188,14 @@ public static void deleteRegions(final Configuration conf, final FileSystem fs, final CatalogTracker catalogTracker, final List regions) throws IOException { if (regions != null && regions.size() > 0) { + List deletes = new ArrayList(regions.size()); for (HRegionInfo hri: regions) { - deleteRegion(conf, fs, catalogTracker, hri); + deletes.add(new Delete(hri.getRegionName())); + + // "Delete" region from FS + HFileArchiver.archiveRegion(conf, fs, hri); } + MetaEditor.deleteFromMetaTable(catalogTracker, deletes); } } - - /** - * Remove region from file-system and .META. - * (The region must be offline). - * - * @param fs {@link FileSystem} on which to delete the region directory - * @param catalogTracker the catalog tracker - * @param regionInfo {@link HRegionInfo} to delete. - */ - public static void deleteRegion(final Configuration conf, final FileSystem fs, - final CatalogTracker catalogTracker, final HRegionInfo regionInfo) throws IOException { - // Remove region from .META. - MetaEditor.deleteRegion(catalogTracker, regionInfo); - - // "Delete" region from FS - HFileArchiver.archiveRegion(conf, fs, regionInfo); - } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (working copy) @@ -2116,7 +2116,8 @@ } /** - * Create a timestamp consistent snapshot for the given table. + * Take a snapshot for the given table. If the table is enabled, a FLUSH-type snapshot will be + * taken. If the table is disabled, an offline snapshot is taken. *
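A sketch of the client call this javadoc describes, assuming an already-populated Configuration named conf and the SnapshotDescription builder from hbase.proto:

    HBaseAdmin admin = new HBaseAdmin(conf);
    SnapshotDescription snap = SnapshotDescription.newBuilder()
        .setName("mysnap")
        .setTable("mytable")
        .build();
    // Blocks until done: a FLUSH snapshot if 'mytable' is enabled, an offline snapshot if disabled.
    admin.snapshot(snap);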

* Snapshots are considered unique based on the name of the snapshot. Attempts to take a * snapshot with the same name (even a different type or with different parameters) will fail with @@ -2206,9 +2207,6 @@ */ public void snapshot(SnapshotDescription snapshot) throws IOException, SnapshotCreationException, IllegalArgumentException { - // make sure the snapshot is valid - SnapshotDescriptionUtils.assertSnapshotRequestIsValid(snapshot); - // actually take the snapshot TakeSnapshotResponse response = takeSnapshotAsync(snapshot); final IsSnapshotDoneRequest request = IsSnapshotDoneRequest.newBuilder().setSnapshot(snapshot) @@ -2226,9 +2224,9 @@ try { // sleep a backoff <= pauseTime amount long sleep = getPauseTime(tries++); - LOG.debug("Found sleep:" + sleep); sleep = sleep > maxPauseTime ? maxPauseTime : sleep; - LOG.debug(tries + ") Sleeping: " + sleep + " ms while we wait for snapshot to complete."); + LOG.debug("(#" + tries + ") Sleeping: " + sleep + + "ms while waiting for snapshot completion."); Thread.sleep(sleep); } catch (InterruptedException e) { @@ -2242,8 +2240,7 @@ return masterAdmin.isSnapshotDone(null, request); } }); - } - ; + }; if (!done.getDone()) { throw new SnapshotCreationException("Snapshot '" + snapshot.getName() + "' wasn't completed in expectedTime:" + max + " ms", snapshot); @@ -2251,7 +2248,7 @@ } /** - * Take a snapshot and wait for the server to complete that snapshot (asynchronous) + * Take a snapshot without waiting for the server to complete that snapshot (asynchronous) *
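With this asynchronous overload the caller polls for completion itself; the capped backoff used by the synchronous wrapper in the hunk above looks like this in isolation (plain Java, illustration only, not the HBase implementation):

    interface DoneCheck { boolean isDone(); } // stands in for masterAdmin.isSnapshotDone(...)

    static boolean waitUntilDone(DoneCheck check, long maxWaitMs, long maxPauseMs)
        throws InterruptedException {
      long waited = 0;
      for (int tries = 0; waited < maxWaitMs; tries++) {
        if (check.isDone()) return true;
        long sleep = Math.min(100L << Math.min(tries, 16), maxPauseMs); // backoff, capped like the hunk above
        Thread.sleep(sleep);
        waited += sleep;
      }
      return check.isDone();
    }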

* Only a single snapshot should be taken at a time, or results may be undefined. * @param snapshot snapshot to take @@ -2309,7 +2306,7 @@ /** * Restore the specified snapshot on the original table. (The table must be disabled) * Before restoring the table, a new snapshot with the current table state is created. - * In case of failure, the table will be rolled back to the its original state. + * In case of failure, the table will be rolled back to its original state. * * @param snapshotName name of the snapshot to restore * @throws IOException if a remote or network exception occurs @@ -2358,7 +2355,7 @@ // Try to rollback try { String msg = "Restore snapshot=" + snapshotName + - " failed. Rollback to snapshot=" + rollbackSnapshot + " succeded."; + " failed. Rollback to snapshot=" + rollbackSnapshot + " succeeded."; LOG.error(msg, e); internalRestoreSnapshot(rollbackSnapshot, tableName); throw new RestoreSnapshotException(msg, e); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java (working copy) @@ -34,7 +34,7 @@ import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; /** - * This online snapshot implementation forces uses the distributed procedure framework to force a + * This online snapshot implementation uses the distributed procedure framework to force a * store flush and then records the hfiles. Its enter stage does nothing. Its leave stage then * flushes the memstore, builds the region server's snapshot manifest from its hfiles list, and * copies .regioninfos into the snapshot working directory. At the master side, there is an atomic @@ -102,9 +102,9 @@ if (taskManager.hasTasks()) { throw new IllegalStateException("Attempting to take snapshot " + SnapshotDescriptionUtils.toString(snapshot) - + " but we have currently have outstanding tasks"); + + " but we currently have outstanding tasks"); } - + // Add all hfiles already existing in region. for (HRegion region : regions) { // submit one task per region for parallelize by region. Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java (working copy) @@ -73,7 +73,7 @@ public class RegionServerSnapshotManager { private static final Log LOG = LogFactory.getLog(RegionServerSnapshotManager.class); - /** Maximum number of concurrent snapshot region tasks that can run concurrently */ + /** Maximum number of snapshot region tasks that can run concurrently */ private static final String CONCURENT_SNAPSHOT_TASKS_KEY = "hbase.snapshot.region.concurrentTasks"; private static final int DEFAULT_CONCURRENT_SNAPSHOT_TASKS = 3; @@ -212,8 +212,8 @@ /** * Determine if the snapshot should be handled on this server * - * NOTE: This is racy -- the master expects a list of regionservers, but the regions get the - * regions. This means if a region moves somewhere between the calls we'll miss some regions. 
+ * NOTE: This is racy -- the master expects a list of regionservers. + * This means if a region moves somewhere between the calls we'll miss some regions. * For example, a region move during a snapshot could result in a region to be skipped or done * twice. This is manageable because the {@link MasterSnapshotVerifier} will double check the * region lists after the online portion of the snapshot completes and will explicitly fail the @@ -297,7 +297,7 @@ /** * Wait for all of the currently outstanding tasks submitted via {@link #submitTask(Callable)}. - * This *must* be called to after all tasks are submitted via submitTask. + * This *must* be called after all tasks are submitted via submitTask. * * @return true on success, false otherwise * @throws InterruptedException @@ -313,7 +313,7 @@ Future f = taskPool.take(); f.get(); if (!futures.remove(f)) { - LOG.warn("unexpected future"); + LOG.warn("unexpected future " + f); } LOG.debug("Completed " + (i+1) + "/" + sz + " local region snapshots."); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java (working copy) @@ -39,7 +39,7 @@ /** * Handle the master side of taking a snapshot of an online table, regardless of snapshot type. - * Uses a {@link Procedure} to run the snapshot across all the involved regions. + * Uses a {@link Procedure} to run the snapshot across all the involved region servers. * @see ProcedureCoordinator */ @InterfaceAudience.Private @@ -84,7 +84,7 @@ // wait for the snapshot to complete. A timer thread is kicked off that should cancel this // if it takes too long. proc.waitForCompleted(); - LOG.info("Done waiting - snapshot finished!"); + LOG.info("Done waiting - snapshot for " + this.snapshot.getName() + " finished!"); } catch (InterruptedException e) { ForeignException ee = new ForeignException("Interrupted while waiting for snapshot to finish", e); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java (working copy) @@ -80,10 +80,11 @@ /** * The restore table is executed in place.
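The failure handling in the HBaseAdmin.restoreSnapshot hunk above boils down to the pattern below; a simplified sketch, where snapshot(...) and internalRestoreSnapshot(...) are the methods visible in that hunk and the failsafe snapshot name is illustrative:

    String rollbackSnapshot = "hbase-failsafe-" + snapshotName; // illustrative name
    snapshot(rollbackSnapshot, tableName);         // preserve the current table state first
    try {
      internalRestoreSnapshot(snapshotName, tableName);
    } catch (IOException e) {
      // restore failed: put the table back the way it was, then surface the failure
      internalRestoreSnapshot(rollbackSnapshot, tableName);
      throw new RestoreSnapshotException("Restore snapshot=" + snapshotName
          + " failed. Rollback to snapshot=" + rollbackSnapshot + " succeeded.", e);
    }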
- * - The on-disk data will be restored - * - [if something fail here: you need to delete the table and re-run the restore] + * - The on-disk data will be restored - reference files are put in place without moving data + * - [if something fails here: you need to delete the table and re-run the restore] * - META will be updated - * - [if something fail here: you need to run hbck to fix META entries] + * - [if something fails here: you need to run hbck to fix META entries] + * The passed-in list is modified by this method */ @Override protected void handleTableOperation(List hris) throws IOException { Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java (working copy) @@ -44,7 +44,8 @@ * Conf key for the frequency to attempt to refresh the cache of hfiles currently used in * snapshots (ms) */ - public static final String HFILE_CACHE_REFRESH_PERIOD_CONF_KEY = "hbase.master.hfilecleaner.plugins.snapshot.period"; + public static final String HFILE_CACHE_REFRESH_PERIOD_CONF_KEY = + "hbase.master.hfilecleaner.plugins.snapshot.period"; /** Refresh cache, by default, every 5 minutes */ private static final long DEFAULT_HFILE_CACHE_REFRESH_PERIOD = 300000; Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java (working copy) @@ -52,6 +52,8 @@ * A handler for taking snapshots from the master. * * This is not a subclass of TableEventHandler because using that would incur an extra META scan. + * + * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor. */ @InterfaceAudience.Private public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel, @@ -91,6 +93,8 @@ this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); this.monitor = new ForeignExceptionDispatcher(); + loadTableDescriptor(); // check that .tableinfo is present + // prepare the verify this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir); } @@ -107,19 +111,13 @@ } /** - * Execute the core common portions of taking a snapshot. the {@link #snapshotRegions(List)} + * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)} * call should get implemented for each snapshot flavor. */ @Override public void process() { LOG.info("Running table snapshot operation " + eventType + " on table " + snapshot.getTable()); try { - loadTableDescriptor(); // check that .tableinfo is present - - byte[] ssbytes = Bytes.toBytes(snapshot.getTable()); - List> regionsAndLocations = MetaReader.getTableRegionsAndLocations( - this.server.getCatalogTracker(), ssbytes, true); - // If regions move after this meta scan, the region specific snapshot should fail, triggering // an external exception that gets captured here.
@@ -128,6 +126,10 @@ new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call(); monitor.rethrowException(); + List> regionsAndLocations = + MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(), + Bytes.toBytes(snapshot.getTable()), true); + // run the snapshot snapshotRegions(regionsAndLocations); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java (working copy) @@ -58,10 +58,10 @@ * Further, the cache is periodically refreshed ensure that files in snapshots that were deleted are * also removed from the cache. *

- * A {@link SnapshotFileInspector} must be passed when creating this to allow extraction of files - * under the /hbase/.snapshot/[snapshot name] directory, for each snapshot. This allows you to only - * cache files under, for instance, all the logs in the .logs directory or all the files under all - * the regions. + * A {@link SnapshotFileInspector} must be passed when creating this to allow extraction + * of files under the /hbase/.snapshot/[snapshot name] directory, for each snapshot. + * This allows you to only cache files under, for instance, all the logs in the .logs directory or + * all the files under all the regions. *
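The refresh gate this cache relies on (visible further down in the lastModifiedTime field and the getFileStatus check) can be pictured as follows; a rough sketch of the assumed logic, not the actual class:

    // Assumed simplification of SnapshotFileCache's modification-time-gated refresh.
    class MtimeGatedCache {
      private long lastModifiedTime = Long.MIN_VALUE;
      private final java.util.Set<String> cache = new java.util.HashSet<String>();

      synchronized void refreshIfNeeded(org.apache.hadoop.fs.FileSystem fs,
          org.apache.hadoop.fs.Path snapshotRoot) throws java.io.IOException {
        long dirMtime = fs.getFileStatus(snapshotRoot).getModificationTime();
        if (dirMtime <= lastModifiedTime) return; // unchanged since the last scan: keep the cache
        cache.clear();                            // rescan, dropping files from deleted snapshots
        // ... re-add the file names each SnapshotFileInspector reports per snapshot dir ...
        lastModifiedTime = dirMtime;
      }
    }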

* this also considers all running snapshots (those under /hbase/.snapshot/.tmp) as valid * snapshots and will attempt to cache files from those snapshots as well. @@ -71,7 +71,7 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class SnapshotFileCache implements Stoppable { - public interface SnapshotFileInspector { + interface SnapshotFileInspector { /** * Returns a collection of file names needed by the snapshot. * @param snapshotDir {@link Path} to the snapshot directory to scan. @@ -90,7 +90,8 @@ * This is a helper map of information about the snapshot directories so we don't need to rescan * them if they haven't changed since the last time we looked. */ - private final Map snapshots = new HashMap(); + private final Map snapshots = + new HashMap(); private final Timer refreshTimer; private long lastModifiedTime = Long.MIN_VALUE; @@ -118,7 +119,7 @@ * filesystem * @param fs {@link FileSystem} where the snapshots are stored * @param rootDir hbase root directory - * @param cacheRefreshPeriod frequency (ms) with which the cache should be refreshed + * @param cacheRefreshPeriod period (ms) with which the cache should be refreshed * @param cacheRefreshDelay amount of time to wait for the cache to be refreshed * @param refreshThreadName name of the cache refresh thread * @param inspectSnapshotFiles Filter to apply to each snapshot to extract the files. @@ -143,8 +144,11 @@ * Exposed for TESTING. */ public void triggerCacheRefreshForTesting() { - LOG.debug("Triggering cache refresh"); - new RefreshCacheTask().run(); + try { + SnapshotFileCache.this.refreshCache(); + } catch (IOException e) { + LOG.warn("Failed to refresh snapshot hfile cache!", e); + } LOG.debug("Current cache:" + cache); } @@ -184,7 +188,7 @@ try { status = fs.getFileStatus(snapshotDir); } catch (FileNotFoundException e) { - LOG.warn("Snapshot directory: " + snapshotDir + " doesn't exist"); + LOG.error("Snapshot directory: " + snapshotDir + " doesn't exist"); return; } // if the snapshot directory wasn't modified since we last check, we are done Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/MasterSnapshotVerifier.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/MasterSnapshotVerifier.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/MasterSnapshotVerifier.java (working copy) @@ -84,7 +84,6 @@ private MasterServices services; /** - * Build a util for the given snapshot * @param services services for the master * @param snapshot snapshot to check * @param rootDir root directory of the hbase installation. @@ -137,7 +136,7 @@ } /** - * Check that all the regions in the the snapshot are valid, and accounted for. + * Check that all the regions in the snapshot are valid, and accounted for. * @param snapshotDir snapshot directory to check * @throws IOException if we can't reach .META. 
or read the files from the FS */ @@ -146,7 +145,7 @@ Bytes.toBytes(tableName)); for (HRegionInfo region : regions) { // if offline split parent, skip it - if (region.isOffline() || region.isSplit() || region.isSplitParent()) { + if (region.isOffline() && (region.isSplit() || region.isSplitParent())) { continue; } @@ -156,6 +155,7 @@ /** * Verify that the region (regioninfo, hfiles) are valid + * @param fs the FileSystem instance * @param snapshotDir snapshot directory to check * @param region the region to check */ @@ -174,10 +174,15 @@ throw new CorruptedSnapshotException("No region info found for region:" + region, snapshot); } FSDataInputStream in = fs.open(regionInfo); - HRegionInfo found = HRegionInfo.parseFrom(in); - if (!region.equals(found)) { - throw new CorruptedSnapshotException("Found region info (" + found + HRegionInfo found; + try { + found = HRegionInfo.parseFrom(in); + if (!region.equals(found)) { + throw new CorruptedSnapshotException("Found region info (" + found + ") doesn't match expected region:" + region, snapshot); + } + } finally { + in.close(); } // make sure we have the expected recovered edits files @@ -225,20 +230,4 @@ } } } - - /** - * Check that the logs stored in the log directory for the snapshot are valid - it contains all - * the expected logs for all servers involved in the snapshot. - * @param snapshotDir snapshot directory to check - * @param snapshotServers list of the names of servers involved in the snapshot. - * @throws CorruptedSnapshotException if the hlogs in the snapshot are not correct - * @throws IOException if we can't reach the filesystem - */ - private void verifyLogs(Path snapshotDir, Set snapshotServers) - throws CorruptedSnapshotException, IOException { - Path snapshotLogDir = new Path(snapshotDir, HConstants.HREGION_LOGDIR_NAME); - Path logsDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME); - TakeSnapshotUtils.verifyAllLogsGotReferenced(fs, logsDir, snapshotServers, snapshot, - snapshotLogDir); - } } \ No newline at end of file Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java (working copy) @@ -77,7 +77,7 @@ *

* The class provides methods for monitoring in-progress snapshot actions. *

- * Note: Currently there can only one snapshot being taken at a time over the cluster. This is a + * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a * simplification in the current implementation. */ @InterfaceAudience.Private @@ -86,7 +86,7 @@ private static final Log LOG = LogFactory.getLog(SnapshotManager.class); /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */ - public static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500; + private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500; /** Enable or disable snapshot support */ public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled"; @@ -95,16 +95,16 @@ * Conf key for # of ms elapsed between checks for snapshot errors while waiting for * completion. */ - public static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis"; + private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis"; /** By default, check to see if the snapshot is complete (ms) */ - public static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 5000; + private static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 5000; /** * Conf key for # of ms elapsed before injecting a snapshot timeout error when waiting for * completion. */ - public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis"; + private static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis"; /** Name of the operation to use in the controller */ public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot"; @@ -230,7 +230,7 @@ void resetTempDir() throws IOException { // cleanup any existing snapshots. Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir); - if (master.getMasterFileSystem().getFileSystem().delete(tmpdir, true)) { + if (!master.getMasterFileSystem().getFileSystem().delete(tmpdir, true)) { LOG.warn("Couldn't delete working snapshot directory: " + tmpdir); } } @@ -277,7 +277,7 @@ * @param snapshot * @return null if doesn't match, else a live handler. */ - TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) { + private synchronized TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) { TakeSnapshotHandler h = this.handler; if (h == null) { return null; @@ -397,7 +397,7 @@ } try { - // delete the working directory, since we aren't running the snapshot. Likely leftovers + // delete the working directory, since we aren't running the snapshot. Likely leftovers // from a failed attempt. fs.delete(workingDir, true); @@ -415,7 +415,7 @@ } /** - * Take a snapshot of a enabled table. + * Take a snapshot of an enabled table. *
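Several hunks in this file flip inverted checks on FileSystem.delete; the corrected idiom, for reference (FileSystem.delete returns true on success, so the failure branch is the negated case):

    if (!fs.delete(workingDir, true)) { // recursive delete; false means it did NOT fully succeed
      LOG.warn("Couldn't delete working snapshot directory: " + workingDir);
    }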

* The thread limitation on the executorService's thread pool for snapshots ensures the * snapshot won't be started if there is another snapshot already running. Does @@ -434,7 +434,7 @@ // cleanup the working directory by trying to delete it from the fs. Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); try { - if (this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) { + if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) { LOG.warn("Couldn't delete working directory (" + workingDir + " for snapshot:" + SnapshotDescriptionUtils.toString(snapshot)); } @@ -454,7 +454,7 @@ * @throws HBaseSnapshotException when a snapshot specific exception occurs. * @throws IOException when some sort of generic IO exception occurs. */ - public void takeSnapshot(SnapshotDescription snapshot) throws HBaseSnapshotException, IOException { + public void takeSnapshot(SnapshotDescription snapshot) throws IOException { // check to see if we already completed the snapshot if (isSnapshotCompleted(snapshot)) { throw new SnapshotExistsException("Snapshot '" + snapshot.getName() @@ -543,7 +543,7 @@ // cleanup the working directory by trying to delete it from the fs. Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); try { - if (this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) { + if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) { LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" + SnapshotDescriptionUtils.toString(snapshot)); } @@ -596,8 +596,8 @@ } /** - * Restore the specified snapshot. - * The restore will fail if the destination table has a snapshot or restore in progress. + * Clone the specified snapshot into a new table. + * The operation will fail if the destination table has a snapshot or restore in progress. * * @param snapshot Snapshot Descriptor * @param hTableDescriptor Table Descriptor of the table to create @@ -722,7 +722,7 @@ } /** - * Verify if the the restore of the specified table is in progress. + * Verify if the restore of the specified table is in progress. * * @param tableName table under restore * @return true if there is a restore in progress of the specified table. @@ -736,7 +736,7 @@ * Returns status of a restore request, specifically comparing source snapshot and target table * names. Throws exception if not a known snapshot. * @param snapshot - * @return true if in progress, false if is not. + * @return true if in progress, false if snapshot is completed. * @throws UnknownSnapshotException if specified source snapshot does not exit. * @throws IOException if there was some sort of IO failure */ @@ -792,7 +792,7 @@ /** * Scan the restore handlers and remove the finished ones. 
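The cleanup walk this javadoc describes uses Iterator.remove so entries can be dropped mid-scan; a sketch, assuming restoreHandlers maps snapshot names to SnapshotSentinels exposing an isFinished() check:

    Iterator<Map.Entry<String, SnapshotSentinel>> it = restoreHandlers.entrySet().iterator();
    while (it.hasNext()) {
      Map.Entry<String, SnapshotSentinel> entry = it.next();
      if (entry.getValue().isFinished()) {
        it.remove(); // safe while iterating; one reason the method is now synchronized
      }
    }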
*/ - private void cleanupRestoreSentinels() { + private synchronized void cleanupRestoreSentinels() { Iterator> it = restoreHandlers.entrySet().iterator(); while (it.hasNext()) { Map.Entry entry = it.next(); @@ -852,7 +852,7 @@ */ private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs) throws IOException, UnsupportedOperationException { - // Verify if snapshot are disabled by the user + // Verify if snapshot is disabled by the user String enabled = conf.get(HBASE_SNAPSHOT_ENABLED); boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false); boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled); @@ -894,7 +894,7 @@ } } - // Mark snapshot feature as enabled if cleaners are present and user as not disabled it. + // Mark snapshot feature as enabled if cleaners are present and user has not disabled it. this.isSnapshotSupported = snapshotEnabled && !userDisabled; // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotLogCleaner.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotLogCleaner.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotLogCleaner.java (working copy) @@ -44,7 +44,8 @@ * Conf key for the frequency to attempt to refresh the cache of hfiles currently used in * snapshots (ms) */ - static final String HLOG_CACHE_REFRESH_PERIOD_CONF_KEY = "hbase.master.hlogcleaner.plugins.snapshot.period"; + static final String HLOG_CACHE_REFRESH_PERIOD_CONF_KEY = + "hbase.master.hlogcleaner.plugins.snapshot.period"; /** Refresh cache, by default, every 5 minutes */ private static final long DEFAULT_HLOG_CACHE_REFRESH_PERIOD = 300000; @@ -54,6 +55,7 @@ @Override public synchronized boolean isFileDeletable(Path filePath) { try { + if (null == cache) return false; return !cache.contains(filePath.getName()); } catch (IOException e) { LOG.error("Exception while checking if:" + filePath + " was valid, keeping it just in case.", Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java (working copy) @@ -107,10 +107,11 @@ } // 3. 
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java (revision 1446711)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java (working copy)
@@ -107,10 +107,11 @@
       }

       // 3. write the table info to disk
-      LOG.info("Starting to copy tableinfo for offline snapshot: " + SnapshotDescriptionUtils.toString(snapshot));
-      TableInfoCopyTask tableInfo = new TableInfoCopyTask(this.monitor, snapshot, fs,
+      LOG.info("Starting to copy tableinfo for offline snapshot: " +
+          SnapshotDescriptionUtils.toString(snapshot));
+      TableInfoCopyTask tableInfoCopyTask = new TableInfoCopyTask(this.monitor, snapshot, fs,
           FSUtils.getRootDir(conf));
-      tableInfo.call();
+      tableInfoCopyTask.call();
       monitor.rethrowException();
     } catch (Exception e) {
       // make sure we capture the exception to propagate back to the client later
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java (revision 1446711)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java (working copy)
@@ -91,13 +91,15 @@
       // 4. Delete regions from FS (temp directory)
       FileSystem fs = mfs.getFileSystem();
       for (HRegionInfo hri: regions) {
-        LOG.debug("Deleting region " + hri.getRegionNameAsString() + " from FS");
+        LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS");
         HFileArchiver.archiveRegion(fs, mfs.getRootDir(),
             tempTableDir, new Path(tempTableDir, hri.getEncodedName()));
       }

       // 5. Delete table from FS (temp directory)
-      fs.delete(tempTableDir, true);
+      if (!fs.delete(tempTableDir, true)) {
+        LOG.error("Couldn't delete " + tempTableDir);
+      }
     } finally {
       // 6. Update table descriptor cache
       this.masterServices.getTableDescriptors().remove(Bytes.toString(tableName));
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (revision 1446711)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (working copy)
@@ -205,7 +205,7 @@
     try {
       assignmentManager.getZKTable().setEnabledTable(tableName);
     } catch (KeeperException e) {
-      throw new IOException("Unable to ensure that the table will be" +
+      throw new IOException("Unable to ensure that " + tableName + " will be" +
           " enabled because of a ZooKeeper issue", e);
     }
   }
@@ -216,7 +216,8 @@
    * @param tableName name of the table under construction
    * @return the list of regions created
    */
-  protected List<HRegionInfo> handleCreateHdfsRegions(final Path tableRootDir, final String tableName)
+  protected List<HRegionInfo> handleCreateHdfsRegions(final Path tableRootDir,
+      final String tableName)
       throws IOException {
     int regionNumber = newRegions.length;
     ThreadPoolExecutor regionOpenAndInitThreadPool = getRegionOpenAndInitThreadPool(
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java (revision 1446711)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java (working copy)
@@ -75,7 +75,7 @@
         HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFile.HFILE_NAME_REGEX));

   /**
-   * The link should be used for hfile and reference links
+   * The pattern should be used for hfile and reference links
    * that can be found in /hbase/table/region/family/
    */
   private static final Pattern REF_OR_HFILE_LINK_PATTERN =
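For context on REF_OR_HFILE_LINK_PATTERN: link files let a cloned table's store files point at data owned by another table and region instead of copying it. The real pattern is composed from HRegionInfo.ENCODED_REGION_NAME_REGEX and StoreFile.HFILE_NAME_REGEX as the hunk shows; the simplified "table=region-hfile" regex below is an assumption for illustration only.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HFileLinkNameSketch {
  // Toy approximation of a link-name pattern: <table>=<region>-<hfile>.
  private static final Pattern LINK_PATTERN =
      Pattern.compile("^([a-zA-Z_0-9][a-zA-Z_0-9.-]*)=([a-f0-9]+)-([a-f0-9]+)$");

  public static void main(String[] args) {
    Matcher m = LINK_PATTERN.matcher("testtable=1588230740-abcdef0123456789");
    if (m.matches()) {
      // Prints the three components a reader (or archiver) would resolve.
      System.out.println("table=" + m.group(1)
          + " region=" + m.group(2) + " hfile=" + m.group(3));
    }
  }
}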
hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (revision 1446711)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (working copy)
@@ -154,7 +154,7 @@
    * @param deletes Deletes to add to .META. This list should support #remove.
    * @throws IOException
    */
-  static void deleteFromMetaTable(final CatalogTracker ct, final List<Delete> deletes)
+  public static void deleteFromMetaTable(final CatalogTracker ct, final List<Delete> deletes)
       throws IOException {
     HTable t = MetaReader.getMetaHTable(ct);
     try {
Index: hbase-server/src/main/ruby/shell/commands/clone_snapshot.rb
===================================================================
--- hbase-server/src/main/ruby/shell/commands/clone_snapshot.rb (revision 1446711)
+++ hbase-server/src/main/ruby/shell/commands/clone_snapshot.rb (working copy)
@@ -21,10 +21,11 @@
 class CloneSnapshot < Command
   def help
     return <<-EOF
-Create a new table by cloning the snapshot content. Examples:
+Create a new table by cloning the snapshot content. No data is copied, and
+writes to the newly created table will not affect the snapshot data. Examples:

   hbase> clone_snapshot 'snapshotName', 'tableName'
 EOF
   end
Index: hbase-server/src/main/resources/hbase-default.xml
===================================================================
--- hbase-server/src/main/resources/hbase-default.xml (revision 1446711)
+++ hbase-server/src/main/resources/hbase-default.xml (working copy)
@@ -964,7 +964,6 @@
     The thread pool always has at least these number of threads so the REST
     server is ready to serve incoming requests. The default is 2.
->>>>>>> apache/trunk
Index: hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
===================================================================
--- hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 1446711)
+++ hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy)
@@ -749,7 +749,7 @@
   public static final String HFILE_ARCHIVE_DIRECTORY = ".archive";

   /**
-   * Name of the directory to store snapshots all snapshots. See SnapshotDescriptionUtils for
+   * Name of the directory to store all snapshots. See SnapshotDescriptionUtils for
    * remaining snapshot constants; this is here to keep HConstants dependencies at a minimum and
    * uni-directional.
    */
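Returning to the MetaEditor hunk above: widening deleteFromMetaTable to public lets callers outside the catalog package (the snapshot restore path, for instance) batch Deletes against .META. in a single call. A sketch of the method's shape as the hunk shows it, with the error handling simplified:

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;

public class MetaDeleteSketch {
  public static void deleteFromMetaTable(CatalogTracker ct, List<Delete> deletes)
      throws IOException {
    HTable t = MetaReader.getMetaHTable(ct);
    try {
      t.delete(deletes); // one batched call for the whole list
    } finally {
      t.close(); // always release the meta table handle
    }
  }
}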