Index: hbase-protocol/src/main/protobuf/hbase.proto =================================================================== --- hbase-protocol/src/main/protobuf/hbase.proto (revision 1446711) +++ hbase-protocol/src/main/protobuf/hbase.proto (working copy) @@ -267,15 +267,15 @@ * Description of the snapshot to take */ message SnapshotDescription { - required string name = 1; - optional string table = 2; // not needed for delete, but checked for in taking snapshot - optional int64 creationTime = 3 [default = 0]; - enum Type { - DISABLED = 0; - FLUSH = 1; - } - optional Type type = 4 [default = FLUSH]; - optional int32 version = 5; + required string name = 1; + optional string table = 2; // not needed for delete, but checked for in taking snapshot + optional int64 creationTime = 3 [default = 0]; + enum Type { + DISABLED = 0; + FLUSH = 1; + } + optional Type type = 4 [default = FLUSH]; + optional int32 version = 5; } message EmptyMsg { Index: hbase-protocol/src/main/protobuf/ErrorHandling.proto =================================================================== --- hbase-protocol/src/main/protobuf/ErrorHandling.proto (revision 1446711) +++ hbase-protocol/src/main/protobuf/ErrorHandling.proto (working copy) @@ -16,7 +16,7 @@ * limitations under the License. */ -// This file contains protocol buffers that used to error handling +// This file contains protocol buffers that are used for error handling option java_package = "org.apache.hadoop.hbase.protobuf.generated"; option java_outer_classname = "ErrorHandlingProtos"; Index: hbase-protocol/src/main/protobuf/MasterAdmin.proto =================================================================== --- hbase-protocol/src/main/protobuf/MasterAdmin.proto (revision 1446711) +++ hbase-protocol/src/main/protobuf/MasterAdmin.proto (working copy) @@ -330,8 +330,8 @@ rpc execMasterService(CoprocessorServiceRequest) returns(CoprocessorServiceResponse); - /** - * Create a snapshot for the given table. 
+ /** + * Create a snapshot for the given table. * @param snapshot description of the snapshot to take */ rpc snapshot(TakeSnapshotRequest) returns(TakeSnapshotResponse); @@ -343,7 +343,7 @@ rpc listSnapshots(ListSnapshotRequest) returns(ListSnapshotResponse); /** - * Delete an existing snapshot. This method can also be used to clean up a aborted snapshot. + * Delete an existing snapshot. This method can also be used to clean up an aborted snapshot. * @param snapshotName snapshot to delete */ rpc deleteSnapshot(DeleteSnapshotRequest) returns(DeleteSnapshotResponse); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java (working copy) @@ -44,7 +44,8 @@ public class ForeignExceptionDispatcher implements ForeignExceptionListener, ForeignExceptionSnare { public static final Log LOG = LogFactory.getLog(ForeignExceptionDispatcher.class); protected final String name; - protected final List listeners = new ArrayList(); + protected final List listeners = + new ArrayList(); private ForeignException exception; public ForeignExceptionDispatcher(String name) { @@ -69,7 +70,7 @@ if (e != null) { exception = e; } else { - exception = new ForeignException(name, e); + exception = new ForeignException(name, ""); } // notify all the listeners @@ -77,16 +78,16 @@ } @Override - public void rethrowException() throws ForeignException { + public synchronized void rethrowException() throws ForeignException { if (exception != null) { // This gets the stack where this is caused, (instead of where it was deserialized). 
- // This which is much more useful for debugging + // This is much more useful for debugging throw new ForeignException(exception.getSource(), exception.getCause()); } } @Override - public boolean hasException() { + public synchronized boolean hasException() { return exception != null; } @@ -102,7 +103,6 @@ */ private void dispatch(ForeignException e) { // update all the listeners with the passed error - LOG.debug(name + " Recieved error, notifying listeners..."); for (ForeignExceptionListener l: listeners) { l.receive(e); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java (working copy) @@ -21,7 +21,7 @@ import org.apache.hadoop.classification.InterfaceStability; /** - * Exception for a timeout of a task. + * Exception for timeout of a task. * @see TimeoutExceptionInjector */ @InterfaceAudience.Public Index: hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java (working copy) @@ -52,20 +52,6 @@ private final String source; /** - * Create a new ForeignException that can be serialized. It is assumed that this came from a - * remote source. - * @param source - * @param cause - */ - private ForeignException(String source, String clazz, ProxyThrowable cause) { - super(cause); - assert source != null; - assert cause != null; - assert clazz != null; - this.source = source; - } - - /** * Create a new ForeignException that can be serialized. 
It is assumed that this came form a * local source. * @param source * @param cause */ @@ -114,7 +100,7 @@ /** * Convert a stack trace to list of {@link StackTraceElement}. - * @param stackTrace the stack trace to convert to protobuf message + * @param trace the stack trace to convert to protobuf message * @return null if the passed stack is null. */ private static List toStackTraceElementMessages( @@ -146,10 +132,10 @@ } /** - * Converts an ForeignException to a array of bytes. + * Converts a ForeignException to an array of bytes. * @param source the name of the external exception source * @param t the "local" external exception (local) - * @return protobuf serialized version of ForeignThreadException + * @return protobuf serialized version of ForeignException */ public static byte[] serialize(String source, Throwable t) { GenericExceptionMessage.Builder gemBuilder = GenericExceptionMessage.newBuilder(); @@ -158,7 +144,8 @@ gemBuilder.setMessage(t.getMessage()); } // set the stack trace, if there is one - List stack = ForeignException.toStackTraceElementMessages(t.getStackTrace()); + List stack = + ForeignException.toStackTraceElementMessages(t.getStackTrace()); if (stack != null) { gemBuilder.addAllTrace(stack); } @@ -172,16 +159,16 @@ /** * Takes a series of bytes and tries to generate an ForeignException instance for it. * @param bytes - * @return the ExternalExcpetion instance + * @return the ForeignException instance + * @throws InvalidProtocolBufferException if there was deserialization problem this is thrown. 
*/ public static ForeignException deserialize(byte[] bytes) throws InvalidProtocolBufferException { // figure out the data we need to pass ForeignExceptionMessage eem = ForeignExceptionMessage.parseFrom(bytes); GenericExceptionMessage gem = eem.getGenericException(); - StackTraceElement [] trace = ForeignException.toStack(gem.getTraceList()); + StackTraceElement [] trace = ForeignException.toStackTrace(gem.getTraceList()); ProxyThrowable dfe = new ProxyThrowable(gem.getMessage(), trace); - ForeignException e = new ForeignException(eem.getSource(), gem.getClassName(), dfe); + ForeignException e = new ForeignException(eem.getSource(), dfe); return e; } @@ -192,7 +179,7 @@ * @return the deserialized list or null if it couldn't be unwound (e.g. wasn't set on * the sender). */ - private static StackTraceElement[] toStack(List traceList) { + private static StackTraceElement[] toStackTrace(List traceList) { if (traceList == null || traceList.size() == 0) { return new StackTraceElement[0]; // empty array } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java (working copy) @@ -168,6 +168,7 @@ Subprocedure subproc = subprocs.get(procName); if (subproc == null) { LOG.warn("Unexpected reached glabal barrier message for Procedure '" + procName + "'"); + return; } subproc.receiveReachedGlobalBarrier(); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java (working copy) @@ -145,7 
+145,8 @@ LOG.debug("Checking for aborted procedures on node: '" + zkController.getAbortZnode() + "'"); try { // this is the list of the currently aborted procedues - for (String node : ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), zkController.getAbortZnode())) { + for (String node : ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), + zkController.getAbortZnode())) { String abortNode = ZKUtil.joinZNode(zkController.getAbortZnode(), node); abort(abortNode); } @@ -157,10 +158,11 @@ private void waitForNewProcedures() { // watch for new procedues that we need to start subprocedures for - LOG.debug("Looking for new procedures under znode: '" + zkController.getAcquiredBarrier() + "'"); + LOG.debug("Looking for new procedures under znode:'" + zkController.getAcquiredBarrier() + "'"); List runningProcedure = null; try { - runningProcedure = ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), zkController.getAcquiredBarrier()); + runningProcedure = ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), + zkController.getAcquiredBarrier()); if (runningProcedure == null) { LOG.debug("No running procedures."); return; @@ -169,6 +171,10 @@ member.controllerConnectionFailure("General failure when watching for new procedures", new IOException(e)); } + if (runningProcedure == null) { + LOG.debug("No running procedures."); + return; + } for (String procName : runningProcedure) { // then read in the procedure information String path = ZKUtil.joinZNode(zkController.getAcquiredBarrier(), procName); @@ -238,7 +244,8 @@ try { LOG.debug("Member: '" + memberName + "' joining acquired barrier for procedure (" + procName + ") in zk"); - String acquiredZNode = ZKUtil.joinZNode(ZKProcedureUtil.getAcquireBarrierNode(zkController, procName), memberName); + String acquiredZNode = ZKUtil.joinZNode(ZKProcedureUtil.getAcquireBarrierNode( + zkController, procName), memberName); 
ZKUtil.createAndFailSilent(zkController.getWatcher(), acquiredZNode); // watch for the complete node for this snapshot @@ -278,12 +285,12 @@ public void sendMemberAborted(Subprocedure sub, ForeignException ee) { if (sub == null) { LOG.error("Failed due to null subprocedure", ee); + return; } String procName = sub.getName(); LOG.debug("Aborting procedure (" + procName + ") in zk"); String procAbortZNode = zkController.getAbortZNode(procName); try { - LOG.debug("Creating abort znode:" + procAbortZNode); String source = (ee.getSource() == null) ? memberName: ee.getSource(); byte[] errorInfo = ProtobufUtil.prependPBMagic(ForeignException.serialize(source, ee)); ZKUtil.createAndFailSilent(zkController.getWatcher(), procAbortZNode, errorInfo); @@ -316,9 +323,10 @@ LOG.error(msg); // we got a remote exception, but we can't describe it so just return exn from here ee = new ForeignException(getMemberName(), new IllegalArgumentException(msg)); + } else { + data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); + ee = ForeignException.deserialize(data); } - data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); - ee = ForeignException.deserialize(data); } catch (InvalidProtocolBufferException e) { LOG.warn("Got an error notification for op:" + opName + " but we can't read the information. Killing the procedure."); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (working copy) @@ -2116,7 +2116,8 @@ } /** - * Create a timestamp consistent snapshot for the given table. + * Take a snapshot for the given table. If the table is enabled, a FLUSH-type snapshot will be + * taken. If the table is disabled, an offline snapshot is taken. *

* Snapshots are considered unique based on the name of the snapshot. Attempts to take a * snapshot with the same name (even a different type or with different parameters) will fail with @@ -2206,9 +2207,6 @@ */ public void snapshot(SnapshotDescription snapshot) throws IOException, SnapshotCreationException, IllegalArgumentException { - // make sure the snapshot is valid - SnapshotDescriptionUtils.assertSnapshotRequestIsValid(snapshot); - // actually take the snapshot TakeSnapshotResponse response = takeSnapshotAsync(snapshot); final IsSnapshotDoneRequest request = IsSnapshotDoneRequest.newBuilder().setSnapshot(snapshot) @@ -2226,9 +2224,9 @@ try { // sleep a backoff <= pauseTime amount long sleep = getPauseTime(tries++); - LOG.debug("Found sleep:" + sleep); sleep = sleep > maxPauseTime ? maxPauseTime : sleep; - LOG.debug(tries + ") Sleeping: " + sleep + " ms while we wait for snapshot to complete."); + LOG.debug("(#" + tries + ") Sleeping: " + sleep + + "ms while waiting for snapshot completion."); Thread.sleep(sleep); } catch (InterruptedException e) { @@ -2242,8 +2240,7 @@ return masterAdmin.isSnapshotDone(null, request); } }); - } - ; + }; if (!done.getDone()) { throw new SnapshotCreationException("Snapshot '" + snapshot.getName() + "' wasn't completed in expectedTime:" + max + " ms", snapshot); @@ -2251,7 +2248,7 @@ } /** - * Take a snapshot and wait for the server to complete that snapshot (asynchronous) + * Take a snapshot without waiting for the server to complete that snapshot (asynchronous) *

* Only a single snapshot should be taken at a time, or results may be undefined. * @param snapshot snapshot to take @@ -2309,7 +2306,7 @@ /** * Restore the specified snapshot on the original table. (The table must be disabled) * Before restoring the table, a new snapshot with the current table state is created. - * In case of failure, the table will be rolled back to the its original state. + * In case of failure, the table will be rolled back to its original state. * * @param snapshotName name of the snapshot to restore * @throws IOException if a remote or network exception occurs @@ -2358,7 +2355,7 @@ // Try to rollback try { String msg = "Restore snapshot=" + snapshotName + - " failed. Rollback to snapshot=" + rollbackSnapshot + " succeded."; + " failed. Rollback to snapshot=" + rollbackSnapshot + " succeeded."; LOG.error(msg, e); internalRestoreSnapshot(rollbackSnapshot, tableName); throw new RestoreSnapshotException(msg, e); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java (working copy) @@ -39,7 +39,7 @@ /** * Handle the master side of taking a snapshot of an online table, regardless of snapshot type. - * Uses a {@link Procedure} to run the snapshot across all the involved regions. + * Uses a {@link Procedure} to run the snapshot across all the involved region servers. * @see ProcedureCoordinator */ @InterfaceAudience.Private @@ -84,7 +84,7 @@ // wait for the snapshot to complete. A timer thread is kicked off that should cancel this // if it takes too long. 
proc.waitForCompleted(); - LOG.info("Done waiting - snapshot finished!"); + LOG.info("Done waiting - snapshot for " + this.snapshot.getName() + " finished!"); } catch (InterruptedException e) { ForeignException ee = new ForeignException("Interrupted while waiting for snapshot to finish", e); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java (working copy) @@ -277,7 +277,7 @@ * @param snapshot * @return null if doesn't match, else a live handler. */ - TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) { + synchronized TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) { TakeSnapshotHandler h = this.handler; if (h == null) { return null; Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java (working copy) @@ -107,10 +107,11 @@ } // 3. 
write the table info to disk - LOG.info("Starting to copy tableinfo for offline snapshot: " + SnapshotDescriptionUtils.toString(snapshot)); - TableInfoCopyTask tableInfo = new TableInfoCopyTask(this.monitor, snapshot, fs, + LOG.info("Starting to copy tableinfo for offline snapshot: " + + SnapshotDescriptionUtils.toString(snapshot)); + TableInfoCopyTask tableInfoCopyTask = new TableInfoCopyTask(this.monitor, snapshot, fs, FSUtils.getRootDir(conf)); - tableInfo.call(); + tableInfoCopyTask.call(); monitor.rethrowException(); } catch (Exception e) { // make sure we capture the exception to propagate back to the client later Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java (working copy) @@ -91,13 +91,15 @@ // 4. Delete regions from FS (temp directory) FileSystem fs = mfs.getFileSystem(); for (HRegionInfo hri: regions) { - LOG.debug("Deleting region " + hri.getRegionNameAsString() + " from FS"); + LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS"); HFileArchiver.archiveRegion(fs, mfs.getRootDir(), tempTableDir, new Path(tempTableDir, hri.getEncodedName())); } // 5. Delete table from FS (temp directory) - fs.delete(tempTableDir, true); + if (!fs.delete(tempTableDir, true)) { + LOG.error("Couldn't delete " + tempTableDir); + } } finally { // 6. 
Update table descriptor cache this.masterServices.getTableDescriptors().remove(Bytes.toString(tableName)); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (working copy) @@ -205,7 +205,7 @@ try { assignmentManager.getZKTable().setEnabledTable(tableName); } catch (KeeperException e) { - throw new IOException("Unable to ensure that the table will be" + + throw new IOException("Unable to ensure that " + tableName + " will be" + " enabled because of a ZooKeeper issue", e); } } @@ -216,7 +216,8 @@ * @param tableName name of the table under construction * @return the list of regions created */ - protected List handleCreateHdfsRegions(final Path tableRootDir, final String tableName) + protected List handleCreateHdfsRegions(final Path tableRootDir, + final String tableName) throws IOException { int regionNumber = newRegions.length; ThreadPoolExecutor regionOpenAndInitThreadPool = getRegionOpenAndInitThreadPool( Index: hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/HFileLink.java (working copy) @@ -75,7 +75,7 @@ HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFile.HFILE_NAME_REGEX)); /** - * The link should be used for hfile and reference links + * The pattern should be used for hfile and reference links * that can be found in /hbase/table/region/family/ */ private static final Pattern REF_OR_HFILE_LINK_PATTERN = Index: hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java 
=================================================================== --- hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 1446711) +++ hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -749,7 +749,7 @@ public static final String HFILE_ARCHIVE_DIRECTORY = ".archive"; /** - * Name of the directory to store snapshots all snapshots. See SnapshotDescriptionUtils for + * Name of the directory to store all snapshots. See SnapshotDescriptionUtils for * remaining snapshot constants; this is here to keep HConstants dependencies at a minimum and * uni-directional. */