Index: hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java (working copy) @@ -44,7 +44,8 @@ public class ForeignExceptionDispatcher implements ForeignExceptionListener, ForeignExceptionSnare { public static final Log LOG = LogFactory.getLog(ForeignExceptionDispatcher.class); protected final String name; - protected final List listeners = new ArrayList(); + protected final List listeners = + new ArrayList(); private ForeignException exception; public ForeignExceptionDispatcher(String name) { @@ -69,7 +70,7 @@ if (e != null) { exception = e; } else { - exception = new ForeignException(name, e); + exception = new ForeignException(name, ""); } // notify all the listeners @@ -77,7 +78,7 @@ } @Override - public void rethrowException() throws ForeignException { + public synchronized void rethrowException() throws ForeignException { if (exception != null) { // This gets the stack where this is caused, (instead of where it was deserialized). // This which is much more useful for debugging @@ -86,7 +87,7 @@ } @Override - public boolean hasException() { + public synchronized boolean hasException() { return exception != null; } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java (working copy) @@ -168,6 +168,7 @@ Subprocedure subproc = subprocs.get(procName); if (subproc == null) { LOG.warn("Unexpected reached glabal barrier message for Procedure '" + procName + "'"); + return; } subproc.receiveReachedGlobalBarrier(); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java (working copy) @@ -145,7 +145,8 @@ LOG.debug("Checking for aborted procedures on node: '" + zkController.getAbortZnode() + "'"); try { // this is the list of the currently aborted procedues - for (String node : ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), zkController.getAbortZnode())) { + for (String node : ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), + zkController.getAbortZnode())) { String abortNode = ZKUtil.joinZNode(zkController.getAbortZnode(), node); abort(abortNode); } @@ -157,10 +158,11 @@ private void waitForNewProcedures() { // watch for new procedues that we need to start subprocedures for - LOG.debug("Looking for new procedures under znode: '" + zkController.getAcquiredBarrier() + "'"); + LOG.debug("Looking for new procedures under znode:'" + zkController.getAcquiredBarrier() + "'"); List runningProcedure = null; try { - runningProcedure = ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), zkController.getAcquiredBarrier()); + runningProcedure = ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), + zkController.getAcquiredBarrier()); if (runningProcedure == null) { LOG.debug("No running procedures."); return; @@ -169,6 +171,10 @@ member.controllerConnectionFailure("General failure when watching for new procedures", new IOException(e)); } + if (runningProcedure == null) { + LOG.debug("No running procedures."); + return; + } for (String procName : runningProcedure) { // then read in the procedure information String path = ZKUtil.joinZNode(zkController.getAcquiredBarrier(), procName); @@ -238,7 +244,8 @@ try { LOG.debug("Member: '" + memberName + "' joining acquired barrier for procedure (" + procName + ") in zk"); - String acquiredZNode = ZKUtil.joinZNode(ZKProcedureUtil.getAcquireBarrierNode(zkController, procName), memberName); + String acquiredZNode = ZKUtil.joinZNode(ZKProcedureUtil.getAcquireBarrierNode( + zkController, procName), memberName); ZKUtil.createAndFailSilent(zkController.getWatcher(), acquiredZNode); // watch for the complete node for this snapshot @@ -278,12 +285,12 @@ public void sendMemberAborted(Subprocedure sub, ForeignException ee) { if (sub == null) { LOG.error("Failed due to null subprocedure", ee); + return; } String procName = sub.getName(); LOG.debug("Aborting procedure (" + procName + ") in zk"); String procAbortZNode = zkController.getAbortZNode(procName); try { - LOG.debug("Creating abort znode:" + procAbortZNode); String source = (ee.getSource() == null) ? memberName: ee.getSource(); byte[] errorInfo = ProtobufUtil.prependPBMagic(ForeignException.serialize(source, ee)); ZKUtil.createAndFailSilent(zkController.getWatcher(), procAbortZNode, errorInfo); @@ -316,9 +323,10 @@ LOG.error(msg); // we got a remote exception, but we can't describe it so just return exn from here ee = new ForeignException(getMemberName(), new IllegalArgumentException(msg)); + } else { + data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); + ee = ForeignException.deserialize(data); } - data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); - ee = ForeignException.deserialize(data); } catch (InvalidProtocolBufferException e) { LOG.warn("Got an error notification for op:" + opName + " but we can't read the information. Killing the procedure."); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java (revision 1446711) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java (working copy) @@ -277,7 +277,7 @@ * @param snapshot * @return null if doesn't match, else a live handler. */ - TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) { + synchronized TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) { TakeSnapshotHandler h = this.handler; if (h == null) { return null;