Index: security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java
===================================================================
--- security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java	(revision 1451296)
+++ security/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java	(working copy)
@@ -113,6 +113,11 @@
   public static void setupBeforeClass() throws Exception {
     // setup configuration
     conf = TEST_UTIL.getConfiguration();
+    conf.set("hbase.master.hfilecleaner.plugins",
+        "org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner," +
+        "org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner");
+    conf.set("hbase.master.logcleaner.plugins",
+        "org.apache.hadoop.hbase.master.snapshot.SnapshotLogCleaner");
     SecureTestUtil.enableSecurity(conf);
 
     TEST_UTIL.startMiniCluster();
@@ -1721,4 +1726,51 @@
     verifyDenied(action, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
   }
+
+  @Test
+  public void testSnapshot() throws Exception {
+    PrivilegedExceptionAction snapshotAction = new PrivilegedExceptionAction() {
+      public Object run() throws Exception {
+        ACCESS_CONTROLLER.preSnapshot(ObserverContext.createAndPrepare(CP_ENV, null),
+          null, null);
+        return null;
+      }
+    };
+
+    PrivilegedExceptionAction deleteAction = new PrivilegedExceptionAction() {
+      public Object run() throws Exception {
+        ACCESS_CONTROLLER.preDeleteSnapshot(ObserverContext.createAndPrepare(CP_ENV, null),
+          null);
+        return null;
+      }
+    };
+
+    PrivilegedExceptionAction restoreAction = new PrivilegedExceptionAction() {
+      public Object run() throws Exception {
+        ACCESS_CONTROLLER.preRestoreSnapshot(ObserverContext.createAndPrepare(CP_ENV, null),
+          null, null);
+        return null;
+      }
+    };
+
+    PrivilegedExceptionAction cloneAction = new PrivilegedExceptionAction() {
+      public Object run() throws Exception {
+        ACCESS_CONTROLLER.preCloneSnapshot(ObserverContext.createAndPrepare(CP_ENV, null),
+          null, null);
+        return null;
+      }
+    };
+
+    verifyAllowed(snapshotAction, SUPERUSER, USER_ADMIN);
+    verifyDenied(snapshotAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+
+    verifyAllowed(cloneAction, SUPERUSER, USER_ADMIN);
+    verifyDenied(cloneAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+
+    verifyAllowed(restoreAction, SUPERUSER, USER_ADMIN);
+    verifyDenied(restoreAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+
+    verifyAllowed(deleteAction, SUPERUSER, USER_ADMIN);
+    verifyDenied(deleteAction, USER_CREATE, USER_RW, USER_RO, USER_NONE, USER_OWNER);
+  }
 }
Index: security/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java
===================================================================
--- security/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java	(revision 1451296)
+++ security/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java	(working copy)
@@ -56,6 +56,7 @@
 import org.apache.hadoop.hbase.ipc.HBaseRPC;
 import org.apache.hadoop.hbase.ipc.ProtocolSignature;
 import org.apache.hadoop.hbase.ipc.RequestContext;
+import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.InternalScanner;
 import org.apache.hadoop.hbase.regionserver.RegionScanner;
@@ -730,7 +731,56 @@
     AccessControlLists.init(ctx.getEnvironment().getMasterServices());
   }
 
+  @Override
+  public void preSnapshot(final ObserverContext ctx,
+      final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+      throws IOException {
+    requirePermission("snapshot", Permission.Action.ADMIN);
+  }
+
+  @Override
+  public void postSnapshot(final ObserverContext ctx,
+      final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+      throws IOException {
+  }
+
+  @Override
+  public void preCloneSnapshot(final ObserverContext ctx,
+      final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+      throws IOException {
+    requirePermission("cloneSnapshot", Permission.Action.ADMIN);
+  }
+
+  @Override
+  public void postCloneSnapshot(final ObserverContext ctx,
+      final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+      throws IOException {
+  }
+
+  @Override
+  public void preRestoreSnapshot(final ObserverContext ctx,
+      final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+      throws IOException {
+    requirePermission("restoreSnapshot", Permission.Action.ADMIN);
+  }
+
+  @Override
+  public void postRestoreSnapshot(final ObserverContext ctx,
+      final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
+      throws IOException {
+  }
+
+  @Override
+  public void preDeleteSnapshot(final ObserverContext ctx,
+      final SnapshotDescription snapshot) throws IOException {
+    requirePermission("deleteSnapshot", Permission.Action.ADMIN);
+  }
+
+  @Override
+  public void postDeleteSnapshot(final ObserverContext ctx,
+      final SnapshotDescription snapshot) throws IOException {
+  }
+
   /* ---- RegionObserver implementation ---- */
 
   @Override
Index: src/test/java/org/apache/hadoop/hbase/errorhandling/TestForeignExceptionDispatcher.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/errorhandling/TestForeignExceptionDispatcher.java	(revision 0)
+++ src/test/java/org/apache/hadoop/hbase/errorhandling/TestForeignExceptionDispatcher.java	(revision 0)
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.errorhandling;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.SmallTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+/**
+ * Test that we propagate errors through a dispatcher exactly once via different failure
+ * injection mechanisms.
+ */
+@Category(SmallTests.class)
+public class TestForeignExceptionDispatcher {
+  private static final Log LOG = LogFactory.getLog(TestForeignExceptionDispatcher.class);
+
+  /**
+   * Exception thrown from the test
+   */
+  final ForeignException EXTEXN = new ForeignException("FORTEST", new IllegalArgumentException("FORTEST"));
+  final ForeignException EXTEXN2 = new ForeignException("FORTEST2", new IllegalArgumentException("FORTEST2"));
+
+  /**
+   * Tests that a dispatcher dispatches only the first exception, and does not propagate
+   * subsequent exceptions.
+   */
+  @Test
+  public void testErrorPropagation() {
+    ForeignExceptionListener listener1 = Mockito.mock(ForeignExceptionListener.class);
+    ForeignExceptionListener listener2 = Mockito.mock(ForeignExceptionListener.class);
+    ForeignExceptionDispatcher dispatcher = new ForeignExceptionDispatcher();
+
+    // add the listeners
+    dispatcher.addListener(listener1);
+    dispatcher.addListener(listener2);
+
+    // create an artificial error
+    dispatcher.receive(EXTEXN);
+
+    // make sure the listeners got the error
+    Mockito.verify(listener1, Mockito.times(1)).receive(EXTEXN);
+    Mockito.verify(listener2, Mockito.times(1)).receive(EXTEXN);
+
+    // make sure that we get an exception
+    try {
+      dispatcher.rethrowException();
+      fail("Monitor should have thrown an exception after getting error.");
+    } catch (ForeignException ex) {
+      assertTrue("Got an unexpected exception:" + ex, ex.getCause() == EXTEXN.getCause());
+      LOG.debug("Got the testing exception!");
+    }
+
+    // push another error, which should not be passed to listeners
+    dispatcher.receive(EXTEXN2);
+    Mockito.verify(listener1, Mockito.never()).receive(EXTEXN2);
+    Mockito.verify(listener2, Mockito.never()).receive(EXTEXN2);
+  }
+
+  @Test
+  public void testSingleDispatcherWithTimer() {
+    ForeignExceptionListener listener1 = Mockito.mock(ForeignExceptionListener.class);
+    ForeignExceptionListener listener2 = Mockito.mock(ForeignExceptionListener.class);
+
+    ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher();
+
+    // add the listeners
+    monitor.addListener(listener1);
+    monitor.addListener(listener2);
+
+    TimeoutExceptionInjector timer = new TimeoutExceptionInjector(monitor, 1000);
+    timer.start();
+    timer.trigger();
+
+    assertTrue("Monitor didn't get timeout", monitor.hasException());
+
+    // verify that we propagated the error
+    Mockito.verify(listener1).receive(Mockito.any(ForeignException.class));
+    Mockito.verify(listener2).receive(Mockito.any(ForeignException.class));
+  }
+
+  /**
+   * Test that the dispatcher can receive an error via the timer mechanism.
+ */ + @Test + public void testAttemptTimer() { + ForeignExceptionListener listener1 = Mockito.mock(ForeignExceptionListener.class); + ForeignExceptionListener listener2 = Mockito.mock(ForeignExceptionListener.class); + ForeignExceptionDispatcher orchestrator = new ForeignExceptionDispatcher(); + + // add the listeners + orchestrator.addListener(listener1); + orchestrator.addListener(listener2); + + // now create a timer and check for that error + TimeoutExceptionInjector timer = new TimeoutExceptionInjector(orchestrator, 1000); + timer.start(); + timer.trigger(); + // make sure that we got the timer error + Mockito.verify(listener1, Mockito.times(1)).receive(Mockito.any(ForeignException.class)); + Mockito.verify(listener2, Mockito.times(1)).receive(Mockito.any(ForeignException.class)); + } +} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/errorhandling/TestTimeoutExceptionInjector.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/errorhandling/TestTimeoutExceptionInjector.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/errorhandling/TestTimeoutExceptionInjector.java (revision 0) @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.errorhandling; + +import static org.junit.Assert.fail; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +/** + * Test the {@link TimeoutExceptionInjector} to ensure we fulfill contracts + */ +@Category(SmallTests.class) +public class TestTimeoutExceptionInjector { + + private static final Log LOG = LogFactory.getLog(TestTimeoutExceptionInjector.class); + + /** + * Test that a manually triggered timer fires an exception. + */ + @Test(timeout = 1000) + public void testTimerTrigger() { + final long time = 10000000; // pick a value that is very far in the future + ForeignExceptionListener listener = Mockito.mock(ForeignExceptionListener.class); + TimeoutExceptionInjector timer = new TimeoutExceptionInjector(listener, time); + timer.start(); + timer.trigger(); + Mockito.verify(listener, Mockito.times(1)).receive(Mockito.any(ForeignException.class)); + } + + /** + * Test that a manually triggered exception with data fires with the data in receiveError. 
+ */ + @Test + public void testTimerPassesOnErrorInfo() { + final long time = 1000000; + ForeignExceptionListener listener = Mockito.mock(ForeignExceptionListener.class); + TimeoutExceptionInjector timer = new TimeoutExceptionInjector(listener, time); + timer.start(); + timer.trigger(); + Mockito.verify(listener).receive(Mockito.any(ForeignException.class)); + } + + /** + * Demonstrate TimeoutExceptionInjector semantics -- completion means no more exceptions passed to + * error listener. + */ + @Test(timeout = 1000) + public void testStartAfterComplete() throws InterruptedException { + final long time = 10; + ForeignExceptionListener listener = Mockito.mock(ForeignExceptionListener.class); + TimeoutExceptionInjector timer = new TimeoutExceptionInjector(listener, time); + timer.complete(); + try { + timer.start(); + fail("Timer should fail to start after complete."); + } catch (IllegalStateException e) { + LOG.debug("Correctly failed timer: " + e.getMessage()); + } + Thread.sleep(time + 1); + Mockito.verifyZeroInteractions(listener); + } + + /** + * Demonstrate TimeoutExceptionInjector semantics -- triggering fires exception and completes + * the timer. + */ + @Test(timeout = 1000) + public void testStartAfterTrigger() throws InterruptedException { + final long time = 10; + ForeignExceptionListener listener = Mockito.mock(ForeignExceptionListener.class); + TimeoutExceptionInjector timer = new TimeoutExceptionInjector(listener, time); + timer.trigger(); + try { + timer.start(); + fail("Timer should fail to start after complete."); + } catch (IllegalStateException e) { + LOG.debug("Correctly failed timer: " + e.getMessage()); + } + Thread.sleep(time * 2); + Mockito.verify(listener, Mockito.times(1)).receive(Mockito.any(ForeignException.class)); + Mockito.verifyNoMoreInteractions(listener); + } +} Index: src/test/java/org/apache/hadoop/hbase/errorhandling/TestForeignExceptionSerialization.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/errorhandling/TestForeignExceptionSerialization.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/errorhandling/TestForeignExceptionSerialization.java (revision 0) @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.errorhandling; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import com.google.protobuf.InvalidProtocolBufferException; + +/** + * Test that we correctly serialize exceptions from a remote source + */ +@Category(SmallTests.class) +public class TestForeignExceptionSerialization { + private static final String srcName = "someNode"; + + /** + * Verify that we get back similar stack trace information before an after serialization. + * @throws InvalidProtocolBufferException + */ + @Test + public void testSimpleException() throws InvalidProtocolBufferException { + String data = "some bytes"; + ForeignException in = new ForeignException("SRC", new IllegalArgumentException(data)); + // check that we get the data back out + ForeignException e = ForeignException.deserialize(ForeignException.serialize(srcName, in)); + assertNotNull(e); + + // now check that we get the right stack trace + StackTraceElement elem = new StackTraceElement(this.getClass().toString(), "method", "file", 1); + in.setStackTrace(new StackTraceElement[] { elem }); + e = ForeignException.deserialize(ForeignException.serialize(srcName, in)); + + assertNotNull(e); + assertEquals("Stack trace got corrupted", elem, e.getCause().getStackTrace()[0]); + assertEquals("Got an unexpectedly long stack trace", 1, e.getCause().getStackTrace().length); + } + + /** + * Compare that a generic exception's stack trace has the same stack trace elements after + * serialization and deserialization + * @throws InvalidProtocolBufferException + */ + @Test + public void testRemoteFromLocal() throws InvalidProtocolBufferException { + String errorMsg = "some message"; + Exception generic = new Exception(errorMsg); + generic.printStackTrace(); + assertTrue(generic.getMessage().contains(errorMsg)); + + ForeignException e = ForeignException.deserialize(ForeignException.serialize(srcName, generic)); + assertArrayEquals("Local stack trace got corrupted", generic.getStackTrace(), e.getCause().getStackTrace()); + + e.printStackTrace(); // should have ForeignException and source node in it. + assertTrue(e.getCause().getCause() == null); + + // verify that original error message is present in Foreign exception message + assertTrue(e.getCause().getMessage().contains(errorMsg)); + } + +} Index: src/test/java/org/apache/hadoop/hbase/procedure/TestProcedureMember.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/procedure/TestProcedureMember.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/procedure/TestProcedureMember.java (revision 0) @@ -0,0 +1,444 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.inOrder; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyZeroInteractions; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.concurrent.ThreadPoolExecutor; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.errorhandling.TimeoutException; +import org.apache.hadoop.hbase.procedure.Subprocedure.SubprocedureImpl; +import org.junit.After; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.InOrder; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +/** + * Test the procedure member, and it's error handling mechanisms. + */ +@Category(SmallTests.class) +public class TestProcedureMember { + private static final long WAKE_FREQUENCY = 100; + private static final long TIMEOUT = 100000; + private static final long POOL_KEEP_ALIVE = 1; + + private final String op = "some op"; + private final byte[] data = new byte[0]; + private final ForeignExceptionDispatcher mockListener = Mockito + .spy(new ForeignExceptionDispatcher()); + private final SubprocedureFactory mockBuilder = mock(SubprocedureFactory.class); + private final ProcedureMemberRpcs mockMemberComms = Mockito + .mock(ProcedureMemberRpcs.class); + private ProcedureMember member; + private ForeignExceptionDispatcher dispatcher; + Subprocedure spySub; + + /** + * Reset all the mock objects + */ + @After + public void resetTest() { + reset(mockListener, mockBuilder, mockMemberComms); + if (member != null) + try { + member.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + /** + * Build a member using the class level mocks + * @return member to use for tests + */ + private ProcedureMember buildCohortMember() { + String name = "node"; + ThreadPoolExecutor pool = ProcedureMember.defaultPool(WAKE_FREQUENCY, POOL_KEEP_ALIVE, 1, name); + return new ProcedureMember(mockMemberComms, pool, mockBuilder); + } + + /** + * Setup a procedure member that returns the spied-upon {@link Subprocedure}. 
+ */ + private void buildCohortMemberPair() throws IOException { + dispatcher = new ForeignExceptionDispatcher(); + String name = "node"; + ThreadPoolExecutor pool = ProcedureMember.defaultPool(WAKE_FREQUENCY, POOL_KEEP_ALIVE, 1, name); + member = new ProcedureMember(mockMemberComms, pool, mockBuilder); + when(mockMemberComms.getMemberName()).thenReturn("membername"); // needed for generating exception + Subprocedure subproc = new EmptySubprocedure(member, dispatcher); + spySub = spy(subproc); + when(mockBuilder.buildSubprocedure(op, data)).thenReturn(spySub); + addCommitAnswer(); + } + + + /** + * Add a 'in barrier phase' response to the mock controller when it gets a acquired notification + */ + private void addCommitAnswer() throws IOException { + doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + member.receivedReachedGlobalBarrier(op); + return null; + } + }).when(mockMemberComms).sendMemberAcquired(any(Subprocedure.class)); + } + + /** + * Test the normal sub procedure execution case. + */ + @Test(timeout = 500) + public void testSimpleRun() throws Exception { + member = buildCohortMember(); + EmptySubprocedure subproc = new EmptySubprocedure(member, mockListener); + EmptySubprocedure spy = spy(subproc); + when(mockBuilder.buildSubprocedure(op, data)).thenReturn(spy); + + // when we get a prepare, then start the commit phase + addCommitAnswer(); + + // run the operation + // build a new operation + Subprocedure subproc1 = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc1); + // and wait for it to finish + subproc.waitForLocallyCompleted(); + + // make sure everything ran in order + InOrder order = inOrder(mockMemberComms, spy); + order.verify(spy).acquireBarrier(); + order.verify(mockMemberComms).sendMemberAcquired(eq(spy)); + order.verify(spy).insideBarrier(); + order.verify(mockMemberComms).sendMemberCompleted(eq(spy)); + order.verify(mockMemberComms, never()).sendMemberAborted(eq(spy), + any(ForeignException.class)); + } + + /** + * Make sure we call cleanup etc, when we have an exception during + * {@link Subprocedure#acquireBarrier()}. + */ + @Test(timeout = 1000) + public void testMemberPrepareException() throws Exception { + buildCohortMemberPair(); + + // mock an exception on Subprocedure's prepare + doAnswer( + new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + throw new IOException("Forced IOException in member acquireBarrier"); + } + }).when(spySub).acquireBarrier(); + + // run the operation + // build a new operation + Subprocedure subproc = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc); + // if the operation doesn't die properly, then this will timeout + member.closeAndWait(TIMEOUT); + + // make sure everything ran in order + InOrder order = inOrder(mockMemberComms, spySub); + order.verify(spySub).acquireBarrier(); + // Later phases not run + order.verify(mockMemberComms, never()).sendMemberAcquired(eq(spySub)); + order.verify(spySub, never()).insideBarrier(); + order.verify(mockMemberComms, never()).sendMemberCompleted(eq(spySub)); + // error recovery path exercised + order.verify(spySub).cancel(anyString(), any(Exception.class)); + order.verify(spySub).cleanup(any(Exception.class)); + } + + /** + * Make sure we call cleanup etc, when we have an exception during prepare. 
+ */ + @Test(timeout = 1000) + public void testSendMemberAcquiredCommsFailure() throws Exception { + buildCohortMemberPair(); + + // mock an exception on Subprocedure's prepare + doAnswer( + new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + throw new IOException("Forced IOException in memeber prepare"); + } + }).when(mockMemberComms).sendMemberAcquired(any(Subprocedure.class)); + + // run the operation + // build a new operation + Subprocedure subproc = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc); + // if the operation doesn't die properly, then this will timeout + member.closeAndWait(TIMEOUT); + + // make sure everything ran in order + InOrder order = inOrder(mockMemberComms, spySub); + order.verify(spySub).acquireBarrier(); + order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); + + // Later phases not run + order.verify(spySub, never()).insideBarrier(); + order.verify(mockMemberComms, never()).sendMemberCompleted(eq(spySub)); + // error recovery path exercised + order.verify(spySub).cancel(anyString(), any(Exception.class)); + order.verify(spySub).cleanup(any(Exception.class)); + } + + /** + * Fail correctly if coordinator aborts the procedure. The subprocedure will not interrupt a + * running {@link Subprocedure#prepare} -- prepare needs to finish first, and the the abort + * is checked. Thus, the {@link Subprocedure#prepare} should succeed but later get rolled back + * via {@link Subprocedure#cleanup}. + */ + @Test(timeout = 1000) + public void testCoordinatorAbort() throws Exception { + buildCohortMemberPair(); + + // mock that another node timed out or failed to prepare + final TimeoutException oate = new TimeoutException("bogus timeout", 1,2,0); + doAnswer( + new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + // inject a remote error (this would have come from an external thread) + spySub.cancel("bogus message", oate); + // sleep the wake frequency since that is what we promised + Thread.sleep(WAKE_FREQUENCY); + return null; + } + }).when(spySub).waitForReachedGlobalBarrier(); + + // run the operation + // build a new operation + Subprocedure subproc = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc); + // if the operation doesn't die properly, then this will timeout + member.closeAndWait(TIMEOUT); + + // make sure everything ran in order + InOrder order = inOrder(mockMemberComms, spySub); + order.verify(spySub).acquireBarrier(); + order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); + // Later phases not run + order.verify(spySub, never()).insideBarrier(); + order.verify(mockMemberComms, never()).sendMemberCompleted(eq(spySub)); + // error recovery path exercised + order.verify(spySub).cancel(anyString(), any(Exception.class)); + order.verify(spySub).cleanup(any(Exception.class)); + } + + /** + * Handle failures if a member's commit phase fails. + * + * NOTE: This is the core difference that makes this different from traditional 2PC. In true + * 2PC the transaction is committed just before the coordinator sends commit messages to the + * member. Members are then responsible for reading its TX log. This implementation actually + * rolls back, and thus breaks the normal TX guarantees. 
+ */ + @Test(timeout = 1000) + public void testMemberCommitException() throws Exception { + buildCohortMemberPair(); + + // mock an exception on Subprocedure's prepare + doAnswer( + new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + throw new IOException("Forced IOException in memeber prepare"); + } + }).when(spySub).insideBarrier(); + + // run the operation + // build a new operation + Subprocedure subproc = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc); + // if the operation doesn't die properly, then this will timeout + member.closeAndWait(TIMEOUT); + + // make sure everything ran in order + InOrder order = inOrder(mockMemberComms, spySub); + order.verify(spySub).acquireBarrier(); + order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); + order.verify(spySub).insideBarrier(); + + // Later phases not run + order.verify(mockMemberComms, never()).sendMemberCompleted(eq(spySub)); + // error recovery path exercised + order.verify(spySub).cancel(anyString(), any(Exception.class)); + order.verify(spySub).cleanup(any(Exception.class)); + } + + /** + * Handle Failures if a member's commit phase succeeds but notification to coordinator fails + * + * NOTE: This is the core difference that makes this different from traditional 2PC. In true + * 2PC the transaction is committed just before the coordinator sends commit messages to the + * member. Members are then responsible for reading its TX log. This implementation actually + * rolls back, and thus breaks the normal TX guarantees. + */ + @Test(timeout = 1000) + public void testMemberCommitCommsFailure() throws Exception { + buildCohortMemberPair(); + final TimeoutException oate = new TimeoutException("bogus timeout",1,2,0); + doAnswer( + new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + // inject a remote error (this would have come from an external thread) + spySub.cancel("commit comms fail", oate); + // sleep the wake frequency since that is what we promised + Thread.sleep(WAKE_FREQUENCY); + return null; + } + }).when(mockMemberComms).sendMemberCompleted(any(Subprocedure.class)); + + // run the operation + // build a new operation + Subprocedure subproc = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc); + // if the operation doesn't die properly, then this will timeout + member.closeAndWait(TIMEOUT); + + // make sure everything ran in order + InOrder order = inOrder(mockMemberComms, spySub); + order.verify(spySub).acquireBarrier(); + order.verify(mockMemberComms).sendMemberAcquired(eq(spySub)); + order.verify(spySub).insideBarrier(); + order.verify(mockMemberComms).sendMemberCompleted(eq(spySub)); + // error recovery path exercised + order.verify(spySub).cancel(anyString(), any(Exception.class)); + order.verify(spySub).cleanup(any(Exception.class)); + } + + /** + * Fail correctly on getting an external error while waiting for the prepared latch + * @throws Exception on failure + */ + @Test(timeout = 1000) + public void testPropagateConnectionErrorBackToManager() throws Exception { + // setup the operation + member = buildCohortMember(); + ProcedureMember memberSpy = spy(member); + + // setup the commit and the spy + final ForeignExceptionDispatcher dispatcher = new ForeignExceptionDispatcher(); + ForeignExceptionDispatcher dispSpy = spy(dispatcher); + Subprocedure commit = new EmptySubprocedure(member, dispatcher); + Subprocedure spy = spy(commit); + when(mockBuilder.buildSubprocedure(op, 
data)).thenReturn(spy); + + // fail during the prepare phase + doThrow(new ForeignException("SRC", "prepare exception")).when(spy).acquireBarrier(); + // and throw a connection error when we try to tell the controller about it + doThrow(new IOException("Controller is down!")).when(mockMemberComms) + .sendMemberAborted(eq(spy), any(ForeignException.class)); + + + // run the operation + // build a new operation + Subprocedure subproc = memberSpy.createSubprocedure(op, data); + memberSpy.submitSubprocedure(subproc); + // if the operation doesn't die properly, then this will timeout + memberSpy.closeAndWait(TIMEOUT); + + // make sure everything ran in order + InOrder order = inOrder(mockMemberComms, spy, dispSpy); + // make sure we acquire. + order.verify(spy).acquireBarrier(); + order.verify(mockMemberComms, never()).sendMemberAcquired(spy); + + // TODO Need to do another refactor to get this to propagate to the coordinator. + // make sure we pass a remote exception back the controller +// order.verify(mockMemberComms).sendMemberAborted(eq(spy), +// any(ExternalException.class)); +// order.verify(dispSpy).receiveError(anyString(), +// any(ExternalException.class), any()); + } + + /** + * Test that the cohort member correctly doesn't attempt to start a task when the builder cannot + * correctly build a new task for the requested operation + * @throws Exception on failure + */ + @Test + public void testNoTaskToBeRunFromRequest() throws Exception { + ThreadPoolExecutor pool = mock(ThreadPoolExecutor.class); + when(mockBuilder.buildSubprocedure(op, data)).thenReturn(null) + .thenThrow(new IllegalStateException("Wrong state!"), new IllegalArgumentException("can't understand the args")); + member = new ProcedureMember(mockMemberComms, pool, mockBuilder); + // builder returns null + // build a new operation + Subprocedure subproc = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc); + // throws an illegal state exception + try { + // build a new operation + Subprocedure subproc2 = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc2); + } catch (IllegalStateException ise) { + } + // throws an illegal argument exception + try { + // build a new operation + Subprocedure subproc3 = member.createSubprocedure(op, data); + member.submitSubprocedure(subproc3); + } catch (IllegalArgumentException iae) { + } + + // no request should reach the pool + verifyZeroInteractions(pool); + // get two abort requests + // TODO Need to do another refactor to get this to propagate to the coordinator. + // verify(mockMemberComms, times(2)).sendMemberAborted(any(Subprocedure.class), any(ExternalException.class)); + } + + /** + * Helper {@link Procedure} who's phase for each step is just empty + */ + public class EmptySubprocedure extends SubprocedureImpl { + public EmptySubprocedure(ProcedureMember member, ForeignExceptionDispatcher dispatcher) { + super( member, op, dispatcher, + // TODO 1000000 is an arbitrary number that I picked. + WAKE_FREQUENCY, TIMEOUT); + } + } +} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/procedure/TestZKProcedureControllers.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/procedure/TestZKProcedureControllers.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/procedure/TestZKProcedureControllers.java (revision 0) @@ -0,0 +1,429 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import org.mockito.verification.VerificationMode; + +import com.google.common.collect.Lists; + +/** + * Test zookeeper-based, procedure controllers + */ +@Category(MediumTests.class) +public class TestZKProcedureControllers { + + static final Log LOG = LogFactory.getLog(TestZKProcedureControllers.class); + private final static HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static final String COHORT_NODE_NAME = "expected"; + private static final String CONTROLLER_NODE_NAME = "controller"; + private static final VerificationMode once = Mockito.times(1); + + @BeforeClass + public static void setupTest() throws Exception { + UTIL.startMiniZKCluster(); + } + + @AfterClass + public static void cleanupTest() throws Exception { + UTIL.shutdownMiniZKCluster(); + } + + /** + * Smaller test to just test the actuation on the cohort member + * @throws Exception on failure + */ + @Test(timeout = 15000) + public void testSimpleZKCohortMemberController() throws Exception { + ZooKeeperWatcher watcher = HBaseTestingUtility.getZooKeeperWatcher(UTIL); + final String operationName = "instanceTest"; + + final Subprocedure sub = Mockito.mock(Subprocedure.class); + Mockito.when(sub.getName()).thenReturn(operationName); + + final byte[] data = new byte[] { 1, 2, 3 }; + final CountDownLatch prepared = new CountDownLatch(1); + final CountDownLatch committed = new CountDownLatch(1); + + final ForeignExceptionDispatcher monitor = spy(new ForeignExceptionDispatcher()); + final ZKProcedureMemberRpcs controller = new ZKProcedureMemberRpcs( + watcher, "testSimple", COHORT_NODE_NAME); + + // mock out cohort member callbacks + final ProcedureMember member = Mockito + 
.mock(ProcedureMember.class); + Mockito.doReturn(sub).when(member).createSubprocedure(operationName, data); + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + controller.sendMemberAcquired(sub); + prepared.countDown(); + return null; + } + }).when(member).submitSubprocedure(sub); + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + controller.sendMemberCompleted(sub); + committed.countDown(); + return null; + } + }).when(member).receivedReachedGlobalBarrier(operationName); + + // start running the listener + controller.start(member); + + // set a prepare node from a 'coordinator' + String prepare = ZKProcedureUtil.getAcquireBarrierNode(controller.getZkController(), operationName); + ZKUtil.createSetData(watcher, prepare, ProtobufUtil.prependPBMagic(data)); + // wait for the operation to be prepared + prepared.await(); + + // create the commit node so we update the operation to enter the commit phase + String commit = ZKProcedureUtil.getReachedBarrierNode(controller.getZkController(), operationName); + LOG.debug("Found prepared, posting commit node:" + commit); + ZKUtil.createAndFailSilent(watcher, commit); + LOG.debug("Commit node:" + commit + ", exists:" + ZKUtil.checkExists(watcher, commit)); + committed.await(); + + verify(monitor, never()).receive(Mockito.any(ForeignException.class)); + // XXX: broken due to composition. +// verify(member, never()).getManager().controllerConnectionFailure(Mockito.anyString(), +// Mockito.any(IOException.class)); + // cleanup after the test + ZKUtil.deleteNodeRecursively(watcher, controller.getZkController().getBaseZnode()); + assertEquals("Didn't delete prepare node", -1, ZKUtil.checkExists(watcher, prepare)); + assertEquals("Didn't delete commit node", -1, ZKUtil.checkExists(watcher, commit)); + } + + @Test(timeout = 15000) + public void testZKCoordinatorControllerWithNoCohort() throws Exception { + final String operationName = "no cohort controller test"; + final byte[] data = new byte[] { 1, 2, 3 }; + + runMockCommitWithOrchestratedControllers(startCoordinatorFirst, operationName, data); + runMockCommitWithOrchestratedControllers(startCohortFirst, operationName, data); + } + + @Test(timeout = 15000) + public void testZKCoordinatorControllerWithSingleMemberCohort() throws Exception { + final String operationName = "single member controller test"; + final byte[] data = new byte[] { 1, 2, 3 }; + + runMockCommitWithOrchestratedControllers(startCoordinatorFirst, operationName, data, "cohort"); + runMockCommitWithOrchestratedControllers(startCohortFirst, operationName, data, "cohort"); + } + + @Test(timeout = 15000) + public void testZKCoordinatorControllerMultipleCohort() throws Exception { + final String operationName = "multi member controller test"; + final byte[] data = new byte[] { 1, 2, 3 }; + + runMockCommitWithOrchestratedControllers(startCoordinatorFirst, operationName, data, "cohort", + "cohort2", "cohort3"); + runMockCommitWithOrchestratedControllers(startCohortFirst, operationName, data, "cohort", + "cohort2", "cohort3"); + } + + private void runMockCommitWithOrchestratedControllers(StartControllers controllers, + String operationName, byte[] data, String... 
cohort) throws Exception { + ZooKeeperWatcher watcher = HBaseTestingUtility.getZooKeeperWatcher(UTIL); + List expected = Lists.newArrayList(cohort); + + final Subprocedure sub = Mockito.mock(Subprocedure.class); + Mockito.when(sub.getName()).thenReturn(operationName); + + CountDownLatch prepared = new CountDownLatch(expected.size()); + CountDownLatch committed = new CountDownLatch(expected.size()); + // mock out coordinator so we can keep track of zk progress + ProcedureCoordinator coordinator = setupMockCoordinator(operationName, + prepared, committed); + + ProcedureMember member = Mockito.mock(ProcedureMember.class); + + Pair> pair = controllers + .start(watcher, operationName, coordinator, CONTROLLER_NODE_NAME, member, expected); + ZKProcedureCoordinatorRpcs controller = pair.getFirst(); + List cohortControllers = pair.getSecond(); + // start the operation + Procedure p = Mockito.mock(Procedure.class); + Mockito.when(p.getName()).thenReturn(operationName); + + controller.sendGlobalBarrierAcquire(p, data, expected); + + // post the prepare node for each expected node + for (ZKProcedureMemberRpcs cc : cohortControllers) { + cc.sendMemberAcquired(sub); + } + + // wait for all the notifications to reach the coordinator + prepared.await(); + // make sure we got the all the nodes and no more + Mockito.verify(coordinator, times(expected.size())).memberAcquiredBarrier(Mockito.eq(operationName), + Mockito.anyString()); + + // kick off the commit phase + controller.sendGlobalBarrierReached(p, expected); + + // post the committed node for each expected node + for (ZKProcedureMemberRpcs cc : cohortControllers) { + cc.sendMemberCompleted(sub); + } + + // wait for all commit notifications to reach the coordinator + committed.await(); + // make sure we got the all the nodes and no more + Mockito.verify(coordinator, times(expected.size())).memberFinishedBarrier(Mockito.eq(operationName), + Mockito.anyString()); + + controller.resetMembers(p); + + // verify all behavior + verifyZooKeeperClean(operationName, watcher, controller.getZkProcedureUtil()); + verifyCohort(member, cohortControllers.size(), operationName, data); + verifyCoordinator(operationName, coordinator, expected); + } + + // TODO Broken by composition. +// @Test +// public void testCoordinatorControllerHandlesEarlyPrepareNodes() throws Exception { +// runEarlyPrepareNodes(startCoordinatorFirst, "testEarlyPreparenodes", new byte[] { 1, 2, 3 }, +// "cohort1", "cohort2"); +// runEarlyPrepareNodes(startCohortFirst, "testEarlyPreparenodes", new byte[] { 1, 2, 3 }, +// "cohort1", "cohort2"); +// } + + public void runEarlyPrepareNodes(StartControllers controllers, String operationName, byte[] data, + String... 
cohort) throws Exception { + ZooKeeperWatcher watcher = HBaseTestingUtility.getZooKeeperWatcher(UTIL); + List expected = Lists.newArrayList(cohort); + + final Subprocedure sub = Mockito.mock(Subprocedure.class); + Mockito.when(sub.getName()).thenReturn(operationName); + + final CountDownLatch prepared = new CountDownLatch(expected.size()); + final CountDownLatch committed = new CountDownLatch(expected.size()); + // mock out coordinator so we can keep track of zk progress + ProcedureCoordinator coordinator = setupMockCoordinator(operationName, + prepared, committed); + + ProcedureMember member = Mockito.mock(ProcedureMember.class); + Procedure p = Mockito.mock(Procedure.class); + Mockito.when(p.getName()).thenReturn(operationName); + + Pair> pair = controllers + .start(watcher, operationName, coordinator, CONTROLLER_NODE_NAME, member, expected); + ZKProcedureCoordinatorRpcs controller = pair.getFirst(); + List cohortControllers = pair.getSecond(); + + // post 1/2 the prepare nodes early + for (int i = 0; i < cohortControllers.size() / 2; i++) { + cohortControllers.get(i).sendMemberAcquired(sub); + } + + // start the operation + controller.sendGlobalBarrierAcquire(p, data, expected); + + // post the prepare node for each expected node + for (ZKProcedureMemberRpcs cc : cohortControllers) { + cc.sendMemberAcquired(sub); + } + + // wait for all the notifications to reach the coordinator + prepared.await(); + // make sure we got the all the nodes and no more + Mockito.verify(coordinator, times(expected.size())).memberAcquiredBarrier(Mockito.eq(operationName), + Mockito.anyString()); + + // kick off the commit phase + controller.sendGlobalBarrierReached(p, expected); + + // post the committed node for each expected node + for (ZKProcedureMemberRpcs cc : cohortControllers) { + cc.sendMemberCompleted(sub); + } + + // wait for all commit notifications to reach the coordiantor + committed.await(); + // make sure we got the all the nodes and no more + Mockito.verify(coordinator, times(expected.size())).memberFinishedBarrier(Mockito.eq(operationName), + Mockito.anyString()); + + controller.resetMembers(p); + + // verify all behavior + verifyZooKeeperClean(operationName, watcher, controller.getZkProcedureUtil()); + verifyCohort(member, cohortControllers.size(), operationName, data); + verifyCoordinator(operationName, coordinator, expected); + } + + /** + * @return a mock {@link ProcedureCoordinator} that just counts down the + * prepared and committed latch for called to the respective method + */ + private ProcedureCoordinator setupMockCoordinator(String operationName, + final CountDownLatch prepared, final CountDownLatch committed) { + ProcedureCoordinator coordinator = Mockito + .mock(ProcedureCoordinator.class); + Mockito.mock(ProcedureCoordinator.class); + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + prepared.countDown(); + return null; + } + }).when(coordinator).memberAcquiredBarrier(Mockito.eq(operationName), Mockito.anyString()); + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + committed.countDown(); + return null; + } + }).when(coordinator).memberFinishedBarrier(Mockito.eq(operationName), Mockito.anyString()); + return coordinator; + } + + /** + * Verify that the prepare, commit and abort nodes for the operation are removed from zookeeper + */ + private void verifyZooKeeperClean(String operationName, ZooKeeperWatcher watcher, + ZKProcedureUtil controller) 
throws Exception { + String prepare = ZKProcedureUtil.getAcquireBarrierNode(controller, operationName); + String commit = ZKProcedureUtil.getReachedBarrierNode(controller, operationName); + String abort = ZKProcedureUtil.getAbortNode(controller, operationName); + assertEquals("Didn't delete prepare node", -1, ZKUtil.checkExists(watcher, prepare)); + assertEquals("Didn't delete commit node", -1, ZKUtil.checkExists(watcher, commit)); + assertEquals("Didn't delete abort node", -1, ZKUtil.checkExists(watcher, abort)); + } + + /** + * Verify the cohort controller got called once per expected node to start the operation + */ + private void verifyCohort(ProcedureMember member, int cohortSize, + String operationName, byte[] data) { +// verify(member, Mockito.times(cohortSize)).submitSubprocedure(Mockito.eq(operationName), +// (byte[]) Mockito.argThat(new ArrayEquals(data))); + verify(member, Mockito.times(cohortSize)).submitSubprocedure(Mockito.any(Subprocedure.class)); + + } + + /** + * Verify that the coordinator only got called once for each expected node + */ + private void verifyCoordinator(String operationName, + ProcedureCoordinator coordinator, List expected) { + // verify that we got all the expected nodes + for (String node : expected) { + verify(coordinator, once).memberAcquiredBarrier(operationName, node); + verify(coordinator, once).memberFinishedBarrier(operationName, node); + } + } + + /** + * Specify how the controllers that should be started (not spy/mockable) for the test. + */ + private abstract class StartControllers { + public abstract Pair> start( + ZooKeeperWatcher watcher, String operationName, + ProcedureCoordinator coordinator, String controllerName, + ProcedureMember member, List cohortNames) throws Exception; + } + + private final StartControllers startCoordinatorFirst = new StartControllers() { + + @Override + public Pair> start( + ZooKeeperWatcher watcher, String operationName, + ProcedureCoordinator coordinator, String controllerName, + ProcedureMember member, List expected) throws Exception { + // start the controller + ZKProcedureCoordinatorRpcs controller = new ZKProcedureCoordinatorRpcs( + watcher, operationName, CONTROLLER_NODE_NAME); + controller.start(coordinator); + + // make a cohort controller for each expected node + + List cohortControllers = new ArrayList(); + for (String nodeName : expected) { + ZKProcedureMemberRpcs cc = new ZKProcedureMemberRpcs( + watcher, operationName, nodeName); + cc.start(member); + cohortControllers.add(cc); + } + return new Pair>( + controller, cohortControllers); + } + }; + + /** + * Check for the possible race condition where a cohort member starts after the controller and + * therefore could miss a new operation + */ + private final StartControllers startCohortFirst = new StartControllers() { + + @Override + public Pair> start( + ZooKeeperWatcher watcher, String operationName, + ProcedureCoordinator coordinator, String controllerName, + ProcedureMember member, List expected) throws Exception { + + // make a cohort controller for each expected node + List cohortControllers = new ArrayList(); + for (String nodeName : expected) { + ZKProcedureMemberRpcs cc = new ZKProcedureMemberRpcs( + watcher, operationName, nodeName); + cc.start(member); + cohortControllers.add(cc); + } + + // start the controller + ZKProcedureCoordinatorRpcs controller = new ZKProcedureCoordinatorRpcs( + watcher, operationName, CONTROLLER_NODE_NAME); + controller.start(coordinator); + + return new Pair>( + controller, cohortControllers); + } + }; +} Index: 
src/test/java/org/apache/hadoop/hbase/procedure/TestProcedure.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/procedure/TestProcedure.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/procedure/TestProcedure.java (revision 0) @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Demonstrate how Procedure handles single members, multiple members, and errors semantics + */ +@Category(SmallTests.class) +public class TestProcedure { + + ProcedureCoordinator coord; + + @Before + public void setup() { + coord = mock(ProcedureCoordinator.class); + final ProcedureCoordinatorRpcs comms = mock(ProcedureCoordinatorRpcs.class); + when(coord.getRpcs()).thenReturn(comms); // make it not null + } + + class LatchedProcedure extends Procedure { + CountDownLatch startedAcquireBarrier = new CountDownLatch(1); + CountDownLatch startedDuringBarrier = new CountDownLatch(1); + CountDownLatch completedProcedure = new CountDownLatch(1); + + public LatchedProcedure(ProcedureCoordinator coord, ForeignExceptionDispatcher monitor, + long wakeFreq, long timeout, String opName, byte[] data, + List expectedMembers) { + super(coord, monitor, wakeFreq, timeout, opName, data, expectedMembers); + } + + @Override + public void sendGlobalBarrierStart() { + startedAcquireBarrier.countDown(); + } + + @Override + public void sendGlobalBarrierReached() { + startedDuringBarrier.countDown(); + } + + @Override + public void sendGlobalBarrierComplete() { + completedProcedure.countDown(); + } + }; + + /** + * With a single member, verify ordered execution. The Coordinator side is run in a separate + * thread so we can only trigger from members and wait for particular state latches. 
+ */ + @Test(timeout = 1000) + public void testSingleMember() throws Exception { + // The member + List members = new ArrayList(); + members.add("member"); + LatchedProcedure proc = new LatchedProcedure(coord, new ForeignExceptionDispatcher(), 100, + Integer.MAX_VALUE, "op", null, members); + final LatchedProcedure procspy = spy(proc); + // coordinator: start the barrier procedure + new Thread() { + public void run() { + procspy.call(); + } + }.start(); + + // coordinator: wait for the barrier to be acquired, then send start barrier + proc.startedAcquireBarrier.await(); + + // we only know that {@link Procedure#sendStartBarrier()} was called, and others are blocked. + verify(procspy).sendGlobalBarrierStart(); + verify(procspy, never()).sendGlobalBarrierReached(); + verify(procspy, never()).sendGlobalBarrierComplete(); + verify(procspy, never()).barrierAcquiredByMember(anyString()); + + // member: trigger global barrier acquisition + proc.barrierAcquiredByMember(members.get(0)); + + // coordinator: wait for global barrier to be acquired. + proc.acquiredBarrierLatch.await(); + verify(procspy).sendGlobalBarrierStart(); // old news + + // since two threads, we cannot guarantee that {@link Procedure#sendSatsifiedBarrier()} was + // or was not called here. + + // member: trigger global barrier release + proc.barrierReleasedByMember(members.get(0)); + + // coordinator: wait for procedure to be completed + proc.completedProcedure.await(); + verify(procspy).sendGlobalBarrierReached(); + verify(procspy).sendGlobalBarrierComplete(); + verify(procspy, never()).receive(any(ForeignException.class)); + } + + @Test(timeout=1000) + public void testMultipleMember() throws Exception { + // 2 members + List members = new ArrayList(); + members.add("member1"); + members.add("member2"); + + LatchedProcedure proc = new LatchedProcedure(coord, new ForeignExceptionDispatcher(), 100, + Integer.MAX_VALUE, "op", null, members); + final LatchedProcedure procspy = spy(proc); + // start the barrier procedure + new Thread() { + public void run() { + procspy.call(); + } + }.start(); + + // coordinator: wait for the barrier to be acquired, then send start barrier + procspy.startedAcquireBarrier.await(); + + // we only know that {@link Procedure#sendStartBarrier()} was called, and others are blocked. + verify(procspy).sendGlobalBarrierStart(); + verify(procspy, never()).sendGlobalBarrierReached(); + verify(procspy, never()).sendGlobalBarrierComplete(); + verify(procspy, never()).barrierAcquiredByMember(anyString()); // no externals + + // member0: [1/2] trigger global barrier acquisition. + procspy.barrierAcquiredByMember(members.get(0)); + + // coordinator not satisified. + verify(procspy).sendGlobalBarrierStart(); + verify(procspy, never()).sendGlobalBarrierReached(); + verify(procspy, never()).sendGlobalBarrierComplete(); + + // member 1: [2/2] trigger global barrier acquisition. + procspy.barrierAcquiredByMember(members.get(1)); + + // coordinator: wait for global barrier to be acquired. 
+ procspy.startedDuringBarrier.await(); + verify(procspy).sendGlobalBarrierStart(); // old news + + // member 1, 2: trigger global barrier release + procspy.barrierReleasedByMember(members.get(0)); + procspy.barrierReleasedByMember(members.get(1)); + + // coordinator wait for procedure to be completed + procspy.completedProcedure.await(); + verify(procspy).sendGlobalBarrierReached(); + verify(procspy).sendGlobalBarrierComplete(); + verify(procspy, never()).receive(any(ForeignException.class)); + } + + @Test(timeout = 1000) + public void testErrorPropagation() throws Exception { + List members = new ArrayList(); + members.add("member"); + Procedure proc = new Procedure(coord, new ForeignExceptionDispatcher(), 100, + Integer.MAX_VALUE, "op", null, members); + final Procedure procspy = spy(proc); + + ForeignException cause = new ForeignException("SRC", "External Exception"); + proc.receive(cause); + + // start the barrier procedure + Thread t = new Thread() { + public void run() { + procspy.call(); + } + }; + t.start(); + t.join(); + + verify(procspy, never()).sendGlobalBarrierStart(); + verify(procspy, never()).sendGlobalBarrierReached(); + verify(procspy).sendGlobalBarrierComplete(); + } + + @Test(timeout = 1000) + public void testBarrieredErrorPropagation() throws Exception { + List members = new ArrayList(); + members.add("member"); + LatchedProcedure proc = new LatchedProcedure(coord, new ForeignExceptionDispatcher(), 100, + Integer.MAX_VALUE, "op", null, members); + final LatchedProcedure procspy = spy(proc); + + // start the barrier procedure + Thread t = new Thread() { + public void run() { + procspy.call(); + } + }; + t.start(); + + // now test that we can put an error in before the commit phase runs + procspy.startedAcquireBarrier.await(); + ForeignException cause = new ForeignException("SRC", "External Exception"); + procspy.receive(cause); + procspy.barrierAcquiredByMember(members.get(0)); + t.join(); + + // verify state of all the object + verify(procspy).sendGlobalBarrierStart(); + verify(procspy).sendGlobalBarrierComplete(); + verify(procspy, never()).sendGlobalBarrierReached(); + } +} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/procedure/TestProcedureCoordinator.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/procedure/TestProcedureCoordinator.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/procedure/TestProcedureCoordinator.java (revision 0) @@ -0,0 +1,349 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.procedure; + +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyListOf; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.inOrder; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ThreadPoolExecutor; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.junit.After; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.InOrder; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import com.google.common.collect.Lists; + +/** + * Test Procedure coordinator operation. + *

+ * This only works correctly when we do class level parallelization of tests. If we do method + * level serialization this class will likely throw all kinds of errors. + */ +@Category(SmallTests.class) +public class TestProcedureCoordinator { + // general test constants + private static final long WAKE_FREQUENCY = 1000; + private static final long TIMEOUT = 100000; + private static final long POOL_KEEP_ALIVE = 1; + private static final String nodeName = "node"; + private static final String procName = "some op"; + private static final byte[] procData = new byte[0]; + private static final List expected = Lists.newArrayList("remote1", "remote2"); + + // setup the mocks + private final ProcedureCoordinatorRpcs controller = mock(ProcedureCoordinatorRpcs.class); + private final Procedure task = mock(Procedure.class); + private final ForeignExceptionDispatcher monitor = mock(ForeignExceptionDispatcher.class); + + // handle to the coordinator for each test + private ProcedureCoordinator coordinator; + + @After + public void resetTest() throws IOException { + // reset all the mocks used for the tests + reset(controller, task, monitor); + // close the open coordinator, if it was used + if (coordinator != null) coordinator.close(); + } + + private ProcedureCoordinator buildNewCoordinator() { + ThreadPoolExecutor pool = ProcedureCoordinator.defaultPool(nodeName, POOL_KEEP_ALIVE, 1, WAKE_FREQUENCY); + return spy(new ProcedureCoordinator(controller, pool)); + } + + /** + * Currently we can only handle one procedure at a time. This makes sure we handle that and + * reject submitting more. + */ + @Test + public void testThreadPoolSize() throws Exception { + ProcedureCoordinator coordinator = buildNewCoordinator(); + Procedure proc = new Procedure(coordinator, monitor, + WAKE_FREQUENCY, TIMEOUT, procName, procData, expected); + Procedure procSpy = spy(proc); + + Procedure proc2 = new Procedure(coordinator, monitor, + WAKE_FREQUENCY, TIMEOUT, procName +"2", procData, expected); + Procedure procSpy2 = spy(proc2); + when(coordinator.createProcedure(any(ForeignExceptionDispatcher.class), eq(procName), eq(procData), anyListOf(String.class))) + .thenReturn(procSpy, procSpy2); + + coordinator.startProcedure(procSpy.getErrorMonitor(), procName, procData, expected); + // null here means second procedure failed to start. 
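+ // The coordinator is built with a single-threaded pool (see buildNewCoordinator()), so a
+ // second concurrent procedure is expected to be rejected and startProcedure returns null.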
+ assertNull("Coordinator successfully ran two tasks at once with a single thread pool.", + coordinator.startProcedure(proc2.getErrorMonitor(), "another op", procData, expected)); + } + + /** + * Check handling a connection failure correctly if we get it during the acquiring phase + */ + @Test(timeout = 5000) + public void testUnreachableControllerDuringPrepare() throws Exception { + coordinator = buildNewCoordinator(); + // setup the proc + List expected = Arrays.asList("cohort"); + Procedure proc = new Procedure(coordinator, WAKE_FREQUENCY, + TIMEOUT, procName, procData, expected); + final Procedure procSpy = spy(proc); + + when(coordinator.createProcedure(any(ForeignExceptionDispatcher.class), eq(procName), eq(procData), anyListOf(String.class))) + .thenReturn(procSpy); + + // use the passed controller responses + IOException cause = new IOException("Failed to reach comms during acquire"); + doThrow(cause).when(controller) + .sendGlobalBarrierAcquire(eq(procSpy), eq(procData), anyListOf(String.class)); + + // run the operation + proc = coordinator.startProcedure(proc.getErrorMonitor(), procName, procData, expected); + // and wait for it to finish + proc.waitForCompleted(); + verify(procSpy, atLeastOnce()).receive(any(ForeignException.class)); + verify(coordinator, times(1)).rpcConnectionFailure(anyString(), eq(cause)); + verify(controller, times(1)).sendGlobalBarrierAcquire(procSpy, procData, expected); + verify(controller, never()).sendGlobalBarrierReached(any(Procedure.class), + anyListOf(String.class)); + } + + /** + * Check handling a connection failure correctly if we get it during the barrier phase + */ + @Test(timeout = 5000) + public void testUnreachableControllerDuringCommit() throws Exception { + coordinator = buildNewCoordinator(); + + // setup the task and spy on it + List expected = Arrays.asList("cohort"); + final Procedure spy = spy(new Procedure(coordinator, + WAKE_FREQUENCY, TIMEOUT, procName, procData, expected)); + + when(coordinator.createProcedure(any(ForeignExceptionDispatcher.class), eq(procName), eq(procData), anyListOf(String.class))) + .thenReturn(spy); + + // use the passed controller responses + IOException cause = new IOException("Failed to reach controller during prepare"); + doAnswer(new AcquireBarrierAnswer(procName, new String[] { "cohort" })) + .when(controller).sendGlobalBarrierAcquire(eq(spy), eq(procData), anyListOf(String.class)); + doThrow(cause).when(controller).sendGlobalBarrierReached(eq(spy), anyListOf(String.class)); + + // run the operation + Procedure task = coordinator.startProcedure(spy.getErrorMonitor(), procName, procData, expected); + // and wait for it to finish + task.waitForCompleted(); + verify(spy, atLeastOnce()).receive(any(ForeignException.class)); + verify(coordinator, times(1)).rpcConnectionFailure(anyString(), eq(cause)); + verify(controller, times(1)).sendGlobalBarrierAcquire(eq(spy), + eq(procData), anyListOf(String.class)); + verify(controller, times(1)).sendGlobalBarrierReached(any(Procedure.class), + anyListOf(String.class)); + } + + @Test(timeout = 1000) + public void testNoCohort() throws Exception { + runSimpleProcedure(); + } + + @Test(timeout = 1000) + public void testSingleCohortOrchestration() throws Exception { + runSimpleProcedure("one"); + } + + @Test(timeout = 1000) + public void testMultipleCohortOrchestration() throws Exception { + runSimpleProcedure("one", "two", "three", "four"); + } + + public void runSimpleProcedure(String... 
members) throws Exception { + coordinator = buildNewCoordinator(); + Procedure task = new Procedure(coordinator, monitor, WAKE_FREQUENCY, + TIMEOUT, procName, procData, Arrays.asList(members)); + final Procedure spy = spy(task); + runCoordinatedProcedure(spy, members); + } + + /** + * Test that if nodes join the barrier early we still correctly handle the progress + */ + @Test(timeout = 1000) + public void testEarlyJoiningBarrier() throws Exception { + final String[] cohort = new String[] { "one", "two", "three", "four" }; + coordinator = buildNewCoordinator(); + final ProcedureCoordinator ref = coordinator; + Procedure task = new Procedure(coordinator, monitor, WAKE_FREQUENCY, + TIMEOUT, procName, procData, Arrays.asList(cohort)); + final Procedure spy = spy(task); + + AcquireBarrierAnswer prepare = new AcquireBarrierAnswer(procName, cohort) { + public void doWork() { + // then do some fun where we commit before all nodes have prepared + // "one" commits before anyone else is done + ref.memberAcquiredBarrier(this.opName, this.cohort[0]); + ref.memberFinishedBarrier(this.opName, this.cohort[0]); + // but "two" takes a while + ref.memberAcquiredBarrier(this.opName, this.cohort[1]); + // "three"jumps ahead + ref.memberAcquiredBarrier(this.opName, this.cohort[2]); + ref.memberFinishedBarrier(this.opName, this.cohort[2]); + // and "four" takes a while + ref.memberAcquiredBarrier(this.opName, this.cohort[3]); + } + }; + + BarrierAnswer commit = new BarrierAnswer(procName, cohort) { + @Override + public void doWork() { + ref.memberFinishedBarrier(opName, this.cohort[1]); + ref.memberFinishedBarrier(opName, this.cohort[3]); + } + }; + runCoordinatedOperation(spy, prepare, commit, cohort); + } + + /** + * Just run a procedure with the standard name and data, with not special task for the mock + * coordinator (it works just like a regular coordinator). For custom behavior see + * {@link #runCoordinatedOperation(Procedure, AcquireBarrierAnswer, BarrierAnswer, String[])} + * . + * @param spy Spy on a real {@link Procedure} + * @param cohort expected cohort members + * @throws Exception on failure + */ + public void runCoordinatedProcedure(Procedure spy, String... cohort) throws Exception { + runCoordinatedOperation(spy, new AcquireBarrierAnswer(procName, cohort), + new BarrierAnswer(procName, cohort), cohort); + } + + public void runCoordinatedOperation(Procedure spy, AcquireBarrierAnswer prepare, + String... cohort) throws Exception { + runCoordinatedOperation(spy, prepare, new BarrierAnswer(procName, cohort), cohort); + } + + public void runCoordinatedOperation(Procedure spy, BarrierAnswer commit, + String... cohort) throws Exception { + runCoordinatedOperation(spy, new AcquireBarrierAnswer(procName, cohort), commit, cohort); + } + + public void runCoordinatedOperation(Procedure spy, AcquireBarrierAnswer prepareOperation, + BarrierAnswer commitOperation, String... 
cohort) throws Exception { + List expected = Arrays.asList(cohort); + when(coordinator.createProcedure(any(ForeignExceptionDispatcher.class), eq(procName), eq(procData), anyListOf(String.class))) + .thenReturn(spy); + + // use the passed controller responses + doAnswer(prepareOperation).when(controller).sendGlobalBarrierAcquire(spy, procData, expected); + doAnswer(commitOperation).when(controller) + .sendGlobalBarrierReached(eq(spy), anyListOf(String.class)); + + // run the operation + Procedure task = coordinator.startProcedure(spy.getErrorMonitor(), procName, procData, expected); + // and wait for it to finish + task.waitForCompleted(); + + // make sure we mocked correctly + prepareOperation.ensureRan(); + // we never got an exception + InOrder inorder = inOrder(spy, controller); + inorder.verify(spy).sendGlobalBarrierStart(); + inorder.verify(controller).sendGlobalBarrierAcquire(task, procData, expected); + inorder.verify(spy).sendGlobalBarrierReached(); + inorder.verify(controller).sendGlobalBarrierReached(eq(task), anyListOf(String.class)); + } + + private abstract class OperationAnswer implements Answer { + private boolean ran = false; + + public void ensureRan() { + assertTrue("Prepare mocking didn't actually run!", ran); + } + + @Override + public final Void answer(InvocationOnMock invocation) throws Throwable { + this.ran = true; + doWork(); + return null; + } + + protected abstract void doWork() throws Throwable; + } + + /** + * Just tell the current coordinator that each of the nodes has prepared + */ + private class AcquireBarrierAnswer extends OperationAnswer { + protected final String[] cohort; + protected final String opName; + + public AcquireBarrierAnswer(String opName, String... cohort) { + this.cohort = cohort; + this.opName = opName; + } + + @Override + public void doWork() { + if (cohort == null) return; + for (String member : cohort) { + TestProcedureCoordinator.this.coordinator.memberAcquiredBarrier(opName, member); + } + } + } + + /** + * Just tell the current coordinator that each of the nodes has committed + */ + private class BarrierAnswer extends OperationAnswer { + protected final String[] cohort; + protected final String opName; + + public BarrierAnswer(String opName, String... cohort) { + this.cohort = cohort; + this.opName = opName; + } + + @Override + public void doWork() { + if (cohort == null) return; + for (String member : cohort) { + TestProcedureCoordinator.this.coordinator.memberFinishedBarrier(opName, member); + } + } + } +} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/procedure/TestZKProcedure.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/procedure/TestZKProcedure.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/procedure/TestZKProcedure.java (revision 0) @@ -0,0 +1,405 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyListOf; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.errorhandling.TimeoutException; +import org.apache.hadoop.hbase.procedure.Subprocedure.SubprocedureImpl; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; +import org.mockito.internal.matchers.ArrayEquals; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import org.mockito.verification.VerificationMode; + +import com.google.common.collect.Lists; + +/** + * Cluster-wide testing of a distributed three-phase commit using a 'real' zookeeper cluster + */ +@Category(MediumTests.class) +public class TestZKProcedure { + + private static final Log LOG = LogFactory.getLog(TestZKProcedure.class); + private static HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static final String COORDINATOR_NODE_NAME = "coordinator"; + private static final long KEEP_ALIVE = 100; // seconds + private static final int POOL_SIZE = 1; + private static final long TIMEOUT = 10000; // when debugging make this larger for debugging + private static final long WAKE_FREQUENCY = 500; + private static final String opName = "op"; + private static final byte[] data = new byte[] { 1, 2 }; // TODO what is this used for? 
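+ // Mockito verification mode asserting that an interaction happened exactly once.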
+ private static final VerificationMode once = Mockito.times(1); + + @BeforeClass + public static void setupTest() throws Exception { + UTIL.startMiniZKCluster(); + } + + @AfterClass + public static void cleanupTest() throws Exception { + UTIL.shutdownMiniZKCluster(); + } + + private static ZooKeeperWatcher newZooKeeperWatcher() throws IOException { + return new ZooKeeperWatcher(UTIL.getConfiguration(), "testing utility", new Abortable() { + @Override + public void abort(String why, Throwable e) { + throw new RuntimeException( + "Unexpected abort in distributed three phase commit test:" + why, e); + } + + @Override + public boolean isAborted() { + return false; + } + }); + } + + @Test + public void testEmptyMemberSet() throws Exception { + runCommit(); + } + + @Test + public void testSingleMember() throws Exception { + runCommit("one"); + } + + @Test + public void testMultipleMembers() throws Exception { + runCommit("one", "two", "three", "four" ); + } + + private void runCommit(String... members) throws Exception { + // make sure we just have an empty list + if (members == null) { + members = new String[0]; + } + List expected = Arrays.asList(members); + + // setup the constants + ZooKeeperWatcher coordZkw = newZooKeeperWatcher(); + String opDescription = "coordination test - " + members.length + " cohort members"; + + // start running the controller + ZKProcedureCoordinatorRpcs coordinatorComms = new ZKProcedureCoordinatorRpcs( + coordZkw, opDescription, COORDINATOR_NODE_NAME); + ThreadPoolExecutor pool = ProcedureCoordinator.defaultPool(COORDINATOR_NODE_NAME, KEEP_ALIVE, POOL_SIZE, WAKE_FREQUENCY); + ProcedureCoordinator coordinator = new ProcedureCoordinator(coordinatorComms, pool) { + @Override + public Procedure createProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs, + List expectedMembers) { + return Mockito.spy(super.createProcedure(fed, procName, procArgs, expectedMembers)); + } + }; + + // build and start members + // NOTE: There is a single subprocedure builder for all members here. + SubprocedureFactory subprocFactory = Mockito.mock(SubprocedureFactory.class); + List> procMembers = new ArrayList>( + members.length); + // start each member + for (String member : members) { + ZooKeeperWatcher watcher = newZooKeeperWatcher(); + ZKProcedureMemberRpcs comms = new ZKProcedureMemberRpcs(watcher, opDescription, member); + ThreadPoolExecutor pool2 = ProcedureMember.defaultPool(WAKE_FREQUENCY, KEEP_ALIVE, 1, member); + ProcedureMember procMember = new ProcedureMember(comms, pool2, subprocFactory); + procMembers.add(new Pair(procMember, comms)); + comms.start(procMember); + } + + // setup mock member subprocedures + final List subprocs = new ArrayList(); + for (int i = 0; i < procMembers.size(); i++) { + ForeignExceptionDispatcher cohortMonitor = new ForeignExceptionDispatcher(); + Subprocedure commit = Mockito + .spy(new SubprocedureImpl(procMembers.get(i).getFirst(), opName, cohortMonitor, + WAKE_FREQUENCY, TIMEOUT)); + subprocs.add(commit); + } + + // link subprocedure to buildNewOperation invocation. 
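+ // Each call to the mocked factory hands out the next pre-built, spied subprocedure in order,
+ // one per member.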
+ final AtomicInteger i = new AtomicInteger(0); // NOTE: would be racy if not an AtomicInteger + Mockito.when(subprocFactory.buildSubprocedure(Mockito.eq(opName), + (byte[]) Mockito.argThat(new ArrayEquals(data)))).thenAnswer( + new Answer() { + @Override + public Subprocedure answer(InvocationOnMock invocation) throws Throwable { + int index = i.getAndIncrement(); + LOG.debug("Task size:" + subprocs.size() + ", getting:" + index); + Subprocedure commit = subprocs.get(index); + return commit; + } + }); + + // setup spying on the coordinator +// Procedure proc = Mockito.spy(procBuilder.createProcedure(coordinator, opName, data, expected)); +// Mockito.when(procBuilder.build(coordinator, opName, data, expected)).thenReturn(proc); + + // start running the operation + Procedure task = coordinator.startProcedure(new ForeignExceptionDispatcher(), opName, data, expected); +// assertEquals("Didn't mock coordinator task", proc, task); + + // verify all things ran as expected +// waitAndVerifyProc(proc, once, once, never(), once, false); + waitAndVerifyProc(task, once, once, never(), once, false); + verifyCohortSuccessful(expected, subprocFactory, subprocs, once, once, never(), once, false); + + // close all the things + closeAll(coordinator, coordinatorComms, procMembers); + } + + /** + * Test a distributed commit with multiple cohort members, where one of the cohort members has a + * timeout exception during the prepare stage. + */ + @Test + public void testMultiCohortWithMemberTimeoutDuringPrepare() throws Exception { + String opDescription = "error injection coordination"; + String[] cohortMembers = new String[] { "one", "two", "three" }; + List expected = Lists.newArrayList(cohortMembers); + // error constants + final int memberErrorIndex = 2; + final CountDownLatch coordinatorReceivedErrorLatch = new CountDownLatch(1); + + // start running the coordinator and its controller + ZooKeeperWatcher coordinatorWatcher = newZooKeeperWatcher(); + ZKProcedureCoordinatorRpcs coordinatorController = new ZKProcedureCoordinatorRpcs( + coordinatorWatcher, opDescription, COORDINATOR_NODE_NAME); + ThreadPoolExecutor pool = ProcedureCoordinator.defaultPool(COORDINATOR_NODE_NAME, KEEP_ALIVE, POOL_SIZE, WAKE_FREQUENCY); + ProcedureCoordinator coordinator = spy(new ProcedureCoordinator(coordinatorController, pool)); + + // start a member for each node + SubprocedureFactory subprocFactory = Mockito.mock(SubprocedureFactory.class); + List> members = new ArrayList>( + expected.size()); + for (String member : expected) { + ZooKeeperWatcher watcher = newZooKeeperWatcher(); + ZKProcedureMemberRpcs controller = new ZKProcedureMemberRpcs(watcher, opDescription, member); + ThreadPoolExecutor pool2 = ProcedureMember.defaultPool(WAKE_FREQUENCY, KEEP_ALIVE, 1, member); + ProcedureMember mem = new ProcedureMember(controller, pool2, subprocFactory); + members.add(new Pair(mem, controller)); + controller.start(mem); + } + + // setup mock subprocedures + final List cohortTasks = new ArrayList(); + final int[] elem = new int[1]; + for (int i = 0; i < members.size(); i++) { + ForeignExceptionDispatcher cohortMonitor = new ForeignExceptionDispatcher(); + ProcedureMember comms = members.get(i).getFirst(); + Subprocedure commit = Mockito + .spy(new SubprocedureImpl(comms, opName, cohortMonitor, WAKE_FREQUENCY, TIMEOUT)); + // This nasty bit has one of the impls throw a TimeoutException + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + int index = elem[0]; + if (index 
== memberErrorIndex) { + LOG.debug("Sending error to coordinator"); + ForeignException remoteCause = new ForeignException("TIMER", + new TimeoutException("subprocTimeout" , 1, 2, 0)); + Subprocedure r = ((Subprocedure) invocation.getMock()); + LOG.error("Remote commit failure, not propagating error:" + remoteCause); + r.monitor.receive(remoteCause); + // don't complete the error phase until the coordinator has gotten the error + // notification (which ensures that we never progress past prepare) + try { + Procedure.waitForLatch(coordinatorReceivedErrorLatch, new ForeignExceptionDispatcher(), + WAKE_FREQUENCY, "coordinator received error"); + } catch (InterruptedException e) { + LOG.debug("Wait for latch interrupted, done:" + (coordinatorReceivedErrorLatch.getCount() == 0)); + // reset the interrupt status on the thread + Thread.currentThread().interrupt(); + } + } + elem[0] = ++index; + return null; + } + }).when(commit).acquireBarrier(); + cohortTasks.add(commit); + } + + // pass out a task per member + final int[] i = new int[] { 0 }; + Mockito.when( + subprocFactory.buildSubprocedure(Mockito.eq(opName), + (byte[]) Mockito.argThat(new ArrayEquals(data)))).thenAnswer( + new Answer() { + @Override + public Subprocedure answer(InvocationOnMock invocation) throws Throwable { + int index = i[0]; + Subprocedure commit = cohortTasks.get(index); + index++; + i[0] = index; + return commit; + } + }); + + // setup spying on the coordinator + ForeignExceptionDispatcher coordinatorTaskErrorMonitor = Mockito + .spy(new ForeignExceptionDispatcher()); + Procedure coordinatorTask = Mockito.spy(new Procedure(coordinator, + coordinatorTaskErrorMonitor, WAKE_FREQUENCY, TIMEOUT, + opName, data, expected)); + when(coordinator.createProcedure(any(ForeignExceptionDispatcher.class), eq(opName), eq(data), anyListOf(String.class))) + .thenReturn(coordinatorTask); + // count down the error latch when we get the remote error + Mockito.doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + // pass on the error to the master + invocation.callRealMethod(); + // then count down the got error latch + coordinatorReceivedErrorLatch.countDown(); + return null; + } + }).when(coordinatorTask).receive(Mockito.any(ForeignException.class)); + + // ---------------------------- + // start running the operation + // ---------------------------- + + Procedure task = coordinator.startProcedure(coordinatorTaskErrorMonitor, opName, data, expected); + assertEquals("Didn't mock coordinator task", coordinatorTask, task); + + // wait for the task to complete + try { + task.waitForCompleted(); + } catch (ForeignException fe) { + // this may get caught or may not + } + + // ------------- + // verification + // ------------- + waitAndVerifyProc(coordinatorTask, once, never(), once, once, true); + verifyCohortSuccessful(expected, subprocFactory, cohortTasks, once, never(), once, + once, true); + + // close all the open things + closeAll(coordinator, coordinatorController, members); + } + + /** + * Wait for the coordinator task to complete, and verify all the mocks + * @param task to wait on + * @throws Exception on unexpected failure + */ + private void waitAndVerifyProc(Procedure proc, VerificationMode prepare, + VerificationMode commit, VerificationMode cleanup, VerificationMode finish, boolean opHasError) + throws Exception { + boolean caughtError = false; + try { + proc.waitForCompleted(); + } catch (ForeignException fe) { + caughtError = true; + } + // make sure that the task called all 
the expected phases + Mockito.verify(proc, prepare).sendGlobalBarrierStart(); + Mockito.verify(proc, commit).sendGlobalBarrierReached(); + Mockito.verify(proc, finish).sendGlobalBarrierComplete(); + assertEquals("Operation error state was unexpected", opHasError, proc.getErrorMonitor() + .hasException()); + assertEquals("Operation error state was unexpected", opHasError, caughtError); + + } + + /** + * Wait for the coordinator task to complete, and verify all the mocks + * @param task to wait on + * @throws Exception on unexpected failure + */ + private void waitAndVerifySubproc(Subprocedure op, VerificationMode prepare, + VerificationMode commit, VerificationMode cleanup, VerificationMode finish, boolean opHasError) + throws Exception { + boolean caughtError = false; + try { + op.waitForLocallyCompleted(); + } catch (ForeignException fe) { + caughtError = true; + } + // make sure that the task called all the expected phases + Mockito.verify(op, prepare).acquireBarrier(); + Mockito.verify(op, commit).insideBarrier(); + // We cannot guarantee that cleanup has run so we don't check it. + + assertEquals("Operation error state was unexpected", opHasError, op.getErrorCheckable() + .hasException()); + assertEquals("Operation error state was unexpected", opHasError, caughtError); + + } + + private void verifyCohortSuccessful(List cohortNames, + SubprocedureFactory subprocFactory, Iterable cohortTasks, + VerificationMode prepare, VerificationMode commit, VerificationMode cleanup, + VerificationMode finish, boolean opHasError) throws Exception { + + // make sure we build the correct number of cohort members + Mockito.verify(subprocFactory, Mockito.times(cohortNames.size())).buildSubprocedure( + Mockito.eq(opName), (byte[]) Mockito.argThat(new ArrayEquals(data))); + // verify that we ran each of the operations cleanly + int j = 0; + for (Subprocedure op : cohortTasks) { + LOG.debug("Checking mock:" + (j++)); + waitAndVerifySubproc(op, prepare, commit, cleanup, finish, opHasError); + } + } + + private void closeAll( + ProcedureCoordinator coordinator, + ZKProcedureCoordinatorRpcs coordinatorController, + List> cohort) + throws IOException { + // make sure we close all the resources + for (Pair member : cohort) { + member.getFirst().close(); + member.getSecond().close(); + } + coordinator.close(); + coordinatorController.close(); + } +} Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestReferenceRegionHFilesTask.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestReferenceRegionHFilesTask.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestReferenceRegionHFilesTask.java (revision 0) @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.ReferenceRegionHFilesTask; +import org.apache.hadoop.hbase.util.FSUtils; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +@Category(SmallTests.class) +public class TestReferenceRegionHFilesTask { + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + @Test + public void testRun() throws IOException { + FileSystem fs = UTIL.getTestFileSystem(); + // setup the region internals + Path testdir = UTIL.getDataTestDir(); + Path regionDir = new Path(testdir, "region"); + Path family1 = new Path(regionDir, "fam1"); + // make an empty family + Path family2 = new Path(regionDir, "fam2"); + fs.mkdirs(family2); + + // add some files to family 1 + Path file1 = new Path(family1, "05f99689ae254693836613d1884c6b63"); + fs.createNewFile(file1); + Path file2 = new Path(family1, "7ac9898bf41d445aa0003e3d699d5d26"); + fs.createNewFile(file2); + + // create the snapshot directory + Path snapshotRegionDir = new Path(testdir, HConstants.SNAPSHOT_DIR_NAME); + fs.mkdirs(snapshotRegionDir); + + SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName("name") + .setTable("table").build(); + ForeignExceptionDispatcher monitor = Mockito.mock(ForeignExceptionDispatcher.class); + ReferenceRegionHFilesTask task = new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, + fs, snapshotRegionDir); + ReferenceRegionHFilesTask taskSpy = Mockito.spy(task); + // run the task through the spy so the verification below observes the real call + taskSpy.call(); + + // make sure we never get an error + Mockito.verify(taskSpy, Mockito.never()).snapshotFailure(Mockito.anyString(), + Mockito.any(Exception.class)); + + // verify that all the hfiles get referenced + List<String> hfiles = new ArrayList<String>(2); + FileStatus[] regions = FSUtils.listStatus(fs, snapshotRegionDir); + for (FileStatus region : regions) { + FileStatus[] fams = FSUtils.listStatus(fs, region.getPath()); + for (FileStatus fam : fams) { + FileStatus[] files = FSUtils.listStatus(fs, fam.getPath()); + for (FileStatus file : files) { + hfiles.add(file.getPath().getName()); + } + } + } + assertTrue("Didn't reference :" + file1, hfiles.contains(file1.getName())); + assertTrue("Didn't reference :" + file2, hfiles.contains(file2.getName())); + } +} Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java (revision 0) @@ -0,0 +1,257 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.snapshot.ExportSnapshot; +import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; +import org.apache.hadoop.mapreduce.Job; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test Export Snapshot Tool + */ +@Category(MediumTests.class) +public class TestExportSnapshot { + private final Log LOG = LogFactory.getLog(getClass()); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private final static byte[] FAMILY = Bytes.toBytes("cf"); + + private byte[] snapshotName; + private byte[] tableName; + private HBaseAdmin admin; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.getConfiguration().setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); + TEST_UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 100); + TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 250); + TEST_UTIL.getConfiguration().setInt("hbase.client.retries.number", 6); + TEST_UTIL.getConfiguration().setBoolean("hbase.master.enabletable.roundrobin", true); + TEST_UTIL.startMiniCluster(3); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + /** + * Create a table and take a snapshot of the table used by the export test. 
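+ * The table is disabled while the snapshot is taken and re-enabled afterwards.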
+ */ + @Before + public void setUp() throws Exception { + this.admin = TEST_UTIL.getHBaseAdmin(); + + long tid = System.currentTimeMillis(); + tableName = Bytes.toBytes("testtb-" + tid); + snapshotName = Bytes.toBytes("snaptb0-" + tid); + + // create Table + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.addFamily(new HColumnDescriptor(FAMILY)); + admin.createTable(htd, null); + HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName); + TEST_UTIL.loadTable(table, FAMILY); + + // take a snapshot + admin.disableTable(tableName); + admin.snapshot(snapshotName, tableName); + admin.enableTable(tableName); + } + + @After + public void tearDown() throws Exception { + this.admin.close(); + } + + /** + * Verify the result of the getBalancedSplits() method. + * The result is a list of groups of files, used as the input list for the "export" mappers. + * All the groups should hold a similar amount of data. + * + * Each entry of the input list is a pair of file path and length. + * The getBalancedSplits() function sorts the entries by length + * and assigns a file to each group, going back and forth through the groups. + */ + @Test + public void testBalanceSplit() throws Exception { + // Create a list of files + List<Pair<Path, Long>> files = new ArrayList<Pair<Path, Long>>(); + for (long i = 0; i <= 20; i++) { + files.add(new Pair<Path, Long>(new Path("file-" + i), i)); + } + + // Create 5 groups (total size 210) + // group 0: 20, 11, 10, 1, 0 (total size: 42) + // group 1: 19, 12, 9, 2 (total size: 42) + // group 2: 18, 13, 8, 3 (total size: 42) + // group 3: 17, 14, 7, 4 (total size: 42) + // group 4: 16, 15, 6, 5 (total size: 42) + List<List<Path>> splits = ExportSnapshot.getBalancedSplits(files, 5); + assertEquals(5, splits.size()); + assertEquals(Arrays.asList(new Path("file-20"), new Path("file-11"), + new Path("file-10"), new Path("file-1"), new Path("file-0")), splits.get(0)); + assertEquals(Arrays.asList(new Path("file-19"), new Path("file-12"), + new Path("file-9"), new Path("file-2")), splits.get(1)); + assertEquals(Arrays.asList(new Path("file-18"), new Path("file-13"), + new Path("file-8"), new Path("file-3")), splits.get(2)); + assertEquals(Arrays.asList(new Path("file-17"), new Path("file-14"), + new Path("file-7"), new Path("file-4")), splits.get(3)); + assertEquals(Arrays.asList(new Path("file-16"), new Path("file-15"), + new Path("file-6"), new Path("file-5")), splits.get(4)); + } + + /** + * Verify that the exported snapshot and copied files match the original ones.
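+ * The export should create a .snapshot and a .archive directory under the target path; the
+ * copied snapshot metadata is compared against the source and the copied hfiles are checked
+ * to be non-empty.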
+ */ + @Test + public void testExportFileSystemState() throws Exception { + Path copyDir = TEST_UTIL.getDataTestDir("export-" + System.currentTimeMillis()); + URI hdfsUri = FileSystem.get(TEST_UTIL.getConfiguration()).getUri(); + FileSystem fs = FileSystem.get(copyDir.toUri(), new Configuration()); + copyDir = copyDir.makeQualified(fs); + + // Export Snapshot + int res = ExportSnapshot.innerMain(TEST_UTIL.getConfiguration(), new String[] { + "-snapshot", Bytes.toString(snapshotName), + "-copy-to", copyDir.toString() + }); + assertEquals(0, res); + + // Verify File-System state + FileStatus[] rootFiles = fs.listStatus(copyDir); + assertEquals(2, rootFiles.length); + for (FileStatus fileStatus: rootFiles) { + String name = fileStatus.getPath().getName(); + assertTrue(fileStatus.isDir()); + assertTrue(name.equals(".snapshot") || name.equals(".archive")); + } + + // compare the snapshot metadata and verify the hfiles + final FileSystem hdfs = FileSystem.get(hdfsUri, TEST_UTIL.getConfiguration()); + final Path snapshotDir = new Path(".snapshot", Bytes.toString(snapshotName)); + verifySnapshot(hdfs, new Path(TEST_UTIL.getDefaultRootDirPath(), snapshotDir), + fs, new Path(copyDir, snapshotDir)); + verifyArchive(fs, copyDir, Bytes.toString(snapshotName)); + + // Remove the exported dir + fs.delete(copyDir, true); + } + + /* + * verify if the snapshot folder on file-system 1 match the one on file-system 2 + */ + private void verifySnapshot(final FileSystem fs1, final Path root1, + final FileSystem fs2, final Path root2) throws IOException { + Set s = new HashSet(); + assertEquals(listFiles(fs1, root1, root1), listFiles(fs2, root2, root2)); + } + + /* + * Verify if the files exists + */ + private void verifyArchive(final FileSystem fs, final Path rootDir, final String snapshotName) + throws IOException { + final Path exportedSnapshot = new Path(rootDir, new Path(".snapshot", snapshotName)); + final Path exportedArchive = new Path(rootDir, ".archive"); + LOG.debug(listFiles(fs, exportedArchive, exportedArchive)); + SnapshotReferenceUtil.visitReferencedFiles(fs, exportedSnapshot, + new SnapshotReferenceUtil.FileVisitor() { + public void storeFile (final String region, final String family, final String hfile) + throws IOException { + verifyNonEmptyFile(new Path(exportedArchive, + new Path(Bytes.toString(tableName), new Path(region, new Path(family, hfile))))); + } + + public void recoveredEdits (final String region, final String logfile) + throws IOException { + verifyNonEmptyFile(new Path(exportedSnapshot, + new Path(Bytes.toString(tableName), new Path(region, logfile)))); + } + + public void logFile (final String server, final String logfile) + throws IOException { + verifyNonEmptyFile(new Path(exportedSnapshot, new Path(server, logfile))); + } + + private void verifyNonEmptyFile(final Path path) throws IOException { + LOG.debug(path); + assertTrue(fs.exists(path)); + assertTrue(fs.getFileStatus(path).getLen() > 0); + } + }); + } + + private Set listFiles(final FileSystem fs, final Path root, final Path dir) + throws IOException { + Set files = new HashSet(); + int rootPrefix = root.toString().length(); + FileStatus[] list = FSUtils.listStatus(fs, dir); + if (list != null) { + for (FileStatus fstat: list) { + LOG.debug(fstat.getPath()); + if (fstat.isDir()) { + files.addAll(listFiles(fs, root, fstat.getPath())); + } else { + files.add(fstat.getPath().toString().substring(rootPrefix)); + } + } + } + return files; + } +} + Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotTask.java 
=================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotTask.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotTask.java (revision 0) @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotTask; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +@Category(SmallTests.class) +public class TestSnapshotTask { + + /** + * Check that errors from running the task get propagated back to the error listener. + */ + @Test + public void testErrorPropagation() throws Exception { + ForeignExceptionDispatcher error = mock(ForeignExceptionDispatcher.class); + SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName("snapshot") + .setTable("table").build(); + final Exception thrown = new Exception("Failed!"); + SnapshotTask fail = new SnapshotTask(snapshot, error) { + @Override + public Void call() { + snapshotFailure("Injected failure", thrown); + return null; + } + }; + fail.call(); + + verify(error, Mockito.times(1)).receive(any(ForeignException.class)); + } + +} Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelper.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelper.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreSnapshotHelper.java (revision 0) @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.client.HConnection; +import org.apache.hadoop.hbase.client.HConnectionTestingUtility; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.MD5Hash; +import org.junit.*; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +/** + * Test the restore/clone operation from a file-system point of view. 
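+ * A restored/cloned table is expected to end up with an HFileLink to the original hfile and,
+ * for the split-region case, a Reference file that points at that link.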
+ */ +@Category(SmallTests.class) +public class TestRestoreSnapshotHelper { + final Log LOG = LogFactory.getLog(getClass()); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private final static String TEST_FAMILY = "cf"; + private final static String TEST_HFILE = "abc"; + + private Configuration conf; + private Path archiveDir; + private FileSystem fs; + private Path rootDir; + + @Before + public void setup() throws Exception { + rootDir = TEST_UTIL.getDataTestDir("testRestore"); + archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY); + fs = TEST_UTIL.getTestFileSystem(); + conf = TEST_UTIL.getConfiguration(); + FSUtils.setRootDir(conf, rootDir); + } + + @After + public void tearDown() throws Exception { + fs.delete(TEST_UTIL.getDataTestDir(), true); + } + + @Test + public void testRestore() throws IOException { + HTableDescriptor htd = createTableDescriptor("testtb"); + + Path snapshotDir = new Path(rootDir, "snapshot"); + createSnapshot(rootDir, snapshotDir, htd); + + // Test clone a snapshot + HTableDescriptor htdClone = createTableDescriptor("testtb-clone"); + testRestore(snapshotDir, htd.getNameAsString(), htdClone); + verifyRestore(rootDir, htd, htdClone); + + // Test clone a clone ("link to link") + Path cloneDir = HTableDescriptor.getTableDir(rootDir, htdClone.getName()); + HTableDescriptor htdClone2 = createTableDescriptor("testtb-clone2"); + testRestore(cloneDir, htdClone.getNameAsString(), htdClone2); + verifyRestore(rootDir, htd, htdClone2); + } + + private void verifyRestore(final Path rootDir, final HTableDescriptor sourceHtd, + final HTableDescriptor htdClone) throws IOException { + String[] files = getHFiles(HTableDescriptor.getTableDir(rootDir, htdClone.getName())); + assertEquals(2, files.length); + assertTrue(files[0] + " should be a HFileLink", HFileLink.isHFileLink(files[0])); + assertTrue(files[1] + " should be a Referene", StoreFile.isReference(files[1])); + assertEquals(sourceHtd.getNameAsString(), HFileLink.getReferencedTableName(files[0])); + assertEquals(TEST_HFILE, HFileLink.getReferencedHFileName(files[0])); + Path refPath = getReferredToFile(files[1]); + assertTrue(refPath.getName() + " should be a HFileLink", HFileLink.isHFileLink(refPath.getName())); + assertEquals(files[0], refPath.getName()); + } + + /** + * Execute the restore operation + * @param snapshotDir The snapshot directory to use as "restore source" + * @param sourceTableName The name of the snapshotted table + * @param htdClone The HTableDescriptor of the table to restore/clone. + */ + public void testRestore(final Path snapshotDir, final String sourceTableName, + final HTableDescriptor htdClone) throws IOException { + LOG.debug("pre-restore table=" + htdClone.getNameAsString() + " snapshot=" + snapshotDir); + FSUtils.logFileSystemState(fs, rootDir, LOG); + + FSTableDescriptors.createTableDescriptor(htdClone, conf); + RestoreSnapshotHelper helper = getRestoreHelper(rootDir, snapshotDir, sourceTableName, htdClone); + helper.restoreHdfsRegions(); + + LOG.debug("post-restore table=" + htdClone.getNameAsString() + " snapshot=" + snapshotDir); + FSUtils.logFileSystemState(fs, rootDir, LOG); + } + + /** + * Initialize the restore helper, based on the snapshot and table information provided. 
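+ * The catalog tracker, table descriptor and error monitor are plain mocks here, since only
+ * the resulting file-system layout is checked by these tests.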
+ */ + private RestoreSnapshotHelper getRestoreHelper(final Path rootDir, final Path snapshotDir, + final String sourceTableName, final HTableDescriptor htdClone) throws IOException { + CatalogTracker catalogTracker = Mockito.mock(CatalogTracker.class); + HTableDescriptor tableDescriptor = Mockito.mock(HTableDescriptor.class); + ForeignExceptionDispatcher monitor = Mockito.mock(ForeignExceptionDispatcher.class); + + SnapshotDescription sd = SnapshotDescription.newBuilder() + .setName("snapshot").setTable(sourceTableName).build(); + + return new RestoreSnapshotHelper(conf, fs, sd, snapshotDir, + htdClone, HTableDescriptor.getTableDir(rootDir, htdClone.getName()), monitor); + } + + private void createSnapshot(final Path rootDir, final Path snapshotDir, final HTableDescriptor htd) + throws IOException { + // First region, simple with one plain hfile. + HRegion r0 = HRegion.createHRegion(new HRegionInfo(htd.getName()), archiveDir, + conf, htd, null, true, true); + Path storeFile = new Path(new Path(r0.getRegionDir(), TEST_FAMILY), TEST_HFILE); + fs.createNewFile(storeFile); + r0.close(); + + // Second region, used to test the split case. + // This region contains a reference to the hfile in the first region. + HRegion r1 = HRegion.createHRegion(new HRegionInfo(htd.getName()), archiveDir, + conf, htd, null, true, true); + fs.createNewFile(new Path(new Path(r1.getRegionDir(), TEST_FAMILY), + storeFile.getName() + '.' + r0.getRegionInfo().getEncodedName())); + r1.close(); + + Path tableDir = HTableDescriptor.getTableDir(archiveDir, htd.getName()); + FileUtil.copy(fs, tableDir, fs, snapshotDir, false, conf); + } + + private HTableDescriptor createTableDescriptor(final String tableName) { + HTableDescriptor htd = new HTableDescriptor(tableName); + htd.addFamily(new HColumnDescriptor(TEST_FAMILY)); + return htd; + } + + private Path getReferredToFile(final String referenceName) { + Path fakeBasePath = new Path(new Path("table", "region"), "cf"); + return StoreFile.getReferredToFile(new Path(fakeBasePath, referenceName)); + } + + private String[] getHFiles(final Path tableDir) throws IOException { + List files = new ArrayList(); + for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) { + for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) { + for (FileStatus file: FSUtils.listStatus(fs, familyDir)) { + files.add(file.getPath().getName()); + } + } + } + Collections.sort(files); + return files.toArray(new String[files.size()]); + } +} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java (revision 0) @@ -0,0 +1,369 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CountDownLatch; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HBaseFsck; +import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test creating/using/deleting snapshots from the client + *

+ * This is an end-to-end test for the snapshot utility + * + * TODO This is essentially a clone of TestSnapshotFromClient. This is worth refactoring this + * because there will be a few more flavors of snapshots that need to run these tests. + */ +@Category(LargeTests.class) +public class TestFlushSnapshotFromClient { + private static final Log LOG = LogFactory.getLog(TestFlushSnapshotFromClient.class); + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static final int NUM_RS = 2; + private static final String STRING_TABLE_NAME = "test"; + private static final byte[] TEST_FAM = Bytes.toBytes("fam"); + private static final byte[] TABLE_NAME = Bytes.toBytes(STRING_TABLE_NAME); + + /** + * Setup the config for the cluster + * @throws Exception on failure + */ + @BeforeClass + public static void setupCluster() throws Exception { + setupConf(UTIL.getConfiguration()); + UTIL.startMiniCluster(NUM_RS); + } + + private static void setupConf(Configuration conf) { + // disable the ui + conf.setInt("hbase.regionsever.info.port", -1); + // change the flush size to a small amount, regulating number of store files + conf.setInt("hbase.hregion.memstore.flush.size", 25000); + // so make sure we get a compaction when doing a load, but keep around some + // files in the store + conf.setInt("hbase.hstore.compaction.min", 10); + conf.setInt("hbase.hstore.compactionThreshold", 10); + // block writes if we get to 12 store files + conf.setInt("hbase.hstore.blockingStoreFiles", 12); + // drop the number of attempts for the hbase admin + conf.setInt("hbase.client.retries.number", 1); + // Enable snapshot + conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); + // prevent aggressive region split + conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, + ConstantSizeRegionSplitPolicy.class.getName()); + } + + @Before + public void setup() throws Exception { + UTIL.createTable(TABLE_NAME, TEST_FAM); + } + + @After + public void tearDown() throws Exception { + UTIL.deleteTable(TABLE_NAME); + // and cleanup the archive directory + try { + UTIL.getTestFileSystem().delete(new Path(UTIL.getDefaultRootDirPath(), ".archive"), true); + } catch (IOException e) { + LOG.warn("Failure to delete archive directory", e); + } + } + + @AfterClass + public static void cleanupTest() throws Exception { + try { + UTIL.shutdownMiniCluster(); + } catch (Exception e) { + LOG.warn("failure shutting down cluster", e); + } + } + + /** + * Test simple flush snapshotting a table that is online + * @throws Exception + */ + @Test + public void testFlushTableSnapshot() throws Exception { + HBaseAdmin admin = UTIL.getHBaseAdmin(); + // make sure we don't fail on listing snapshots + SnapshotTestingUtils.assertNoSnapshots(admin); + + // put some stuff in the table + HTable table = new HTable(UTIL.getConfiguration(), TABLE_NAME); + UTIL.loadTable(table, TEST_FAM); + + // get the name of all the regionservers hosting the snapshotted table + Set snapshotServers = new HashSet(); + List servers = UTIL.getMiniHBaseCluster().getLiveRegionServerThreads(); + for (RegionServerThread server : servers) { + if (server.getRegionServer().getOnlineRegions(TABLE_NAME).size() > 0) { + snapshotServers.add(server.getRegionServer().getServerName().toString()); + } + } + + LOG.debug("FS state before snapshot:"); + FSUtils.logFileSystemState(UTIL.getTestFileSystem(), + FSUtils.getRootDir(UTIL.getConfiguration()), LOG); + + // take a snapshot of the enabled table + String snapshotString = "offlineTableSnapshot"; + byte[] snapshot = 
Bytes.toBytes(snapshotString);
+    admin.snapshot(snapshotString, STRING_TABLE_NAME, SnapshotDescription.Type.FLUSH);
+    LOG.debug("Snapshot completed.");
+
+    // make sure we have the snapshot
+    List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
+      snapshot, TABLE_NAME);
+
+    // make sure it's a valid snapshot
+    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
+    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
+    LOG.debug("FS state after snapshot:");
+    FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
+      FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
+
+    SnapshotTestingUtils.confirmSnapshotValid(snapshots.get(0), TABLE_NAME, TEST_FAM, rootDir,
+      admin, fs, false, new Path(rootDir, HConstants.HREGION_LOGDIR_NAME), snapshotServers);
+
+    admin.deleteSnapshot(snapshot);
+    snapshots = admin.listSnapshots();
+    SnapshotTestingUtils.assertNoSnapshots(admin);
+  }
+
+  @Test
+  public void testSnapshotFailsOnNonExistentTable() throws Exception {
+    HBaseAdmin admin = UTIL.getHBaseAdmin();
+    // make sure we don't fail on listing snapshots
+    SnapshotTestingUtils.assertNoSnapshots(admin);
+    String tableName = "_not_a_table";
+
+    // make sure the table doesn't exist
+    boolean fail = false;
+    do {
+      try {
+        admin.getTableDescriptor(Bytes.toBytes(tableName));
+        fail = true;
+        LOG.error("Table:" + tableName + " already exists, checking a new name");
+        tableName = tableName + "!";
+      } catch (TableNotFoundException e) {
+        fail = false;
+      }
+    } while (fail);
+
+    // snapshot the non-existent table
+    try {
+      admin.snapshot("fail", tableName, SnapshotDescription.Type.FLUSH);
+      fail("Snapshot succeeded even though the table doesn't exist.");
+    } catch (SnapshotCreationException e) {
+      LOG.info("Correctly failed to snapshot a non-existent table: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Basic end-to-end test of simple-flush-based snapshots
+   */
+  @Test
+  public void testFlushCreateListDestroy() throws Exception {
+    LOG.debug("------- Starting Snapshot test -------------");
+    HBaseAdmin admin = UTIL.getHBaseAdmin();
+    // make sure we don't fail on listing snapshots
+    SnapshotTestingUtils.assertNoSnapshots(admin);
+    // load the table so we have some data
+    UTIL.loadTable(new HTable(UTIL.getConfiguration(), TABLE_NAME), TEST_FAM);
+    // and wait until everything stabilizes
+    HRegionServer rs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
+    List<HRegion> onlineRegions = rs.getOnlineRegions(TABLE_NAME);
+    for (HRegion region : onlineRegions) {
+      region.waitForFlushesAndCompactions();
+    }
+    String snapshotName = "flushSnapshotCreateListDestroy";
+    // test creating the snapshot
+    admin.snapshot(snapshotName, STRING_TABLE_NAME, SnapshotDescription.Type.FLUSH);
+    logFSTree(new Path(UTIL.getConfiguration().get(HConstants.HBASE_DIR)));
+
+    // make sure we only have 1 matching snapshot
+    List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
+      snapshotName, STRING_TABLE_NAME);
+
+    // check the directory structure
+    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
+    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
+    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshots.get(0), rootDir);
+    assertTrue(fs.exists(snapshotDir));
+    HBaseFsck.debugLsr(UTIL.getHBaseCluster().getConfiguration(), snapshotDir);
+    Path snapshotinfo = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
+    assertTrue(fs.exists(snapshotinfo));
+
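+    // the completed snapshot dir now holds the snapshot metadata; the checks below walk its
+    // table descriptor and per-region/family layout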
// check the table info + HTableDescriptor desc = FSTableDescriptors.getTableDescriptor(fs, rootDir, TABLE_NAME); + HTableDescriptor snapshotDesc = FSTableDescriptors.getTableDescriptor(fs, + SnapshotDescriptionUtils.getSnapshotsDir(rootDir), Bytes.toBytes(snapshotName)); + assertEquals(desc, snapshotDesc); + + // check the region snapshot for all the regions + List regions = admin.getTableRegions(TABLE_NAME); + for (HRegionInfo info : regions) { + String regionName = info.getEncodedName(); + Path regionDir = new Path(snapshotDir, regionName); + HRegionInfo snapshotRegionInfo = HRegion.loadDotRegionInfoFileContent(fs, regionDir); + assertEquals(info, snapshotRegionInfo); + // check to make sure we have the family + Path familyDir = new Path(regionDir, Bytes.toString(TEST_FAM)); + assertTrue(fs.exists(familyDir)); + // make sure we have some file references + assertTrue(fs.listStatus(familyDir).length > 0); + } + + // test that we can delete the snapshot + admin.deleteSnapshot(snapshotName); + HBaseFsck.debugLsr(UTIL.getHBaseCluster().getConfiguration(), FSUtils.getRootDir(UTIL.getConfiguration())); + + // make sure we don't have any snapshots + SnapshotTestingUtils.assertNoSnapshots(admin); + LOG.debug("------- Flush-Snapshot Create List Destroy-------------"); + } + + /** + * Demonstrate that we reject snapshot requests if there is a snapshot currently running. + */ + @Test(timeout=60000) + public void testConcurrentSnapshottingAttempts() throws IOException, InterruptedException { + int ssNum = 10; + HBaseAdmin admin = UTIL.getHBaseAdmin(); + // make sure we don't fail on listing snapshots + SnapshotTestingUtils.assertNoSnapshots(admin); + // load the table so we have some data + UTIL.loadTable(new HTable(UTIL.getConfiguration(), TABLE_NAME), TEST_FAM); + // and wait until everything stabilizes + HRegionServer rs = UTIL.getRSForFirstRegionInTable(TABLE_NAME); + List onlineRegions = rs.getOnlineRegions(TABLE_NAME); + for (HRegion region : onlineRegions) { + region.waitForFlushesAndCompactions(); + } + + // build descriptions + SnapshotDescription[] descs = new SnapshotDescription[ssNum]; + for (int i = 0; i < ssNum; i++) { + SnapshotDescription.Builder builder = SnapshotDescription.newBuilder(); + builder.setTable(STRING_TABLE_NAME); + builder.setName("ss"+i); + builder.setType(SnapshotDescription.Type.FLUSH); + descs[i] = builder.build(); + } + + final CountDownLatch toBeSubmitted = new CountDownLatch(ssNum); + // We'll have one of these per thread + class SSRunnable implements Runnable { + SnapshotDescription ss; + SSRunnable(SnapshotDescription ss) { + this.ss = ss; + } + + @Override + public void run() { + try { + HBaseAdmin admin = UTIL.getHBaseAdmin(); + LOG.info("Submitting snapshot request: " + SnapshotDescriptionUtils.toString(ss)); + admin.takeSnapshotAsync(ss); + } catch (Exception e) { + LOG.info("Exception during snapshot request: " + SnapshotDescriptionUtils.toString(ss) + + ". This is ok, we expect some", e); + } + LOG.info("Submitted snapshot request: " + SnapshotDescriptionUtils.toString(ss)); + toBeSubmitted.countDown(); + } + }; + + // kick each off its own thread + for (int i=0 ; i < ssNum; i++) { + new Thread(new SSRunnable(descs[i])).start(); + } + + // wait until all have been submitted + toBeSubmitted.await(); + + // loop until all are done. 
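+    // a snapshot counts as done when isSnapshotFinished() returns true, or when the check throws
+    // because the concurrent request was rejected before it ever started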
+    while (true) {
+      int doneCount = 0;
+      for (SnapshotDescription ss : descs) {
+        try {
+          if (admin.isSnapshotFinished(ss)) {
+            doneCount++;
+          }
+        } catch (Exception e) {
+          LOG.warn("Got an exception when checking for snapshot " + ss.getName(), e);
+          doneCount++;
+        }
+      }
+      if (doneCount == descs.length) {
+        break;
+      }
+      Thread.sleep(100);
+    }
+
+    // dump for debugging
+    logFSTree(new Path(UTIL.getConfiguration().get(HConstants.HBASE_DIR)));
+
+    List<SnapshotDescription> taken = admin.listSnapshots();
+    int takenSize = taken.size();
+    LOG.info("Taken " + takenSize + " snapshots: " + taken);
+    assertTrue("We expect at least 1 request to be rejected because we concurrently" +
+      " issued many requests", takenSize < ssNum && takenSize > 0);
+  }
+
+  private void logFSTree(Path root) throws IOException {
+    FSUtils.logFileSystemState(UTIL.getDFSCluster().getFileSystem(), root, LOG);
+  }
+}
Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotDescriptionUtils.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotDescriptionUtils.java (revision 0)
+++ src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotDescriptionUtils.java (revision 0)
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.snapshot;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type;
+import org.apache.hadoop.hbase.util.EnvironmentEdge;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
+import org.junit.After;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Test that the {@link SnapshotDescription} helper utilities work correctly.
+ */ +@Category(MediumTests.class) +public class TestSnapshotDescriptionUtils { + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static FileSystem fs; + private static Path root; + + @BeforeClass + public static void setupFS() throws Exception { + fs = UTIL.getTestFileSystem(); + root = new Path(UTIL.getDataTestDir(), "hbase"); + } + + @After + public void cleanupFS() throws Exception { + if (fs.exists(root)) { + if (!fs.delete(root, true)) { + throw new IOException("Failed to delete root test dir: " + root); + } + if (!fs.mkdirs(root)) { + throw new IOException("Failed to create root test dir: " + root); + } + } + EnvironmentEdgeManagerTestHelper.reset(); + } + + private static final Log LOG = LogFactory.getLog(TestSnapshotDescriptionUtils.class); + + @Test + public void testValidateMissingTableName() { + Configuration conf = new Configuration(false); + try { + SnapshotDescriptionUtils.validate(SnapshotDescription.newBuilder().setName("fail").build(), + conf); + fail("Snapshot was considered valid without a table name"); + } catch (IllegalArgumentException e) { + LOG.debug("Correctly failed when snapshot doesn't have a tablename"); + } + } + + /** + * Test that we throw an exception if there is no working snapshot directory when we attempt to + * 'complete' the snapshot + * @throws Exception on failure + */ + @Test + public void testCompleteSnapshotWithNoSnapshotDirectoryFailure() throws Exception { + Path snapshotDir = new Path(root, ".snapshot"); + Path tmpDir = new Path(snapshotDir, ".tmp"); + Path workingDir = new Path(tmpDir, "not_a_snapshot"); + assertFalse("Already have working snapshot dir: " + workingDir + + " but shouldn't. Test file leak?", fs.exists(workingDir)); + SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName("snapshot").build(); + try { + SnapshotDescriptionUtils.completeSnapshot(snapshot, root, workingDir, fs); + fail("Shouldn't successfully complete move of a non-existent directory."); + } catch (IOException e) { + LOG.info("Correctly failed to move non-existant directory: " + e.getMessage()); + } + } +} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestCopyRecoveredEditsTask.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestCopyRecoveredEditsTask.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestCopyRecoveredEditsTask.java (revision 0) @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.util.FSUtils; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +/** + * Test that we correctly copy the recovered edits from a directory + */ +@Category(SmallTests.class) +public class TestCopyRecoveredEditsTask { + + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + @Test + public void testCopyFiles() throws Exception { + + SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName("snapshot").build(); + ForeignExceptionDispatcher monitor = Mockito.mock(ForeignExceptionDispatcher.class); + FileSystem fs = UTIL.getTestFileSystem(); + Path root = UTIL.getDataTestDir(); + String regionName = "regionA"; + Path regionDir = new Path(root, regionName); + Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, root); + + try { + // doesn't really matter where the region's snapshot directory is, but this is pretty close + Path snapshotRegionDir = new Path(workingDir, regionName); + fs.mkdirs(snapshotRegionDir); + + // put some stuff in the recovered.edits directory + Path edits = HLog.getRegionDirRecoveredEditsDir(regionDir); + fs.mkdirs(edits); + // make a file with some data + Path file1 = new Path(edits, "0000000000000002352"); + FSDataOutputStream out = fs.create(file1); + byte[] data = new byte[] { 1, 2, 3, 4 }; + out.write(data); + out.close(); + // make an empty file + Path empty = new Path(edits, "empty"); + fs.createNewFile(empty); + + CopyRecoveredEditsTask task = new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, + snapshotRegionDir); + CopyRecoveredEditsTask taskSpy = Mockito.spy(task); + taskSpy.call(); + + Path snapshotEdits = HLog.getRegionDirRecoveredEditsDir(snapshotRegionDir); + FileStatus[] snapshotEditFiles = FSUtils.listStatus(fs, snapshotEdits); + assertEquals("Got wrong number of files in the snapshot edits", 1, snapshotEditFiles.length); + FileStatus file = snapshotEditFiles[0]; + assertEquals("Didn't copy expected file", file1.getName(), file.getPath().getName()); + + Mockito.verify(monitor, Mockito.never()).receive(Mockito.any(ForeignException.class)); + Mockito.verify(taskSpy, Mockito.never()).snapshotFailure(Mockito.anyString(), + Mockito.any(Exception.class)); + } finally { + // cleanup the working directory + FSUtils.delete(fs, regionDir, true); + FSUtils.delete(fs, workingDir, true); + } + } + + /** + * Check that we don't get an exception if there is no recovered edits directory to copy + * @throws Exception on failure + */ + @Test + public void testNoEditsDir() throws Exception { + SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName("snapshot").build(); + ForeignExceptionDispatcher monitor = Mockito.mock(ForeignExceptionDispatcher.class); + FileSystem 
fs = UTIL.getTestFileSystem(); + Path root = UTIL.getDataTestDir(); + String regionName = "regionA"; + Path regionDir = new Path(root, regionName); + Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, root); + try { + // doesn't really matter where the region's snapshot directory is, but this is pretty close + Path snapshotRegionDir = new Path(workingDir, regionName); + fs.mkdirs(snapshotRegionDir); + Path regionEdits = HLog.getRegionDirRecoveredEditsDir(regionDir); + assertFalse("Edits dir exists already - it shouldn't", fs.exists(regionEdits)); + + CopyRecoveredEditsTask task = new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, + snapshotRegionDir); + task.call(); + } finally { + // cleanup the working directory + FSUtils.delete(fs, regionDir, true); + FSUtils.delete(fs, workingDir, true); + } + } +} Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestWALReferenceTask.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestWALReferenceTask.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestWALReferenceTask.java (revision 0) @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hbase.snapshot;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
+import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
+import org.apache.hadoop.hbase.snapshot.ReferenceServerWALsTask;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
+import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+/**
+ * Test that the WAL reference task works as expected
+ */
+@Category(SmallTests.class)
+public class TestWALReferenceTask {
+
+  private static final Log LOG = LogFactory.getLog(TestWALReferenceTask.class);
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  @Test
+  public void testRun() throws IOException {
+    Configuration conf = UTIL.getConfiguration();
+    FileSystem fs = UTIL.getTestFileSystem();
+    // setup the log dir
+    Path testDir = UTIL.getDataTestDir();
+    Set<String> servers = new HashSet<String>();
+    Path logDir = new Path(testDir, ".logs");
+    Path server1Dir = new Path(logDir, "Server1");
+    servers.add(server1Dir.getName());
+    Path server2Dir = new Path(logDir, "me.hbase.com,56073,1348618509968");
+    servers.add(server2Dir.getName());
+    // logs under server 1
+    Path log1_1 = new Path(server1Dir, "me.hbase.com%2C56073%2C1348618509968.1348618520536");
+    Path log1_2 = new Path(server1Dir, "me.hbase.com%2C56073%2C1348618509968.1234567890123");
+    // logs under server 2
+    Path log2_1 = new Path(server2Dir, "me.hbase.com%2C56074%2C1348618509998.1348618515589");
+    Path log2_2 = new Path(server2Dir, "me.hbase.com%2C56073%2C1348618509968.1234567890123");
+
+    // create all the log files
+    fs.createNewFile(log1_1);
+    fs.createNewFile(log1_2);
+    fs.createNewFile(log2_1);
+    fs.createNewFile(log2_2);
+
+    FSUtils.logFileSystemState(fs, testDir, LOG);
+    FSUtils.setRootDir(conf, testDir);
+    SnapshotDescription snapshot = SnapshotDescription.newBuilder()
+        .setName("testWALReferenceSnapshot").build();
+    ForeignExceptionDispatcher listener = Mockito.mock(ForeignExceptionDispatcher.class);
+
+    // reference all the files in the first server directory
+    ReferenceServerWALsTask task = new ReferenceServerWALsTask(snapshot, listener, server1Dir,
+        conf, fs);
+    task.call();
+
+    // reference all the files in the second server directory
+    task = new ReferenceServerWALsTask(snapshot, listener, server2Dir, conf, fs);
+    task.call();
+
+    // verify that we got everything
+    FSUtils.logFileSystemState(fs, testDir, LOG);
+    Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, testDir);
+    Path snapshotLogDir = new Path(workingDir, HConstants.HREGION_LOGDIR_NAME);
+
+    // make sure we referenced all the WAL files
+    TakeSnapshotUtils.verifyAllLogsGotReferenced(fs, logDir, servers, snapshot, snapshotLogDir);
+
+    // make sure we never got an error
+    Mockito.verify(listener, Mockito.atLeastOnce()).rethrowException();
+    Mockito.verifyNoMoreInteractions(listener);
+  }
+}
\ No newline at end of file
Index:
src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreFlushSnapshotFromClient.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreFlushSnapshotFromClient.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestRestoreFlushSnapshotFromClient.java (revision 0) @@ -0,0 +1,254 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.MD5Hash; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test clone/restore snapshots from the client + * + * TODO This is essentially a clone of TestRestoreSnapshotFromClient. This is worth refactoring + * this because there will be a few more flavors of snapshots that need to run these tests. 
+ */ +@Category(LargeTests.class) +public class TestRestoreFlushSnapshotFromClient { + final Log LOG = LogFactory.getLog(getClass()); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private final byte[] FAMILY = Bytes.toBytes("cf"); + + private byte[] snapshotName0; + private byte[] snapshotName1; + private byte[] snapshotName2; + private int snapshot0Rows; + private int snapshot1Rows; + private byte[] tableName; + private HBaseAdmin admin; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.getConfiguration().setBoolean("hbase.online.schema.update.enable", true); + TEST_UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 100); + TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 250); + TEST_UTIL.getConfiguration().setInt("hbase.client.retries.number", 6); + TEST_UTIL.getConfiguration().setBoolean( + "hbase.master.enabletable.roundrobin", true); + + // Enable snapshot + TEST_UTIL.getConfiguration().setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); + + TEST_UTIL.startMiniCluster(3); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + /** + * Initialize the tests with a table filled with some data + * and two snapshots (snapshotName0, snapshotName1) of different states. + * The tableName, snapshotNames and the number of rows in the snapshot are initialized. + */ + @Before + public void setup() throws Exception { + this.admin = TEST_UTIL.getHBaseAdmin(); + + long tid = System.currentTimeMillis(); + tableName = Bytes.toBytes("testtb-" + tid); + snapshotName0 = Bytes.toBytes("snaptb0-" + tid); + snapshotName1 = Bytes.toBytes("snaptb1-" + tid); + snapshotName2 = Bytes.toBytes("snaptb2-" + tid); + + // create Table and disable it + createTable(tableName, FAMILY); + HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName); + try { + loadData(table, 500, FAMILY); + snapshot0Rows = TEST_UTIL.countRows(table); + LOG.info("=== before snapshot with 500 rows"); + logFSTree(); + + // take a snapshot + admin.snapshot(Bytes.toString(snapshotName0), Bytes.toString(tableName), + SnapshotDescription.Type.FLUSH); + + LOG.info("=== after snapshot with 500 rows"); + logFSTree(); + + // insert more data + loadData(table, 500, FAMILY); + snapshot1Rows = TEST_UTIL.countRows(table); + LOG.info("=== before snapshot with 1000 rows"); + logFSTree(); + + // take a snapshot of the updated table + admin.snapshot(Bytes.toString(snapshotName1), Bytes.toString(tableName), + SnapshotDescription.Type.FLUSH); + LOG.info("=== after snapshot with 1000 rows"); + logFSTree(); + } finally { + table.close(); + } + } + + @After + public void tearDown() throws Exception { + TEST_UTIL.deleteTable(tableName); + admin.deleteSnapshot(snapshotName0); + admin.deleteSnapshot(snapshotName1); + + // Ensure the archiver to be empty + MasterFileSystem mfs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem(); + mfs.getFileSystem().delete( + new Path(mfs.getRootDir(), HConstants.HFILE_ARCHIVE_DIRECTORY), true); + } + + @Test + public void testTakeFlushSnapshot() throws IOException { + // taking happens in setup. 
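+    // setup() already took snapshotName0 and snapshotName1 with Type.FLUSH on the enabled table,
+    // so reaching this point without an exception means the flush snapshots were taken successfully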
+ } + + @Test + public void testRestoreSnapshot() throws IOException { + verifyRowCount(tableName, snapshot1Rows); + + // Restore from snapshot-0 + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName0); + logFSTree(); + admin.enableTable(tableName); + LOG.info("=== after restore with 500 row snapshot"); + logFSTree(); + verifyRowCount(tableName, snapshot0Rows); + + // Restore from snapshot-1 + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName1); + admin.enableTable(tableName); + verifyRowCount(tableName, snapshot1Rows); + } + + @Test(expected=SnapshotDoesNotExistException.class) + public void testCloneNonExistentSnapshot() throws IOException, InterruptedException { + String snapshotName = "random-snapshot-" + System.currentTimeMillis(); + String tableName = "random-table-" + System.currentTimeMillis(); + admin.cloneSnapshot(snapshotName, tableName); + } + + @Test + public void testCloneSnapshot() throws IOException, InterruptedException { + byte[] clonedTableName = Bytes.toBytes("clonedtb-" + System.currentTimeMillis()); + testCloneSnapshot(clonedTableName, snapshotName0, snapshot0Rows); + testCloneSnapshot(clonedTableName, snapshotName1, snapshot1Rows); + } + + private void testCloneSnapshot(final byte[] tableName, final byte[] snapshotName, + int snapshotRows) throws IOException, InterruptedException { + // create a new table from snapshot + admin.cloneSnapshot(snapshotName, tableName); + verifyRowCount(tableName, snapshotRows); + + TEST_UTIL.deleteTable(tableName); + } + + @Test + public void testRestoreSnapshotOfCloned() throws IOException, InterruptedException { + byte[] clonedTableName = Bytes.toBytes("clonedtb-" + System.currentTimeMillis()); + admin.cloneSnapshot(snapshotName0, clonedTableName); + verifyRowCount(clonedTableName, snapshot0Rows); + admin.snapshot(Bytes.toString(snapshotName2), Bytes.toString(clonedTableName), SnapshotDescription.Type.FLUSH); + TEST_UTIL.deleteTable(clonedTableName); + + admin.cloneSnapshot(snapshotName2, clonedTableName); + verifyRowCount(clonedTableName, snapshot0Rows); + TEST_UTIL.deleteTable(clonedTableName); + } + + // ========================================================================== + // Helpers + // ========================================================================== + private void createTable(final byte[] tableName, final byte[]... families) throws IOException { + HTableDescriptor htd = new HTableDescriptor(tableName); + for (byte[] family: families) { + HColumnDescriptor hcd = new HColumnDescriptor(family); + htd.addFamily(hcd); + } + byte[][] splitKeys = new byte[16][]; + byte[] hex = Bytes.toBytes("0123456789abcdef"); + for (int i = 0; i < 16; ++i) { + splitKeys[i] = new byte[] { hex[i] }; + } + admin.createTable(htd, splitKeys); + } + + public void loadData(final HTable table, int rows, byte[]... 
families) throws IOException { + byte[] qualifier = Bytes.toBytes("q"); + table.setAutoFlush(false); + while (rows-- > 0) { + byte[] value = Bytes.add(Bytes.toBytes(System.currentTimeMillis()), Bytes.toBytes(rows)); + byte[] key = Bytes.toBytes(MD5Hash.getMD5AsHex(value)); + Put put = new Put(key); + put.setWriteToWAL(false); + for (byte[] family: families) { + put.add(family, qualifier, value); + } + table.put(put); + } + table.flushCommits(); + } + + private void logFSTree() throws IOException { + MasterFileSystem mfs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem(); + FSUtils.logFileSystemState(mfs.getFileSystem(), mfs.getRootDir(), LOG); + } + + private void verifyRowCount(final byte[] tableName, long expectedRows) throws IOException { + HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName); + assertEquals(expectedRows, TEST_UTIL.countRows(table)); + table.close(); + } +} Index: src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotLogSplitter.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotLogSplitter.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotLogSplitter.java (revision 0) @@ -0,0 +1,176 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.regionserver.wal.HLogKey; +import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.junit.*; +import org.junit.experimental.categories.Category; + +/** + * Test snapshot log splitter + */ +@Category(SmallTests.class) +public class TestSnapshotLogSplitter { + final Log LOG = LogFactory.getLog(getClass()); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private byte[] TEST_QUALIFIER = Bytes.toBytes("q"); + private byte[] TEST_FAMILY = Bytes.toBytes("f"); + + private Configuration conf; + private FileSystem fs; + private Path logFile; + + @Before + public void setup() throws Exception { + conf = TEST_UTIL.getConfiguration(); + fs = FileSystem.get(conf); + logFile = new Path(TEST_UTIL.getDataTestDir(), "test.log"); + writeTestLog(logFile); + } + + @After + public void tearDown() throws Exception { + fs.delete(logFile, false); + } + + @Test + public void testSplitLogs() throws IOException { + Map regionsMap = new TreeMap(Bytes.BYTES_COMPARATOR); + splitTestLogs(getTableName(5), regionsMap); + } + + @Test + public void testSplitLogsOnDifferentTable() throws IOException { + byte[] tableName = getTableName(1); + Map regionsMap = new TreeMap(Bytes.BYTES_COMPARATOR); + for (int j = 0; j < 10; ++j) { + byte[] regionName = getRegionName(tableName, j); + byte[] newRegionName = getNewRegionName(tableName, j); + regionsMap.put(regionName, newRegionName); + } + splitTestLogs(tableName, regionsMap); + } + + /* + * Split and verify test logs for the specified table + */ + private void splitTestLogs(final byte[] tableName, final Map regionsMap) + throws IOException { + Path tableDir = new Path(TEST_UTIL.getDataTestDir(), Bytes.toString(tableName)); + SnapshotLogSplitter logSplitter = new SnapshotLogSplitter(conf, fs, tableDir, + tableName, regionsMap); + try { + logSplitter.splitLog(logFile); + } finally { + logSplitter.close(); + } + verifyRecoverEdits(tableDir, tableName, regionsMap); + } + + /* + * Verify that every logs in the table directory has just the specified table and regions. 
+ */ + private void verifyRecoverEdits(final Path tableDir, final byte[] tableName, + final Map regionsMap) throws IOException { + for (FileStatus regionStatus: FSUtils.listStatus(fs, tableDir)) { + assertTrue(regionStatus.getPath().getName().startsWith(Bytes.toString(tableName))); + Path regionEdits = HLog.getRegionDirRecoveredEditsDir(regionStatus.getPath()); + byte[] regionName = Bytes.toBytes(regionStatus.getPath().getName()); + assertFalse(regionsMap.containsKey(regionName)); + for (FileStatus logStatus: FSUtils.listStatus(fs, regionEdits)) { + HLog.Reader reader = HLog.getReader(fs, logStatus.getPath(), conf); + try { + HLog.Entry entry; + while ((entry = reader.next()) != null) { + HLogKey key = entry.getKey(); + assertArrayEquals(tableName, key.getTablename()); + assertArrayEquals(regionName, key.getEncodedRegionName()); + } + } finally { + reader.close(); + } + } + } + } + + /* + * Write some entries in the log file. + * 7 different tables with name "testtb-%d" + * 10 region per table with name "tableName-region-%d" + * 50 entry with row key "row-%d" + */ + private void writeTestLog(final Path logFile) throws IOException { + fs.mkdirs(logFile.getParent()); + HLog.Writer writer = HLog.createWriter(fs, logFile, conf); + try { + for (int i = 0; i < 7; ++i) { + byte[] tableName = getTableName(i); + for (int j = 0; j < 10; ++j) { + byte[] regionName = getRegionName(tableName, j); + for (int k = 0; k < 50; ++k) { + byte[] rowkey = Bytes.toBytes("row-" + k); + HLogKey key = new HLogKey(regionName, tableName, (long)k, + System.currentTimeMillis(), HConstants.DEFAULT_CLUSTER_ID); + WALEdit edit = new WALEdit(); + edit.add(new KeyValue(rowkey, TEST_FAMILY, TEST_QUALIFIER, rowkey)); + writer.append(new HLog.Entry(key, edit)); + } + } + } + } finally { + writer.close(); + } + } + + private byte[] getTableName(int tableId) { + return Bytes.toBytes("testtb-" + tableId); + } + + private byte[] getRegionName(final byte[] tableName, int regionId) { + return Bytes.toBytes(Bytes.toString(tableName) + "-region-" + regionId); + } + + private byte[] getNewRegionName(final byte[] tableName, int regionId) { + return Bytes.toBytes(Bytes.toString(tableName) + "-new-region-" + regionId); + } +} Index: src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java (revision 0) @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; +import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.FSUtils; +import org.junit.Assert; + +/** + * Utilities class for snapshots + */ +public class SnapshotTestingUtils { + + private static final Log LOG = LogFactory.getLog(SnapshotTestingUtils.class); + + /** + * Assert that we don't have any snapshots lists + * @throws IOException if the admin operation fails + */ + public static void assertNoSnapshots(HBaseAdmin admin) throws IOException { + assertEquals("Have some previous snapshots", 0, admin.listSnapshots().size()); + } + + /** + * Make sure that there is only one snapshot returned from the master and its name and table match + * the passed in parameters. + */ + public static void assertOneSnapshotThatMatches(HBaseAdmin admin, HSnapshotDescription snapshot) + throws IOException { + assertOneSnapshotThatMatches(admin, snapshot.getName(), snapshot.getTable()); + } + + /** + * Make sure that there is only one snapshot returned from the master and its name and table match + * the passed in parameters. + */ + public static void assertOneSnapshotThatMatches(HBaseAdmin admin, SnapshotDescription snapshot) + throws IOException { + assertOneSnapshotThatMatches(admin, snapshot.getName(), snapshot.getTable()); + } + + /** + * Make sure that there is only one snapshot returned from the master and its name and table match + * the passed in parameters. + */ + public static List assertOneSnapshotThatMatches(HBaseAdmin admin, + String snapshotName, String tableName) throws IOException { + // list the snapshot + List snapshots = admin.listSnapshots(); + + assertEquals("Should only have 1 snapshot", 1, snapshots.size()); + assertEquals(snapshotName, snapshots.get(0).getName()); + assertEquals(tableName, snapshots.get(0).getTable()); + + return snapshots; + } + + /** + * Make sure that there is only one snapshot returned from the master and its name and table match + * the passed in parameters. 
+   */
+  public static List<SnapshotDescription> assertOneSnapshotThatMatches(HBaseAdmin admin,
+      byte[] snapshot, byte[] tableName) throws IOException {
+    return assertOneSnapshotThatMatches(admin, Bytes.toString(snapshot), Bytes.toString(tableName));
+  }
+
+  /**
+   * Confirm that the snapshot contains references to all the files that should be in the snapshot
+   */
+  public static void confirmSnapshotValid(SnapshotDescription snapshotDescriptor,
+      byte[] tableName, byte[] testFamily, Path rootDir, HBaseAdmin admin, FileSystem fs,
+      boolean requireLogs, Path logsDir, Set<String> snapshotServers) throws IOException {
+    Path snapshotDir = SnapshotDescriptionUtils
+        .getCompletedSnapshotDir(snapshotDescriptor, rootDir);
+    assertTrue(fs.exists(snapshotDir));
+    Path snapshotinfo = new Path(snapshotDir, SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
+    assertTrue(fs.exists(snapshotinfo));
+    // check the logs dir
+    if (requireLogs) {
+      TakeSnapshotUtils.verifyAllLogsGotReferenced(fs, logsDir, snapshotServers,
+        snapshotDescriptor, new Path(snapshotDir, HConstants.HREGION_LOGDIR_NAME));
+    }
+    // check the table info
+    HTableDescriptor desc = FSTableDescriptors.getTableDescriptor(fs, rootDir, tableName);
+    HTableDescriptor snapshotDesc = FSTableDescriptors.getTableDescriptor(fs, snapshotDir);
+    assertEquals(desc, snapshotDesc);
+
+    // check the region snapshot for all the regions
+    List<HRegionInfo> regions = admin.getTableRegions(tableName);
+    for (HRegionInfo info : regions) {
+      String regionName = info.getEncodedName();
+      Path regionDir = new Path(snapshotDir, regionName);
+      HRegionInfo snapshotRegionInfo = HRegion.loadDotRegionInfoFileContent(fs, regionDir);
+      assertEquals(info, snapshotRegionInfo);
+      // check to make sure we have the family
+      Path familyDir = new Path(regionDir, Bytes.toString(testFamily));
+      assertTrue("Expected to find: " + familyDir + ", but it doesn't exist", fs.exists(familyDir));
+      // make sure we have some file references
+      assertTrue(fs.listStatus(familyDir).length > 0);
+    }
+  }
+
+  /**
+   * Helper method for testing async snapshot operations. Just waits for the given snapshot to
+   * complete on the server by repeatedly checking the master.
+   * @param master running the snapshot
+   * @param snapshot to check
+   * @param sleep amount to sleep between checks to see if the snapshot is done
+   * @throws IOException if the snapshot fails
+   */
+  public static void waitForSnapshotToComplete(HMaster master, HSnapshotDescription snapshot,
+      long sleep) throws IOException {
+    boolean done = false;
+    while (!done) {
+      done = master.isSnapshotDone(snapshot);
+      try {
+        Thread.sleep(sleep);
+      } catch (InterruptedException e) {
+        throw new IOException(e);
+      }
+    }
+  }
+
+  public static void cleanupSnapshot(HBaseAdmin admin, byte[] tableName) throws IOException {
+    SnapshotTestingUtils.cleanupSnapshot(admin, Bytes.toString(tableName));
+  }
+
+  public static void cleanupSnapshot(HBaseAdmin admin, String snapshotName) throws IOException {
+    // delete the taken snapshot
+    admin.deleteSnapshot(snapshotName);
+    assertNoSnapshots(admin);
+  }
+
+  /**
+   * Expect an error from the master when checking whether the snapshot is complete
+   * @param master master to check
+   * @param snapshot the {@link HSnapshotDescription} request to pass to the master
+   * @param clazz expected exception from the master
+   */
+  public static void expectSnapshotDoneException(HMaster master, HSnapshotDescription snapshot,
+      Class<? extends HBaseSnapshotException> clazz) {
+    try {
+      boolean res = master.isSnapshotDone(snapshot);
+      Assert.fail("didn't fail to lookup a snapshot: res=" + res);
+    } catch (HBaseSnapshotException e) {
+      assertEquals("Threw wrong snapshot exception!", clazz, e.getClass());
+    } catch (Throwable t) {
+      Assert.fail("Threw an unexpected exception:" + t);
+    }
+  }
+
+  /**
+   * List all the HFiles in the given table
+   * @param fs FileSystem where the table lives
+   * @param tableDir directory of the table
+   * @return array of the current HFiles in the table (could be a zero-length array)
+   * @throws IOException on unexpected error reading the FS
+   */
+  public static FileStatus[] listHFiles(final FileSystem fs, Path tableDir) throws IOException {
+    // setup the filters we will need based on the filesystem
+    PathFilter regionFilter = new FSUtils.RegionDirFilter(fs);
+    PathFilter familyFilter = new FSUtils.FamilyDirFilter(fs);
+    final PathFilter fileFilter = new PathFilter() {
+      @Override
+      public boolean accept(Path file) {
+        try {
+          return fs.isFile(file);
+        } catch (IOException e) {
+          return false;
+        }
+      }
+    };
+
+    FileStatus[] regionDirs = FSUtils.listStatus(fs, tableDir, regionFilter);
+    // if no regions, then we are done
+    if (regionDirs == null || regionDirs.length == 0) return new FileStatus[0];
+
+    // go through each of the regions, and add all the hfiles under each family
+    List<FileStatus> regionFiles = new ArrayList<FileStatus>(regionDirs.length);
+    for (FileStatus regionDir : regionDirs) {
+      FileStatus[] fams = FSUtils.listStatus(fs, regionDir.getPath(), familyFilter);
+      // if no families, then we are done again
+      if (fams == null || fams.length == 0) continue;
+      // add all the hfiles under the family
+      regionFiles.addAll(SnapshotTestingUtils.getHFilesInRegion(fams, fs, fileFilter));
+    }
+    FileStatus[] files = new FileStatus[regionFiles.size()];
+    regionFiles.toArray(files);
+    return files;
+  }
+
+  /**
+   * Get all the hfiles in the region, under the passed set of families
+   * @param families all the family directories under the region
+   * @param fs filesystem where the families live
+   * @param fileFilter filter to only include files
+   * @return collection of all the hfiles under all the passed in families (non-null)
+   * @throws IOException on unexpected error reading the FS
+   */
+  public static Collection<FileStatus>
getHFilesInRegion(FileStatus[] families, FileSystem fs, + PathFilter fileFilter) throws IOException { + Set files = new TreeSet(); + for (FileStatus family : families) { + // get all the hfiles in the family + FileStatus[] hfiles = FSUtils.listStatus(fs, family.getPath(), fileFilter); + // if no hfiles, then we are done with this family + if (hfiles == null || hfiles.length == 0) continue; + files.addAll(Arrays.asList(hfiles)); + } + return files; + } +} Index: src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java (revision 1451296) +++ src/test/java/org/apache/hadoop/hbase/backup/TestHFileArchiving.java (working copy) @@ -360,7 +360,7 @@ try { // Try to archive the file - HFileArchiver.archiveRegion(conf, fs, rootDir, + HFileArchiver.archiveRegion(fs, rootDir, sourceRegionDir.getParent(), sourceRegionDir); // The archiver succeded, the file is no longer in the original location Index: src/test/java/org/apache/hadoop/hbase/util/TestHFileArchiveUtil.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/util/TestHFileArchiveUtil.java (revision 1451296) +++ src/test/java/org/apache/hadoop/hbase/util/TestHFileArchiveUtil.java (working copy) @@ -50,9 +50,10 @@ @Test public void testRegionArchiveDir() { + Configuration conf = null; Path tableDir = new Path("table"); Path regionDir = new Path("region"); - assertNotNull(HFileArchiveUtil.getRegionArchiveDir(null, tableDir, regionDir)); + assertNotNull(HFileArchiveUtil.getRegionArchiveDir(conf, tableDir, regionDir)); } @Test Index: src/test/java/org/apache/hadoop/hbase/util/TestFSVisitor.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/util/TestFSVisitor.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/util/TestFSVisitor.java (revision 0) @@ -0,0 +1,225 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.util; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.UUID; +import java.util.Set; +import java.util.HashSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HDFSBlocksDistribution; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.util.MD5Hash; +import org.apache.hadoop.hbase.util.FSUtils; +import org.junit.*; +import org.junit.experimental.categories.Category; + +/** + * Test {@link FSUtils}. + */ +@Category(MediumTests.class) +public class TestFSVisitor { + final Log LOG = LogFactory.getLog(getClass()); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private final String TABLE_NAME = "testtb"; + + private Set tableFamilies; + private Set tableRegions; + private Set recoveredEdits; + private Set tableHFiles; + private Set regionServers; + private Set serverLogs; + + private FileSystem fs; + private Path tableDir; + private Path logsDir; + private Path rootDir; + + @Before + public void setUp() throws Exception { + fs = FileSystem.get(TEST_UTIL.getConfiguration()); + rootDir = TEST_UTIL.getDataTestDir("hbase"); + logsDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME); + + tableFamilies = new HashSet(); + tableRegions = new HashSet(); + recoveredEdits = new HashSet(); + tableHFiles = new HashSet(); + regionServers = new HashSet(); + serverLogs = new HashSet(); + tableDir = createTableFiles(rootDir, TABLE_NAME, tableRegions, tableFamilies, tableHFiles); + createRecoverEdits(tableDir, tableRegions, recoveredEdits); + createLogs(logsDir, regionServers, serverLogs); + FSUtils.logFileSystemState(fs, rootDir, LOG); + } + + @After + public void tearDown() throws Exception { + fs.delete(rootDir); + } + + @Test + public void testVisitStoreFiles() throws IOException { + final Set regions = new HashSet(); + final Set families = new HashSet(); + final Set hfiles = new HashSet(); + FSVisitor.visitTableStoreFiles(fs, tableDir, new FSVisitor.StoreFileVisitor() { + public void storeFile(final String region, final String family, final String hfileName) + throws IOException { + regions.add(region); + families.add(family); + hfiles.add(hfileName); + } + }); + assertEquals(tableRegions, regions); + assertEquals(tableFamilies, families); + assertEquals(tableHFiles, hfiles); + } + + @Test + public void testVisitRecoveredEdits() throws IOException { + final Set regions = new HashSet(); + final Set edits = new HashSet(); + FSVisitor.visitTableRecoveredEdits(fs, tableDir, new FSVisitor.RecoveredEditsVisitor() { + public void recoveredEdits (final String region, final String logfile) + throws IOException { + regions.add(region); + edits.add(logfile); + } + }); + 
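// Editor's aside (illustrative sketch, not part of this patch): the same FSVisitor callback
// interface used just above can also aggregate results per region instead of collecting flat
// sets. The snippet only exercises the FSVisitor API already shown in this test; the local
// names (editsByRegion, perRegion) are purely illustrative.
final java.util.Map<String, Set<String>> editsByRegion =
    new java.util.HashMap<String, Set<String>>();
FSVisitor.visitTableRecoveredEdits(fs, tableDir, new FSVisitor.RecoveredEditsVisitor() {
  public void recoveredEdits(final String region, final String logfile) throws IOException {
    Set<String> perRegion = editsByRegion.get(region);
    if (perRegion == null) {
      perRegion = new HashSet<String>();
      editsByRegion.put(region, perRegion);
    }
    perRegion.add(logfile);
  }
});
// every region created in setUp() should now map to the edit files written for it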
assertEquals(tableRegions, regions); + assertEquals(recoveredEdits, edits); + } + + @Test + public void testVisitLogFiles() throws IOException { + final Set servers = new HashSet(); + final Set logs = new HashSet(); + FSVisitor.visitLogFiles(fs, rootDir, new FSVisitor.LogFileVisitor() { + public void logFile (final String server, final String logfile) throws IOException { + servers.add(server); + logs.add(logfile); + } + }); + assertEquals(regionServers, servers); + assertEquals(serverLogs, logs); + } + + + /* + * |-testtb/ + * |----f1d3ff8443297732862df21dc4e57262/ + * |-------f1/ + * |----------d0be84935ba84b66b1e866752ec5d663 + * |----------9fc9d481718f4878b29aad0a597ecb94 + * |-------f2/ + * |----------4b0fe6068c564737946bcf4fd4ab8ae1 + */ + private Path createTableFiles(final Path rootDir, final String tableName, + final Set tableRegions, final Set tableFamilies, + final Set tableHFiles) throws IOException { + Path tableDir = new Path(rootDir, tableName); + for (int r = 0; r < 10; ++r) { + String regionName = MD5Hash.getMD5AsHex(Bytes.toBytes(r)); + tableRegions.add(regionName); + Path regionDir = new Path(tableDir, regionName); + for (int f = 0; f < 3; ++f) { + String familyName = "f" + f; + tableFamilies.add(familyName); + Path familyDir = new Path(regionDir, familyName); + fs.mkdirs(familyDir); + for (int h = 0; h < 5; ++h) { + String hfileName = UUID.randomUUID().toString().replaceAll("-", ""); + tableHFiles.add(hfileName); + fs.createNewFile(new Path(familyDir, hfileName)); + } + } + } + return tableDir; + } + + /* + * |-testtb/ + * |----f1d3ff8443297732862df21dc4e57262/ + * |-------recovered.edits/ + * |----------0000001351969633479 + * |----------0000001351969633481 + */ + private void createRecoverEdits(final Path tableDir, final Set tableRegions, + final Set recoverEdits) throws IOException { + for (String region: tableRegions) { + Path regionEditsDir = HLog.getRegionDirRecoveredEditsDir(new Path(tableDir, region)); + long seqId = System.currentTimeMillis(); + for (int i = 0; i < 3; ++i) { + String editName = String.format("%019d", seqId + i); + recoverEdits.add(editName); + FSDataOutputStream stream = fs.create(new Path(regionEditsDir, editName)); + stream.write(Bytes.toBytes("test")); + stream.close(); + } + } + } + + /* + * |-.logs/ + * |----server5,5,1351969633508/ + * |-------server5,5,1351969633508.0 + * |----server6,6,1351969633512/ + * |-------server6,6,1351969633512.0 + * |-------server6,6,1351969633512.3 + */ + private void createLogs(final Path logDir, final Set servers, + final Set logs) throws IOException { + for (int s = 0; s < 7; ++s) { + String server = String.format("server%d,%d,%d", s, s, System.currentTimeMillis()); + servers.add(server); + Path serverLogDir = new Path(logDir, server); + fs.mkdirs(serverLogDir); + for (int i = 0; i < 5; ++i) { + String logfile = server + '.' + i; + logs.add(logfile); + FSDataOutputStream stream = fs.create(new Path(serverLogDir, logfile)); + stream.write(Bytes.toBytes("test")); + stream.close(); + } + } + } +} Index: src/test/java/org/apache/hadoop/hbase/client/TestSnapshotsFromAdmin.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/client/TestSnapshotsFromAdmin.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/client/TestSnapshotsFromAdmin.java (revision 0) @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.ipc.HMasterInterface; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +import com.google.protobuf.RpcController; + +/** + * Test snapshot logic from the client + */ +@Category(SmallTests.class) +public class TestSnapshotsFromAdmin { + + private static final Log LOG = LogFactory.getLog(TestSnapshotsFromAdmin.class); + + /** + * Test that the logic for doing 'correct' back-off based on exponential increase and the max-time + * passed from the server ensures the correct overall waiting for the snapshot to finish. 
+ * @throws Exception + */ + @Test(timeout = 10000) + public void testBackoffLogic() throws Exception { + final int maxWaitTime = 7500; + final int numRetries = 10; + final int pauseTime = 500; + // calculate the wait time, if we just do straight backoff (ignoring the expected time from + // master) + long ignoreExpectedTime = 0; + for (int i = 0; i < 6; i++) { + ignoreExpectedTime += HConstants.RETRY_BACKOFF[i] * pauseTime; + } + // the correct wait time, capping at the maxTime/tries + fudge room + final long time = pauseTime * 3 + ((maxWaitTime / numRetries) * 3) + 300; + assertTrue("Capped snapshot wait time isn't less that the uncapped backoff time " + + "- further testing won't prove anything.", time < ignoreExpectedTime); + + // setup the mocks + HConnectionManager.HConnectionImplementation mockConnection = Mockito + .mock(HConnectionManager.HConnectionImplementation.class); + Configuration conf = HBaseConfiguration.create(); + // setup the conf to match the expected properties + conf.setInt("hbase.client.retries.number", numRetries); + conf.setLong("hbase.client.pause", pauseTime); + // mock the master admin to our mock + HMasterInterface mockMaster = Mockito.mock(HMasterInterface.class); + Mockito.when(mockConnection.getConfiguration()).thenReturn(conf); + Mockito.when(mockConnection.getMaster()).thenReturn(mockMaster); + // set the max wait time for the snapshot to complete + Mockito + .when( + mockMaster.snapshot( + Mockito.any(HSnapshotDescription.class))).thenReturn((long)maxWaitTime); + // first five times, we return false, last we get success + Mockito.when( + mockMaster.isSnapshotDone( + Mockito.any(HSnapshotDescription.class))).thenReturn(false, false, + false, false, false, true); + + // setup the admin and run the test + HBaseAdmin admin = new HBaseAdmin(mockConnection); + String snapshot = "snasphot"; + String table = "table"; + // get start time + long start = System.currentTimeMillis(); + admin.snapshot(snapshot, table); + long finish = System.currentTimeMillis(); + long elapsed = (finish - start); + assertTrue("Elapsed time:" + elapsed + " is more than expected max:" + time, elapsed <= time); + } + + /** + * Make sure that we validate the snapshot name and the table name before we pass anything across + * the wire + * @throws IOException on failure + */ + @Test + public void testValidateSnapshotName() throws IOException { + HConnectionManager.HConnectionImplementation mockConnection = Mockito + .mock(HConnectionManager.HConnectionImplementation.class); + Configuration conf = HBaseConfiguration.create(); + Mockito.when(mockConnection.getConfiguration()).thenReturn(conf); + HBaseAdmin admin = new HBaseAdmin(mockConnection); + SnapshotDescription.Builder builder = SnapshotDescription.newBuilder(); + // check that invalid snapshot names fail + failSnapshotStart(admin, builder.setName(".snapshot").build()); + failSnapshotStart(admin, builder.setName("-snapshot").build()); + failSnapshotStart(admin, builder.setName("snapshot fails").build()); + failSnapshotStart(admin, builder.setName("snap$hot").build()); + // check the table name also get verified + failSnapshotStart(admin, builder.setName("snapshot").setTable(".table").build()); + failSnapshotStart(admin, builder.setName("snapshot").setTable("-table").build()); + failSnapshotStart(admin, builder.setName("snapshot").setTable("table fails").build()); + failSnapshotStart(admin, builder.setName("snapshot").setTable("tab%le").build()); + } + + private void failSnapshotStart(HBaseAdmin admin, SnapshotDescription snapshot) 
throws IOException { + try { + admin.snapshot(snapshot); + fail("Snapshot should not have succeed with name:" + snapshot.getName()); + } catch (IllegalArgumentException e) { + LOG.debug("Correctly failed to start snapshot:" + e.getMessage()); + } + } +} Index: src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromClient.java (revision 0) @@ -0,0 +1,231 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.TableNotFoundException; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test create/using/deleting snapshots from the client + *

+ * This is an end-to-end test for the snapshot utility + */ +@Category(LargeTests.class) +public class TestSnapshotFromClient { + private static final Log LOG = LogFactory.getLog(TestSnapshotFromClient.class); + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static final int NUM_RS = 2; + private static final String STRING_TABLE_NAME = "test"; + private static final byte[] TEST_FAM = Bytes.toBytes("fam"); + private static final byte[] TABLE_NAME = Bytes.toBytes(STRING_TABLE_NAME); + + /** + * Setup the config for the cluster + * @throws Exception on failure + */ + @BeforeClass + public static void setupCluster() throws Exception { + setupConf(UTIL.getConfiguration()); + UTIL.startMiniCluster(NUM_RS); + } + + private static void setupConf(Configuration conf) { + // disable the ui + conf.setInt("hbase.regionsever.info.port", -1); + // change the flush size to a small amount, regulating number of store files + conf.setInt("hbase.hregion.memstore.flush.size", 25000); + // so make sure we get a compaction when doing a load, but keep around some + // files in the store + conf.setInt("hbase.hstore.compaction.min", 10); + conf.setInt("hbase.hstore.compactionThreshold", 10); + // block writes if we get to 12 store files + conf.setInt("hbase.hstore.blockingStoreFiles", 12); + // drop the number of attempts for the hbase admin + conf.setInt("hbase.client.retries.number", 1); + // Enable snapshot + conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); + // prevent aggressive region split + conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, + ConstantSizeRegionSplitPolicy.class.getName()); + } + + @Before + public void setup() throws Exception { + UTIL.createTable(TABLE_NAME, TEST_FAM); + } + + @After + public void tearDown() throws Exception { + UTIL.deleteTable(TABLE_NAME); + // and cleanup the archive directory + try { + UTIL.getTestFileSystem().delete(new Path(UTIL.getDefaultRootDirPath(), ".archive"), true); + } catch (IOException e) { + LOG.warn("Failure to delete archive directory", e); + } + } + + @AfterClass + public static void cleanupTest() throws Exception { + try { + UTIL.shutdownMiniCluster(); + } catch (Exception e) { + LOG.warn("failure shutting down cluster", e); + } + } + + /** + * Test snapshotting not allowed .META. and -ROOT- + * @throws Exception + */ + @Test + public void testMetaTablesSnapshot() throws Exception { + HBaseAdmin admin = UTIL.getHBaseAdmin(); + byte[] snapshotName = Bytes.toBytes("metaSnapshot"); + + try { + admin.snapshot(snapshotName, HConstants.META_TABLE_NAME); + fail("taking a snapshot of .META. 
should not be allowed"); + } catch (IllegalArgumentException e) { + // expected + } + + try { + admin.snapshot(snapshotName, HConstants.ROOT_TABLE_NAME); + fail("taking a snapshot of -ROOT- should not be allowed"); + } catch (IllegalArgumentException e) { + // expected + } + } + + /** + * Test snapshotting a table that is offline + * @throws Exception + */ + @Test + public void testOfflineTableSnapshot() throws Exception { + HBaseAdmin admin = UTIL.getHBaseAdmin(); + // make sure we don't fail on listing snapshots + SnapshotTestingUtils.assertNoSnapshots(admin); + + // put some stuff in the table + HTable table = new HTable(UTIL.getConfiguration(), TABLE_NAME); + UTIL.loadTable(table, TEST_FAM); + + // get the name of all the regionservers hosting the snapshotted table + Set snapshotServers = new HashSet(); + List servers = UTIL.getMiniHBaseCluster().getLiveRegionServerThreads(); + for (RegionServerThread server : servers) { + if (server.getRegionServer().getOnlineRegions(TABLE_NAME).size() > 0) { + snapshotServers.add(server.getRegionServer().getServerName().toString()); + } + } + + LOG.debug("FS state before disable:"); + FSUtils.logFileSystemState(UTIL.getTestFileSystem(), + FSUtils.getRootDir(UTIL.getConfiguration()), LOG); + // XXX if this is flakey, might want to consider using the async version and looping as + // disableTable can succeed and still timeout. + admin.disableTable(TABLE_NAME); + + LOG.debug("FS state before snapshot:"); + FSUtils.logFileSystemState(UTIL.getTestFileSystem(), + FSUtils.getRootDir(UTIL.getConfiguration()), LOG); + + // take a snapshot of the disabled table + byte[] snapshot = Bytes.toBytes("offlineTableSnapshot"); + admin.snapshot(snapshot, TABLE_NAME); + LOG.debug("Snapshot completed."); + + // make sure we have the snapshot + List snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, + snapshot, TABLE_NAME); + + // make sure its a valid snapshot + FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem(); + Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir(); + LOG.debug("FS state after snapshot:"); + FSUtils.logFileSystemState(UTIL.getTestFileSystem(), + FSUtils.getRootDir(UTIL.getConfiguration()), LOG); + + SnapshotTestingUtils.confirmSnapshotValid(snapshots.get(0), TABLE_NAME, TEST_FAM, rootDir, + admin, fs, false, new Path(rootDir, HConstants.HREGION_LOGDIR_NAME), snapshotServers); + + admin.deleteSnapshot(snapshot); + snapshots = admin.listSnapshots(); + SnapshotTestingUtils.assertNoSnapshots(admin); + } + + @Test + public void testSnapshotFailsOnNonExistantTable() throws Exception { + HBaseAdmin admin = UTIL.getHBaseAdmin(); + // make sure we don't fail on listing snapshots + SnapshotTestingUtils.assertNoSnapshots(admin); + String tableName = "_not_a_table"; + + // make sure the table doesn't exist + boolean fail = false; + do { + try { + admin.getTableDescriptor(Bytes.toBytes(tableName)); + fail = true; + LOG.error("Table:" + tableName + " already exists, checking a new name"); + tableName = tableName+"!"; + } catch (TableNotFoundException e) { + fail = false; + } + } while (fail); + + // snapshot the non-existant table + try { + admin.snapshot("fail", tableName); + fail("Snapshot succeeded even though there is not table."); + } catch (SnapshotCreationException e) { + LOG.info("Correctly failed to snapshot a non-existant table:" + e.getMessage()); + } + } +} Index: src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java 
=================================================================== --- src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java (revision 0) @@ -0,0 +1,391 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.LargeTests; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.MD5Hash; +import org.junit.*; +import org.junit.experimental.categories.Category; + +/** + * Test clone/restore snapshots from the client + */ +@Category(LargeTests.class) +public class TestRestoreSnapshotFromClient { + final Log LOG = LogFactory.getLog(getClass()); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private final byte[] FAMILY = Bytes.toBytes("cf"); + + private byte[] emptySnapshot; + private byte[] snapshotName0; + private byte[] snapshotName1; + private byte[] snapshotName2; + private int snapshot0Rows; + private int snapshot1Rows; + private byte[] tableName; + private HBaseAdmin admin; + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + 
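// Editor's note (illustrative sketch, not part of this patch; the local names are hypothetical):
// the pause/retry settings applied just below bound how long admin.snapshot() will poll the
// master. A rough upper bound on the client-side wait is the sum of HConstants.RETRY_BACKOFF
// multiples of the configured pause; HBaseAdmin additionally caps each sleep using the max wait
// time the master reports for the snapshot (see testBackoffLogic in TestSnapshotFromAdmin).
long pauseMs = 250;     // mirrors "hbase.client.pause" set below
int clientRetries = 6;  // mirrors "hbase.client.retries.number" set below
long roughMaxWaitMs = 0;
for (int i = 0; i < clientRetries && i < HConstants.RETRY_BACKOFF.length; ++i) {
  roughMaxWaitMs += HConstants.RETRY_BACKOFF[i] * pauseMs;
}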
TEST_UTIL.getConfiguration().setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); + TEST_UTIL.getConfiguration().setBoolean("hbase.online.schema.update.enable", true); + TEST_UTIL.getConfiguration().setInt("hbase.hstore.compactionThreshold", 10); + TEST_UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 100); + TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 250); + TEST_UTIL.getConfiguration().setInt("hbase.client.retries.number", 6); + TEST_UTIL.getConfiguration().setBoolean( + "hbase.master.enabletable.roundrobin", true); + TEST_UTIL.startMiniCluster(3); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + /** + * Initialize the tests with a table filled with some data + * and two snapshots (snapshotName0, snapshotName1) of different states. + * The tableName, snapshotNames and the number of rows in the snapshot are initialized. + */ + @Before + public void setup() throws Exception { + this.admin = TEST_UTIL.getHBaseAdmin(); + + long tid = System.currentTimeMillis(); + tableName = Bytes.toBytes("testtb-" + tid); + emptySnapshot = Bytes.toBytes("emptySnaptb-" + tid); + snapshotName0 = Bytes.toBytes("snaptb0-" + tid); + snapshotName1 = Bytes.toBytes("snaptb1-" + tid); + snapshotName2 = Bytes.toBytes("snaptb2-" + tid); + + // create Table and disable it + createTable(tableName, FAMILY); + admin.disableTable(tableName); + + // take an empty snapshot + admin.snapshot(emptySnapshot, tableName); + + HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName); + try { + // enable table and insert data + admin.enableTable(tableName); + loadData(table, 500, FAMILY); + snapshot0Rows = TEST_UTIL.countRows(table); + admin.disableTable(tableName); + + // take a snapshot + admin.snapshot(snapshotName0, tableName); + + // enable table and insert more data + admin.enableTable(tableName); + loadData(table, 500, FAMILY); + snapshot1Rows = TEST_UTIL.countRows(table); + admin.disableTable(tableName); + + // take a snapshot of the updated table + admin.snapshot(snapshotName1, tableName); + + // re-enable table + admin.enableTable(tableName); + } finally { + table.close(); + } + } + + @After + public void tearDown() throws Exception { + if (admin.tableExists(tableName)) { + TEST_UTIL.deleteTable(tableName); + } + admin.deleteSnapshot(snapshotName0); + admin.deleteSnapshot(snapshotName1); + + // Ensure the archiver to be empty + MasterFileSystem mfs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem(); + mfs.getFileSystem().delete( + new Path(mfs.getRootDir(), HConstants.HFILE_ARCHIVE_DIRECTORY), true); + } + + @Test + public void testRestoreSnapshot() throws IOException { + verifyRowCount(tableName, snapshot1Rows); + + // Restore from snapshot-0 + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName0); + admin.enableTable(tableName); + verifyRowCount(tableName, snapshot0Rows); + + // Restore from emptySnapshot + admin.disableTable(tableName); + admin.restoreSnapshot(emptySnapshot); + admin.enableTable(tableName); + verifyRowCount(tableName, 0); + + // Restore from snapshot-1 + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName1); + admin.enableTable(tableName); + verifyRowCount(tableName, snapshot1Rows); + } + + @Test + public void testRestoreSchemaChange() throws IOException { + byte[] TEST_FAMILY2 = Bytes.toBytes("cf2"); + + HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName); + + // Add one column family and put some data in it + 
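// Editor's aside (illustrative, not part of this patch; 'beforeAlter' is a hypothetical name):
// capture the pre-alter descriptor so the starting point of this schema-change scenario is
// explicit -- the table created in setup() has only the original 'cf' family here.
HTableDescriptor beforeAlter = admin.getTableDescriptor(tableName);
assertEquals(1, beforeAlter.getFamilies().size());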
admin.disableTable(tableName); + admin.addColumn(tableName, new HColumnDescriptor(TEST_FAMILY2)); + admin.enableTable(tableName); + assertEquals(2, table.getTableDescriptor().getFamilies().size()); + HTableDescriptor htd = admin.getTableDescriptor(tableName); + assertEquals(2, htd.getFamilies().size()); + loadData(table, 500, TEST_FAMILY2); + long snapshot2Rows = snapshot1Rows + 500; + assertEquals(snapshot2Rows, TEST_UTIL.countRows(table)); + assertEquals(500, TEST_UTIL.countRows(table, TEST_FAMILY2)); + Set fsFamilies = getFamiliesFromFS(tableName); + assertEquals(2, fsFamilies.size()); + table.close(); + + // Take a snapshot + admin.disableTable(tableName); + admin.snapshot(snapshotName2, tableName); + + // Restore the snapshot (without the cf) + admin.restoreSnapshot(snapshotName0); + assertEquals(1, table.getTableDescriptor().getFamilies().size()); + admin.enableTable(tableName); + try { + TEST_UTIL.countRows(table, TEST_FAMILY2); + fail("family '" + Bytes.toString(TEST_FAMILY2) + "' should not exists"); + } catch (NoSuchColumnFamilyException e) { + // expected + } + assertEquals(snapshot0Rows, TEST_UTIL.countRows(table)); + htd = admin.getTableDescriptor(tableName); + assertEquals(1, htd.getFamilies().size()); + fsFamilies = getFamiliesFromFS(tableName); + assertEquals(1, fsFamilies.size()); + table.close(); + + // Restore back the snapshot (with the cf) + admin.disableTable(tableName); + admin.restoreSnapshot(snapshotName2); + admin.enableTable(tableName); + htd = admin.getTableDescriptor(tableName); + assertEquals(2, htd.getFamilies().size()); + assertEquals(2, table.getTableDescriptor().getFamilies().size()); + assertEquals(500, TEST_UTIL.countRows(table, TEST_FAMILY2)); + assertEquals(snapshot2Rows, TEST_UTIL.countRows(table)); + fsFamilies = getFamiliesFromFS(tableName); + assertEquals(2, fsFamilies.size()); + table.close(); + } + + @Test(expected=SnapshotDoesNotExistException.class) + public void testCloneNonExistentSnapshot() throws IOException, InterruptedException { + String snapshotName = "random-snapshot-" + System.currentTimeMillis(); + String tableName = "random-table-" + System.currentTimeMillis(); + admin.cloneSnapshot(snapshotName, tableName); + } + + @Test + public void testCloneSnapshot() throws IOException, InterruptedException { + byte[] clonedTableName = Bytes.toBytes("clonedtb-" + System.currentTimeMillis()); + testCloneSnapshot(clonedTableName, snapshotName0, snapshot0Rows); + testCloneSnapshot(clonedTableName, snapshotName1, snapshot1Rows); + testCloneSnapshot(clonedTableName, emptySnapshot, 0); + } + + private void testCloneSnapshot(final byte[] tableName, final byte[] snapshotName, + int snapshotRows) throws IOException, InterruptedException { + // create a new table from snapshot + admin.cloneSnapshot(snapshotName, tableName); + verifyRowCount(tableName, snapshotRows); + + admin.disableTable(tableName); + admin.deleteTable(tableName); + } + + @Test + public void testRestoreSnapshotOfCloned() throws IOException, InterruptedException { + byte[] clonedTableName = Bytes.toBytes("clonedtb-" + System.currentTimeMillis()); + admin.cloneSnapshot(snapshotName0, clonedTableName); + verifyRowCount(clonedTableName, snapshot0Rows); + admin.disableTable(clonedTableName); + admin.snapshot(snapshotName2, clonedTableName); + admin.deleteTable(clonedTableName); + waitCleanerRun(); + + admin.cloneSnapshot(snapshotName2, clonedTableName); + verifyRowCount(clonedTableName, snapshot0Rows); + admin.disableTable(clonedTableName); + admin.deleteTable(clonedTableName); + } + + /** + 
* Verify that tables created from the snapshot are still alive after source table deletion. + */ + @Test + public void testCloneLinksAfterDelete() throws IOException, InterruptedException { + // Clone a table from the first snapshot + byte[] clonedTableName = Bytes.toBytes("clonedtb1-" + System.currentTimeMillis()); + admin.cloneSnapshot(snapshotName0, clonedTableName); + verifyRowCount(clonedTableName, snapshot0Rows); + + // Take a snapshot of this cloned table. + admin.disableTable(clonedTableName); + admin.snapshot(snapshotName2, clonedTableName); + + // Clone the snapshot of the cloned table + byte[] clonedTableName2 = Bytes.toBytes("clonedtb2-" + System.currentTimeMillis()); + admin.cloneSnapshot(snapshotName2, clonedTableName2); + verifyRowCount(clonedTableName2, snapshot0Rows); + admin.disableTable(clonedTableName2); + + // Remove the original table + admin.disableTable(tableName); + admin.deleteTable(tableName); + waitCleanerRun(); + + // Verify the first cloned table + admin.enableTable(clonedTableName); + verifyRowCount(clonedTableName, snapshot0Rows); + + // Verify the second cloned table + admin.enableTable(clonedTableName2); + verifyRowCount(clonedTableName2, snapshot0Rows); + admin.disableTable(clonedTableName2); + + // Delete the first cloned table + admin.disableTable(clonedTableName); + admin.deleteTable(clonedTableName); + waitCleanerRun(); + + // Verify the second cloned table + admin.enableTable(clonedTableName2); + verifyRowCount(clonedTableName2, snapshot0Rows); + + // Clone a new table from cloned + byte[] clonedTableName3 = Bytes.toBytes("clonedtb3-" + System.currentTimeMillis()); + admin.cloneSnapshot(snapshotName2, clonedTableName3); + verifyRowCount(clonedTableName3, snapshot0Rows); + + // Delete the cloned tables + admin.disableTable(clonedTableName2); + admin.deleteTable(clonedTableName2); + admin.disableTable(clonedTableName3); + admin.deleteTable(clonedTableName3); + admin.deleteSnapshot(snapshotName2); + } + + // ========================================================================== + // Helpers + // ========================================================================== + private void createTable(final byte[] tableName, final byte[]... families) throws IOException { + HTableDescriptor htd = new HTableDescriptor(tableName); + for (byte[] family: families) { + HColumnDescriptor hcd = new HColumnDescriptor(family); + htd.addFamily(hcd); + } + byte[][] splitKeys = new byte[16][]; + byte[] hex = Bytes.toBytes("0123456789abcdef"); + for (int i = 0; i < 16; ++i) { + splitKeys[i] = new byte[] { hex[i] }; + } + admin.createTable(htd, splitKeys); + } + + public void loadData(final HTable table, int rows, byte[]... 
families) throws IOException { + byte[] qualifier = Bytes.toBytes("q"); + table.setAutoFlush(false); + while (rows-- > 0) { + byte[] value = Bytes.add(Bytes.toBytes(System.currentTimeMillis()), Bytes.toBytes(rows)); + byte[] key = Bytes.toBytes(MD5Hash.getMD5AsHex(value)); + Put put = new Put(key); + put.setWriteToWAL(false); + for (byte[] family: families) { + put.add(family, qualifier, value); + } + table.put(put); + } + table.flushCommits(); + } + + private void waitCleanerRun() throws InterruptedException { + TEST_UTIL.getMiniHBaseCluster().getMaster().getHFileCleaner().choreForTesting(); + } + + private Set getFamiliesFromFS(final byte[] tableName) throws IOException { + MasterFileSystem mfs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem(); + Set families = new HashSet(); + Path tableDir = HTableDescriptor.getTableDir(mfs.getRootDir(), tableName); + for (Path regionDir: FSUtils.getRegionDirs(mfs.getFileSystem(), tableDir)) { + for (Path familyDir: FSUtils.getFamilyDirs(mfs.getFileSystem(), regionDir)) { + families.add(familyDir.getName()); + } + } + return families; + } + + private void verifyRowCount(final byte[] tableName, long expectedRows) throws IOException { + HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName); + assertEquals(expectedRows, TEST_UTIL.countRows(table)); + table.close(); + } +} Index: src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromAdmin.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromAdmin.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/client/TestSnapshotFromAdmin.java (revision 0) @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.ipc.HMasterInterface; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +import com.google.protobuf.RpcController; + +/** + * Test snapshot logic from the client + */ +@Category(SmallTests.class) +public class TestSnapshotFromAdmin { + + private static final Log LOG = LogFactory.getLog(TestSnapshotFromAdmin.class); + + /** + * Test that the logic for doing 'correct' back-off based on exponential increase and the max-time + * passed from the server ensures the correct overall waiting for the snapshot to finish. + * @throws Exception + */ + @Test(timeout = 10000) + public void testBackoffLogic() throws Exception { + final int maxWaitTime = 7500; + final int numRetries = 10; + final int pauseTime = 500; + // calculate the wait time, if we just do straight backoff (ignoring the expected time from + // master) + long ignoreExpectedTime = 0; + for (int i = 0; i < 6; i++) { + ignoreExpectedTime += HConstants.RETRY_BACKOFF[i] * pauseTime; + } + // the correct wait time, capping at the maxTime/tries + fudge room + final long time = pauseTime * 3 + ((maxWaitTime / numRetries) * 3) + 300; + assertTrue("Capped snapshot wait time isn't less that the uncapped backoff time " + + "- further testing won't prove anything.", time < ignoreExpectedTime); + + // setup the mocks + HConnectionManager.HConnectionImplementation mockConnection = Mockito + .mock(HConnectionManager.HConnectionImplementation.class); + Configuration conf = HBaseConfiguration.create(); + // setup the conf to match the expected properties + conf.setInt("hbase.client.retries.number", numRetries); + conf.setLong("hbase.client.pause", pauseTime); + // mock the master admin to our mock + HMasterInterface mockMaster = Mockito.mock(HMasterInterface.class); + Mockito.when(mockConnection.getConfiguration()).thenReturn(conf); + Mockito.when(mockConnection.getMaster()).thenReturn(mockMaster); + // set the max wait time for the snapshot to complete + Mockito + .when( + mockMaster.snapshot( + Mockito.any(HSnapshotDescription.class))).thenReturn((long)maxWaitTime); + + // first five times, we return false, last we get success + Mockito.when( + mockMaster.isSnapshotDone( + Mockito.any(HSnapshotDescription.class))).thenReturn(false, false, + false, false, false, true); + + // setup the admin and run the test + HBaseAdmin admin = new HBaseAdmin(mockConnection); + String snapshot = "snapshot"; + String table = "table"; + // get start time + long start = System.currentTimeMillis(); + admin.snapshot(snapshot, table); + long finish = System.currentTimeMillis(); + long elapsed = (finish - start); + assertTrue("Elapsed time:" + elapsed + " is more than expected max:" + time, elapsed <= time); + admin.close(); + } + + /** + * Make sure that we validate the snapshot name and the table name before we pass anything across + * the wire + * @throws Exception on failure + */ + @Test + public void 
testValidateSnapshotName() throws Exception { + HConnectionManager.HConnectionImplementation mockConnection = Mockito + .mock(HConnectionManager.HConnectionImplementation.class); + Configuration conf = HBaseConfiguration.create(); + Mockito.when(mockConnection.getConfiguration()).thenReturn(conf); + HBaseAdmin admin = new HBaseAdmin(mockConnection); + SnapshotDescription.Builder builder = SnapshotDescription.newBuilder(); + // check that invalid snapshot names fail + failSnapshotStart(admin, builder.setName(".snapshot").build()); + failSnapshotStart(admin, builder.setName("-snapshot").build()); + failSnapshotStart(admin, builder.setName("snapshot fails").build()); + failSnapshotStart(admin, builder.setName("snap$hot").build()); + // check the table name also get verified + failSnapshotStart(admin, builder.setName("snapshot").setTable(".table").build()); + failSnapshotStart(admin, builder.setName("snapshot").setTable("-table").build()); + failSnapshotStart(admin, builder.setName("snapshot").setTable("table fails").build()); + failSnapshotStart(admin, builder.setName("snapshot").setTable("tab%le").build()); + + // mock the master connection + HMasterInterface master = Mockito.mock(HMasterInterface.class); + Mockito.when(mockConnection.getMaster()).thenReturn(master); + + Mockito.when( + master.snapshot(Mockito.any(HSnapshotDescription.class))).thenReturn((long)0); + Mockito.when( + master.isSnapshotDone( + Mockito.any(HSnapshotDescription.class))).thenReturn(true); + + // make sure that we can use valid names + admin.snapshot(builder.setName("snapshot").setTable("table").build()); + } + + private void failSnapshotStart(HBaseAdmin admin, SnapshotDescription snapshot) throws IOException { + try { + admin.snapshot(snapshot); + fail("Snapshot should not have succeed with name:" + snapshot.getName()); + } catch (IllegalArgumentException e) { + LOG.debug("Correctly failed to start snapshot:" + e.getMessage()); + } + } +} Index: src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 1451296) +++ src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (working copy) @@ -1089,6 +1089,20 @@ return count; } + public int countRows(final HTable table, final byte[]... families) throws IOException { + Scan scan = new Scan(); + for (byte[] family: families) { + scan.addFamily(family); + } + ResultScanner results = table.getScanner(scan); + int count = 0; + for (@SuppressWarnings("unused") Result res : results) { + count++; + } + results.close(); + return count; + } + /** * Return an md5 digest of the entire contents of a table. 
*/ Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (revision 1451296) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java (working copy) @@ -28,16 +28,22 @@ import java.util.List; import java.util.Map; import java.util.TreeSet; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestCase; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.io.HalfStoreFileReader; +import org.apache.hadoop.hbase.io.Reference; import org.apache.hadoop.hbase.io.Reference.Range; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.BlockCache; @@ -53,6 +59,7 @@ import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ChecksumType; +import org.apache.hadoop.hbase.util.FSUtils; import org.junit.experimental.categories.Category; import org.mockito.Mockito; @@ -92,8 +99,8 @@ * @throws Exception */ public void testBasicHalfMapFile() throws Exception { - // Make up a directory hierarchy that has a regiondir and familyname. - Path outputDir = new Path(new Path(this.testDir, "regionname"), + // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. + Path outputDir = new Path(new Path(this.testDir, "7e0102"), "familyname"); StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 2 * 1024) @@ -107,6 +114,10 @@ private void writeStoreFile(final StoreFile.Writer writer) throws IOException { writeStoreFile(writer, Bytes.toBytes(getName()), Bytes.toBytes(getName())); } + + // pick an split point (roughly halfway) + byte[] SPLITKEY = new byte[] { (LAST_CHAR-FIRST_CHAR)/2, FIRST_CHAR}; + /* * Writes HStoreKey and ImmutableBytes data to passed writer and * then closes it. @@ -135,12 +146,12 @@ */ public void testReference() throws IOException { - Path storedir = new Path(new Path(this.testDir, "regionname"), "familyname"); - Path dir = new Path(storedir, "1234567890"); + // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. + Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname"); // Make a store file and write data to it. 
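// Editor's aside (illustrative, not part of this patch; the names below are only an example):
// the "7e0102" literal above stands in for a region's encoded name, which is what the stricter
// store-file name checks in this class now expect. With a real HRegionInfo the region directory
// name would come from getEncodedName():
HRegionInfo exampleRegion = new HRegionInfo(Bytes.toBytes("exampleTable"));
String exampleEncodedDir = exampleRegion.getEncodedName(); // usable as the region dir name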
StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 8 * 1024) - .withOutputDir(dir) + .withOutputDir(storedir) .build(); writeStoreFile(writer); StoreFile hsf = new StoreFile(this.fs, writer.getPath(), conf, cacheConf, @@ -154,7 +165,7 @@ kv = KeyValue.createKeyValueFromKey(reader.getLastKey()); byte [] finalRow = kv.getRow(); // Make a reference - Path refPath = StoreFile.split(fs, dir, hsf, midRow, Range.top); + Path refPath = StoreFile.split(fs, storedir, hsf, midRow, Range.top); StoreFile refHsf = new StoreFile(this.fs, refPath, conf, cacheConf, StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); // Now confirm that I can read from the reference and that it only gets @@ -171,6 +182,147 @@ assertTrue(Bytes.equals(kv.getRow(), finalRow)); } + public void testHFileLink() throws IOException { + final String columnFamily = "f"; + + Configuration testConf = new Configuration(this.conf); + FSUtils.setRootDir(testConf, this.testDir); + + HRegionInfo hri = new HRegionInfo(Bytes.toBytes("table-link")); + Path storedir = new Path(new Path(this.testDir, + new Path(hri.getTableNameAsString(), hri.getEncodedName())), columnFamily); + + // Make a store file and write data to it. + StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, + this.fs, 8 * 1024) + .withOutputDir(storedir) + .build(); + Path storeFilePath = writer.getPath(); + writeStoreFile(writer); + writer.close(); + + Path dstPath = new Path(this.testDir, new Path("test-region", columnFamily)); + HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName()); + Path linkFilePath = new Path(dstPath, + HFileLink.createHFileLinkName(hri, storeFilePath.getName())); + + // Try to open store file from link + StoreFile hsf = new StoreFile(this.fs, linkFilePath, testConf, cacheConf, + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); + assertTrue(hsf.isLink()); + + // Now confirm that I can read from the link + int count = 1; + HFileScanner s = hsf.createReader().getScanner(false, false); + s.seekTo(); + while (s.next()) { + count++; + } + assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count); + } + + /** + * Validate that we can handle valid tables with '.', '_', and '-' chars. + */ + public void testStoreFileNames() { + String[] legalHFileLink = { "MyTable_02=abc012-def345", "MyTable_02.300=abc012-def345", + "MyTable_02-400=abc012-def345", "MyTable_02-400.200=abc012-def345", + "MyTable_02=abc012-def345_SeqId_1_", "MyTable_02=abc012-def345_SeqId_20_" }; + for (String name: legalHFileLink) { + assertTrue("should be a valid link: " + name, HFileLink.isHFileLink(name)); + assertTrue("should be a valid StoreFile" + name, StoreFile.validateStoreFileName(name)); + assertFalse("should not be a valid reference: " + name, StoreFile.isReference(name)); + + String refName = name + ".6789"; + assertTrue("should be a valid link reference: " + refName, StoreFile.isReference(refName)); + assertTrue("should be a valid StoreFile" + refName, StoreFile.validateStoreFileName(refName)); + } + + String[] illegalHFileLink = { ".MyTable_02=abc012-def345", "-MyTable_02.300=abc012-def345", + "MyTable_02-400=abc0_12-def345", "MyTable_02-400.200=abc012-def345...." }; + for (String name: illegalHFileLink) { + assertFalse("should not be a valid link: " + name, HFileLink.isHFileLink(name)); + } + } + + /** + * This test creates an hfile and then the dir structures and files to verify that references + * to hfilelinks (created by snapshot clones) can be properly interpreted. 
+ */ + public void testReferenceToHFileLink() throws IOException { + final String columnFamily = "f"; + + Path rootDir = FSUtils.getRootDir(conf); + + String tablename = "_original-evil-name"; // adding legal table name chars to verify regex handles it. + HRegionInfo hri = new HRegionInfo(Bytes.toBytes(tablename)); + // store dir = /// + Path storedir = new Path(new Path(rootDir, + new Path(hri.getTableNameAsString(), hri.getEncodedName())), columnFamily); + + // Make a store file and write data to it. //// + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, + this.fs, 8 * 1024) + .withOutputDir(storedir) + .build(); + Path storeFilePath = writer.getPath(); + writeStoreFile(writer); + writer.close(); + + // create link to store file. /clone/region//-- + String target = "clone"; + Path dstPath = new Path(rootDir, new Path(new Path(target, "7e0102"), columnFamily)); + HFileLink.create(conf, this.fs, dstPath, hri, storeFilePath.getName()); + Path linkFilePath = new Path(dstPath, + HFileLink.createHFileLinkName(hri, storeFilePath.getName())); + + // create splits of the link. + // /clone/splitA//, + // /clone/splitB// + Path splitDirA = new Path(new Path(rootDir, + new Path(target, "571A")), columnFamily); + Path splitDirB = new Path(new Path(rootDir, + new Path(target, "571B")), columnFamily); + StoreFile f = new StoreFile(fs, linkFilePath, conf, cacheConf, BloomType.NONE, + NoOpDataBlockEncoder.INSTANCE); + byte[] splitRow = SPLITKEY; + Path pathA = StoreFile.split(fs, splitDirA, f, splitRow, Range.top); // top + Path pathB = StoreFile.split(fs, splitDirB, f, splitRow, Range.bottom); // bottom + + // OK test the thing + FSUtils.logFileSystemState(fs, rootDir, LOG); + + // There is a case where a file with the hfilelink pattern is actually a daughter + // reference to a hfile link. This code in StoreFile that handles this case. + + // Try to open store file from link + StoreFile hsfA = new StoreFile(this.fs, pathA, conf, cacheConf, + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); + + // Now confirm that I can read from the ref to link + int count = 1; + HFileScanner s = hsfA.createReader().getScanner(false, false); + s.seekTo(); + while (s.next()) { + count++; + } + assertTrue(count > 0); // read some rows here + + // Try to open store file from link + StoreFile hsfB = new StoreFile(this.fs, pathB, conf, cacheConf, + StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE); + + // Now confirm that I can read from the ref to link + HFileScanner sB = hsfB.createReader().getScanner(false, false); + sB.seekTo(); + while (sB.next()) { + count++; + } + + // read the rest of the rows + assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count); + } + private void checkHalfHFile(final StoreFile f) throws IOException { byte [] midkey = f.createReader().midkey(); @@ -694,8 +846,8 @@ long[] timestamps = new long[] {20,10,5,1}; Scan scan = new Scan(); - Path storedir = new Path(new Path(this.testDir, "regionname"), - "familyname"); + // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. 
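// Editor's aside (illustrative sketch, not part of this patch; the example strings mirror the
// names already asserted in testStoreFileNames() above): hfile links follow the
// "table=region-hfile" pattern, and a split daughter reference to a link just appends the
// parent region suffix, so both forms are recognised by the existing helpers:
String exampleLink = "MyTable_02=abc012-def345";   // a plain hfile link name
String exampleLinkRef = exampleLink + ".6789";     // a daughter reference to that link
assertTrue(HFileLink.isHFileLink(exampleLink));
assertTrue(StoreFile.isReference(exampleLinkRef));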
+ Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname"); Path dir = new Path(storedir, "1234567890"); StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 8 * 1024) @@ -738,8 +890,8 @@ public void testCacheOnWriteEvictOnClose() throws Exception { Configuration conf = this.conf; - // Find a home for our files - Path baseDir = new Path(new Path(this.testDir, "regionname"),"twoCOWEOC"); + // Find a home for our files (regiondir ("7e0102") and familyname). + Path baseDir = new Path(new Path(this.testDir, "7e0102"),"twoCOWEOC"); // Grab the block cache and get the initial hit/miss counts BlockCache bc = new CacheConfig(conf).getBlockCache(); @@ -810,7 +962,7 @@ kv2 = scannerTwo.next(); assertTrue(kv1.equals(kv2)); assertTrue(Bytes.compareTo( - kv1.getBuffer(), kv1.getKeyOffset(), kv1.getKeyLength(), + kv1.getBuffer(), kv1.getKeyOffset(), kv1.getKeyLength(), kv2.getBuffer(), kv2.getKeyOffset(), kv2.getKeyLength()) == 0); assertTrue(Bytes.compareTo( kv1.getBuffer(), kv1.getValueOffset(), kv1.getValueLength(), @@ -891,7 +1043,8 @@ * file info. */ public void testDataBlockEncodingMetaData() throws IOException { - Path dir = new Path(new Path(this.testDir, "regionname"), "familyname"); + // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. + Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname"); Path path = new Path(dir, "1234567890"); DataBlockEncoding dataBlockEncoderAlgo = @@ -910,11 +1063,11 @@ .withBytesPerChecksum(CKBYTES) .build(); writer.close(); - + StoreFile storeFile = new StoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, dataBlockEncoder); StoreFile.Reader reader = storeFile.createReader(); - + Map fileInfo = reader.loadFileInfo(); byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING); Index: src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java (revision 1451296) +++ src/test/java/org/apache/hadoop/hbase/TestHTableDescriptor.java (working copy) @@ -20,9 +20,15 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.regex.Pattern; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; import org.apache.hadoop.hbase.coprocessor.SampleRegionWALObserver; +import org.apache.hadoop.hbase.util.Bytes; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -31,6 +37,7 @@ */ @Category(SmallTests.class) public class TestHTableDescriptor { + final static Log LOG = LogFactory.getLog(TestHTableDescriptor.class); /** * Test cps in the table description @@ -122,4 +129,42 @@ desc.setMemStoreFlushSize(1111L); assertEquals(1111L, desc.getMemStoreFlushSize()); } + + String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok", }; + String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", "spaces not ok" }; + + @Test + public void testLegalHTableNames() { + for (String tn : legalTableNames) { + HTableDescriptor.isLegalTableName(Bytes.toBytes(tn)); + } + } + + @Test + public void testIllegalHTableNames() { + for (String tn : illegalTableNames) { + try { + HTableDescriptor.isLegalTableName(Bytes.toBytes(tn)); + fail("invalid tablename " + tn + " should have 
failed"); + } catch (Exception e) { + // expected + } + } + } + + @Test + public void testLegalHTableNamesRegex() { + for (String tn : legalTableNames) { + LOG.info("Testing: '" + tn + "'"); + assertTrue(Pattern.matches(HTableDescriptor.VALID_USER_TABLE_REGEX, tn)); + } + } + + @Test + public void testIllegalHTableNamesRegex() { + for (String tn : illegalTableNames) { + LOG.info("Testing: '" + tn + "'"); + assertFalse(Pattern.matches(HTableDescriptor.VALID_USER_TABLE_REGEX, tn)); + } + } } Index: src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java (revision 0) @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.snapshot; + +import static org.junit.Assert.assertFalse; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.junit.AfterClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test that the snapshot hfile cleaner finds hfiles referenced in a snapshot + */ +@Category(SmallTests.class) +public class TestSnapshotHFileCleaner { + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @AfterClass + public static void cleanup() throws IOException { + Configuration conf = TEST_UTIL.getConfiguration(); + Path rootDir = FSUtils.getRootDir(conf); + FileSystem fs = FileSystem.get(conf); + // cleanup + fs.delete(rootDir, true); + } + + @Test + public void testFindsSnapshotFilesWhenCleaning() throws IOException { + Configuration conf = TEST_UTIL.getConfiguration(); + FSUtils.setRootDir(conf, TEST_UTIL.getDataTestDir()); + Path rootDir = FSUtils.getRootDir(conf); + Path archivedHfileDir = new Path(TEST_UTIL.getDataTestDir(), HConstants.HFILE_ARCHIVE_DIRECTORY); + + FileSystem fs = FileSystem.get(conf); + SnapshotHFileCleaner cleaner = new SnapshotHFileCleaner(); + cleaner.setConf(conf); + + // write an hfile to the snapshot directory + String snapshotName = "snapshot"; + byte[] snapshot = Bytes.toBytes(snapshotName); + String table = "table"; + byte[] tableName = Bytes.toBytes(table); + 
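// Editor's sketch (illustrative, not part of this patch): in the master, cleaner delegates such
// as SnapshotHFileCleaner are consulted per archived file, and a file is kept as soon as any
// delegate reports it as not deletable. A manual sweep of the archive dir would look roughly
// like this (a no-op at this point, since the archive dir is only populated further down):
if (fs.exists(archivedHfileDir)) {
  for (org.apache.hadoop.fs.FileStatus archivedFile : fs.listStatus(archivedHfileDir)) {
    if (!cleaner.isFileDeletable(archivedFile.getPath())) {
      // still referenced by some snapshot -- the cleaner chain must keep it
    }
  }
}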
Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + HRegionInfo mockRegion = new HRegionInfo(tableName); + Path regionSnapshotDir = new Path(snapshotDir, mockRegion.getEncodedName()); + Path familyDir = new Path(regionSnapshotDir, "family"); + // create a reference to a supposedly valid hfile + String hfile = "fd1e73e8a96c486090c5cec07b4894c4"; + Path refFile = new Path(familyDir, hfile); + + // make sure the reference file exists + fs.create(refFile); + + // create the hfile in the archive + fs.mkdirs(archivedHfileDir); + fs.createNewFile(new Path(archivedHfileDir, hfile)); + + // make sure that the file isn't deletable + assertFalse(cleaner.isFileDeletable(new Path(hfile))); + } +} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotFileCache.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotFileCache.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotFileCache.java (revision 0) @@ -0,0 +1,230 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.snapshot; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; +import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils; +import org.apache.hadoop.hbase.util.FSUtils; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test that we correctly reload the cache, filter directories, etc. 
+ */ +@Category(MediumTests.class) +public class TestSnapshotFileCache { + + private static final Log LOG = LogFactory.getLog(TestSnapshotFileCache.class); + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static FileSystem fs; + private static Path rootDir; + + @BeforeClass + public static void startCluster() throws Exception { + UTIL.startMiniDFSCluster(1); + fs = UTIL.getDFSCluster().getFileSystem(); + rootDir = UTIL.getDefaultRootDirPath(); + } + + @AfterClass + public static void stopCluster() throws Exception { + UTIL.shutdownMiniDFSCluster(); + } + + @After + public void cleanupFiles() throws Exception { + // cleanup the snapshot directory + Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); + fs.delete(snapshotDir, true); + } + + @Test(timeout = 10000000) + public void testLoadAndDelete() throws Exception { + // don't refresh the cache unless we tell it to + long period = Long.MAX_VALUE; + Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); + SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000, + "test-snapshot-file-cache-refresh", new SnapshotFiles()); + + Path snapshot = new Path(snapshotDir, "snapshot"); + Path region = new Path(snapshot, "7e91021"); + Path family = new Path(region, "fam"); + Path file1 = new Path(family, "file1"); + Path file2 = new Path(family, "file2"); + + // create two hfiles under the snapshot + fs.create(file1); + fs.create(file2); + + FSUtils.logFileSystemState(fs, rootDir, LOG); + + // then make sure the cache finds them + assertTrue("Cache didn't find:" + file1, cache.contains(file1.getName())); + assertTrue("Cache didn't find:" + file2, cache.contains(file2.getName())); + String not = "file-shouldn't-be-found"; + assertFalse("Cache found '" + not + "', but it shouldn't have.", cache.contains(not)); + + // make sure we get a little bit of separation in the modification times + // its okay if we sleep a little longer (b/c of GC pause), as long as we sleep a little + Thread.sleep(10); + + LOG.debug("Deleting snapshot."); + // then delete the snapshot and make sure that we can still find the files + if (!fs.delete(snapshot, true)) { + throw new IOException("Couldn't delete " + snapshot + " for an unknown reason."); + } + FSUtils.logFileSystemState(fs, rootDir, LOG); + + + LOG.debug("Checking to see if file is deleted."); + assertTrue("Cache didn't find:" + file1, cache.contains(file1.getName())); + assertTrue("Cache didn't find:" + file2, cache.contains(file2.getName())); + + // then trigger a refresh + cache.triggerCacheRefreshForTesting(); + // and not it shouldn't find those files + assertFalse("Cache found '" + file1 + "', but it shouldn't have.", + cache.contains(file1.getName())); + assertFalse("Cache found '" + file2 + "', but it shouldn't have.", + cache.contains(file2.getName())); + + fs.delete(snapshotDir, true); + } + + @Test + public void testLoadsTmpDir() throws Exception { + // don't refresh the cache unless we tell it to + long period = Long.MAX_VALUE; + Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); + SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000, + "test-snapshot-file-cache-refresh", new SnapshotFiles()); + + // create a file in a 'completed' snapshot + Path snapshot = new Path(snapshotDir, "snapshot"); + Path region = new Path(snapshot, "7e91021"); + Path family = new Path(region, "fam"); + Path file1 = new Path(family, "file1"); + fs.create(file1); + + // create an 'in progress' 
snapshot + SnapshotDescription desc = SnapshotDescription.newBuilder().setName("working").build(); + snapshot = SnapshotDescriptionUtils.getWorkingSnapshotDir(desc, rootDir); + region = new Path(snapshot, "7e91021"); + family = new Path(region, "fam"); + Path file2 = new Path(family, "file2"); + fs.create(file2); + + FSUtils.logFileSystemState(fs, rootDir, LOG); + + // then make sure the cache finds both files + assertTrue("Cache didn't find:" + file1, cache.contains(file1.getName())); + assertTrue("Cache didn't find:" + file2, cache.contains(file2.getName())); + } + + @Test + public void testJustFindLogsDirectory() throws Exception { + // don't refresh the cache unless we tell it to + long period = Long.MAX_VALUE; + Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); + SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000, + "test-snapshot-file-cache-refresh", new SnapshotFileCache.SnapshotFileInspector() { + public Collection filesUnderSnapshot(final Path snapshotDir) + throws IOException { + return SnapshotReferenceUtil.getHLogNames(fs, snapshotDir); + } + }); + + // create a file in a 'completed' snapshot + Path snapshot = new Path(snapshotDir, "snapshot"); + Path region = new Path(snapshot, "7e91021"); + Path family = new Path(region, "fam"); + Path file1 = new Path(family, "file1"); + fs.create(file1); + + // and another file in the logs directory + Path logs = TakeSnapshotUtils.getSnapshotHLogsDir(snapshot, "server"); + Path log = new Path(logs, "me.hbase.com%2C58939%2C1350424310315.1350424315552"); + fs.create(log); + + FSUtils.logFileSystemState(fs, rootDir, LOG); + + // then make sure the cache only finds the log files + assertFalse("Cache found '" + file1 + "', but it shouldn't have.", + cache.contains(file1.getName())); + assertTrue("Cache didn't find:" + log, cache.contains(log.getName())); + } + + @Test + public void testReloadModifiedDirectory() throws IOException { + // don't refresh the cache unless we tell it to + long period = Long.MAX_VALUE; + Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); + SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000, + "test-snapshot-file-cache-refresh", new SnapshotFiles()); + + Path snapshot = new Path(snapshotDir, "snapshot"); + Path region = new Path(snapshot, "7e91021"); + Path family = new Path(region, "fam"); + Path file1 = new Path(family, "file1"); + Path file2 = new Path(family, "file2"); + + // create two hfiles under the snapshot + fs.create(file1); + fs.create(file2); + + FSUtils.logFileSystemState(fs, rootDir, LOG); + + assertTrue("Cache didn't find " + file1, cache.contains(file1.getName())); + + // now delete the snapshot and add a file with a different name + fs.delete(snapshot, true); + Path file3 = new Path(family, "new_file"); + fs.create(file3); + + FSUtils.logFileSystemState(fs, rootDir, LOG); + assertTrue("Cache didn't find new file:" + file3, cache.contains(file3.getName())); + } + + class SnapshotFiles implements SnapshotFileCache.SnapshotFileInspector { + public Collection filesUnderSnapshot(final Path snapshotDir) throws IOException { + Collection files = new HashSet(); + files.addAll(SnapshotReferenceUtil.getHLogNames(fs, snapshotDir)); + files.addAll(SnapshotReferenceUtil.getHFileNames(fs, snapshotDir)); + return files; + } + }; +} Index: src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotManager.java =================================================================== --- 
src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotManager.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotManager.java (revision 0) @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.snapshot; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.executor.ExecutorService; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; +import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner; +import org.apache.hadoop.hbase.procedure.ProcedureCoordinator; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.zookeeper.KeeperException; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +/** + * Test basic snapshot manager functionality + */ +@Category(SmallTests.class) +public class TestSnapshotManager { + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + MasterServices services = Mockito.mock(MasterServices.class); + ProcedureCoordinator coordinator = Mockito.mock(ProcedureCoordinator.class); + ExecutorService pool = Mockito.mock(ExecutorService.class); + MasterFileSystem mfs = Mockito.mock(MasterFileSystem.class); + FileSystem fs; + { + try { + fs = UTIL.getTestFileSystem(); + } catch (IOException e) { + throw new RuntimeException("Couldn't get test filesystem", e); + } + } + + private SnapshotManager getNewManager() throws IOException, KeeperException { + return getNewManager(UTIL.getConfiguration()); + } + + private SnapshotManager getNewManager(final Configuration conf) throws IOException, KeeperException { + Mockito.reset(services); + Mockito.when(services.getConfiguration()).thenReturn(conf); + Mockito.when(services.getMasterFileSystem()).thenReturn(mfs); + Mockito.when(mfs.getFileSystem()).thenReturn(fs); + Mockito.when(mfs.getRootDir()).thenReturn(UTIL.getDataTestDir()); + return new SnapshotManager(services, coordinator, pool); + } + + @Test + public void testInProcess() throws KeeperException, IOException { + SnapshotManager manager = getNewManager(); + TakeSnapshotHandler handler = Mockito.mock(TakeSnapshotHandler.class); + assertFalse("Manager is in process when there is no current handler", 
manager.isTakingSnapshot()); + manager.setSnapshotHandlerForTesting(handler); + Mockito.when(handler.isFinished()).thenReturn(false); + assertTrue("Manager isn't in process when handler is running", manager.isTakingSnapshot()); + Mockito.when(handler.isFinished()).thenReturn(true); + assertFalse("Manager is process when handler isn't running", manager.isTakingSnapshot()); + } + + /** + * Verify the snapshot support based on the configuration. + */ + @Test + public void testSnapshotSupportConfiguration() throws Exception { + // No configuration (no cleaners, not enabled): snapshot feature disabled + Configuration conf = new Configuration(); + SnapshotManager manager = getNewManager(conf); + assertFalse("Snapshot should be disabled with no configuration", isSnapshotSupported(manager)); + + // force snapshot feature to be enabled + conf = new Configuration(); + conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); + manager = getNewManager(conf); + assertTrue("Snapshot should be enabled", isSnapshotSupported(manager)); + + // force snapshot feature to be disabled + conf = new Configuration(); + conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, false); + manager = getNewManager(conf); + assertFalse("Snapshot should be disabled", isSnapshotSupported(manager)); + + // force snapshot feature to be disabled, even if cleaners are present + conf = new Configuration(); + conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, + SnapshotHFileCleaner.class.getName(), HFileLinkCleaner.class.getName()); + conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, SnapshotLogCleaner.class.getName()); + conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, false); + manager = getNewManager(conf); + assertFalse("Snapshot should be disabled", isSnapshotSupported(manager)); + + // cleaners are present, but missing snapshot enabled property + conf = new Configuration(); + conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, + SnapshotHFileCleaner.class.getName(), HFileLinkCleaner.class.getName()); + conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, SnapshotLogCleaner.class.getName()); + manager = getNewManager(conf); + assertTrue("Snapshot should be enabled, because cleaners are present", + isSnapshotSupported(manager)); + + // Create a "test snapshot" + Path rootDir = UTIL.getDataTestDir(); + Path testSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir( + "testSnapshotSupportConfiguration", rootDir); + fs.mkdirs(testSnapshotDir); + try { + // force snapshot feature to be disabled, but snapshots are present + conf = new Configuration(); + conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, false); + manager = getNewManager(conf); + fail("Master should not start when snapshot is disabled, but snapshots are present"); + } catch (UnsupportedOperationException e) { + // expected + } finally { + fs.delete(testSnapshotDir, true); + } + } + + private boolean isSnapshotSupported(final SnapshotManager manager) { + try { + manager.checkSnapshotSupport(); + return true; + } catch (UnsupportedOperationException e) { + return false; + } + } +} Index: src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotLogCleaner.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotLogCleaner.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotLogCleaner.java (revision 0) @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.snapshot; + +import static org.junit.Assert.assertFalse; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.junit.AfterClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test that the snapshot log cleaner finds logs referenced in a snapshot + */ +@Category(SmallTests.class) +public class TestSnapshotLogCleaner { + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @AfterClass + public static void cleanup() throws IOException { + Configuration conf = TEST_UTIL.getConfiguration(); + Path rootDir = FSUtils.getRootDir(conf); + FileSystem fs = FileSystem.get(conf); + // cleanup + fs.delete(rootDir, true); + } + + @Test + public void testFindsSnapshotFilesWhenCleaning() throws IOException { + Configuration conf = TEST_UTIL.getConfiguration(); + FSUtils.setRootDir(conf, TEST_UTIL.getDataTestDir()); + Path rootDir = FSUtils.getRootDir(conf); + FileSystem fs = FileSystem.get(conf); + SnapshotLogCleaner cleaner = new SnapshotLogCleaner(); + cleaner.setConf(conf); + + // write an hfile to the snapshot directory + String snapshotName = "snapshot"; + byte[] snapshot = Bytes.toBytes(snapshotName); + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + Path snapshotLogDir = new Path(snapshotDir, HConstants.HREGION_LOGDIR_NAME); + String timestamp = "1339643343027"; + String hostFromMaster = "localhost%2C59648%2C1339643336601"; + + Path hostSnapshotLogDir = new Path(snapshotLogDir, hostFromMaster); + String snapshotlogfile = hostFromMaster + "." + timestamp + ".hbase"; + + // add the reference to log in the snapshot + fs.create(new Path(hostSnapshotLogDir, snapshotlogfile)); + + // now check to see if that log file would get deleted. 
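+    // Sketch of the state being set up here (purely illustrative; path components are shown
+    // symbolically): the snapshot references
+    //   <snapshotDir>/<log dir>/<hostFromMaster>/<hostFromMaster>.<timestamp>.hbase
+    // and an identically named log is created in the old-logs directory below, so the cleaner
+    // must report it as not deletable.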
+ Path oldlogDir = new Path(rootDir, ".oldlogs"); + Path logFile = new Path(oldlogDir, snapshotlogfile); + fs.create(logFile); + + // make sure that the file isn't deletable + assertFalse(cleaner.isFileDeletable(logFile)); + } +} \ No newline at end of file Index: src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java (revision 0) @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.backup.HFileArchiver; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test the HFileLink Cleaner. + * HFiles with links cannot be deleted until a link is present. 
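 * <p>
 * Rough shape of the scenario exercised below, in sketch form ({@code cleaner} and
 * {@code fs} are the objects created in the test; the comments are illustrative only):
 * <pre>
 *   cleaner.chore();   // a back reference to the hfile still exists: nothing may be deleted
 *   // ... the table link is renamed away, removing the last live link ...
 *   cleaner.chore();   // now the back reference, and later the hfile itself, become deletable
 * </pre>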
+ */ +@Category(SmallTests.class) +public class TestHFileLinkCleaner { + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @Test + public void testHFileLinkCleaning() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + conf.set(HConstants.HBASE_DIR, TEST_UTIL.getDataTestDir().toString()); + conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, HFileLinkCleaner.class.getName()); + Path rootDir = FSUtils.getRootDir(conf); + FileSystem fs = FileSystem.get(conf); + + final String tableName = "test-table"; + final String tableLinkName = "test-link"; + final String hfileName = "1234567890"; + final String familyName = "cf"; + + HRegionInfo hri = new HRegionInfo(Bytes.toBytes(tableName)); + HRegionInfo hriLink = new HRegionInfo(Bytes.toBytes(tableLinkName)); + + Path archiveDir = HFileArchiveUtil.getArchivePath(conf); + Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf, + tableName, hri.getEncodedName(), familyName); + Path archiveLinkStoreDir = HFileArchiveUtil.getStoreArchivePath(conf, + tableLinkName, hriLink.getEncodedName(), familyName); + + // Create hfile /hbase/table-link/region/cf/getEncodedName.HFILE(conf); + Path familyPath = getFamilyDirPath(archiveDir, tableName, hri.getEncodedName(), familyName); + fs.mkdirs(familyPath); + Path hfilePath = new Path(familyPath, hfileName); + fs.createNewFile(hfilePath); + + // Create link to hfile + Path familyLinkPath = getFamilyDirPath(rootDir, tableLinkName, + hriLink.getEncodedName(), familyName); + fs.mkdirs(familyLinkPath); + HFileLink.create(conf, fs, familyLinkPath, hri, hfileName); + Path linkBackRefDir = HFileLink.getBackReferencesDir(archiveStoreDir, hfileName); + assertTrue(fs.exists(linkBackRefDir)); + FileStatus[] backRefs = fs.listStatus(linkBackRefDir); + assertEquals(1, backRefs.length); + Path linkBackRef = backRefs[0].getPath(); + + // Initialize cleaner + final long ttl = 1000; + conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, ttl); + Server server = new DummyServer(); + HFileCleaner cleaner = new HFileCleaner(1000, server, conf, fs, archiveDir); + + // Link backref cannot be removed + cleaner.chore(); + assertTrue(fs.exists(linkBackRef)); + assertTrue(fs.exists(hfilePath)); + + // Link backref can be removed + fs.rename(new Path(rootDir, tableLinkName), new Path(archiveDir, tableLinkName)); + cleaner.chore(); + assertFalse("Link should be deleted", fs.exists(linkBackRef)); + + // HFile can be removed + Thread.sleep(ttl * 2); + cleaner.chore(); + assertFalse("HFile should be deleted", fs.exists(hfilePath)); + + // Remove everything + for (int i = 0; i < 4; ++i) { + Thread.sleep(ttl * 2); + cleaner.chore(); + } + assertFalse("HFile should be deleted", fs.exists(new Path(archiveDir, tableName))); + assertFalse("Link should be deleted", fs.exists(new Path(archiveDir, tableLinkName))); + + cleaner.interrupt(); + } + + private static Path getFamilyDirPath (final Path rootDir, final String table, + final String region, final String family) { + return new Path(new Path(new Path(rootDir, table), region), family); + } + + static class DummyServer implements Server { + + @Override + public Configuration getConfiguration() { + return TEST_UTIL.getConfiguration(); + } + + @Override + public ZooKeeperWatcher getZooKeeper() { + try { + return new ZooKeeperWatcher(getConfiguration(), "dummy server", this); + } catch (IOException e) { + e.printStackTrace(); + } + return null; + } + + @Override + public CatalogTracker getCatalogTracker() { + return null; + } + + @Override + 
public ServerName getServerName() { + return new ServerName("regionserver,60020,000000"); + } + + @Override + public void abort(String why, Throwable e) {} + + @Override + public boolean isAborted() { + return false; + } + + @Override + public void stop(String why) {} + + @Override + public boolean isStopped() { + return false; + } + } +} Index: src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotFromMaster.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotFromMaster.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/master/cleaner/TestSnapshotFromMaster.java (revision 0) @@ -0,0 +1,382 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.snapshot.DisabledTableSnapshotHandler; +import org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; +import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +import com.google.common.collect.Lists; + +/** + * Test the master-related aspects of a snapshot + */ 
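+// For orientation, a condensed sketch of the client-side flow these tests drive; the calls
+// are the same ones used in the test bodies below and are shown here only as a summary:
+//   HBaseAdmin admin = UTIL.getHBaseAdmin();
+//   admin.disableTable(TABLE_NAME);
+//   admin.snapshot(Bytes.toBytes("snapshot"), TABLE_NAME);
+//   admin.enableTable(TABLE_NAME);
+//   admin.deleteSnapshot(Bytes.toBytes("snapshot"));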
+@Category(MediumTests.class) +public class TestSnapshotFromMaster { + + private static final Log LOG = LogFactory.getLog(TestSnapshotFromMaster.class); + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static final int NUM_RS = 2; + private static Path rootDir; + private static Path snapshots; + private static FileSystem fs; + private static HMaster master; + + // for hfile archiving test. + private static Path archiveDir; + private static final String STRING_TABLE_NAME = "test"; + private static final byte[] TEST_FAM = Bytes.toBytes("fam"); + private static final byte[] TABLE_NAME = Bytes.toBytes(STRING_TABLE_NAME); + // refresh the cache every 1/2 second + private static final long cacheRefreshPeriod = 500; + + /** + * Setup the config for the cluster + */ + @BeforeClass + public static void setupCluster() throws Exception { + setupConf(UTIL.getConfiguration()); + UTIL.startMiniCluster(NUM_RS); + fs = UTIL.getDFSCluster().getFileSystem(); + master = UTIL.getMiniHBaseCluster().getMaster(); + rootDir = master.getMasterFileSystem().getRootDir(); + snapshots = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); + archiveDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY); + } + + private static void setupConf(Configuration conf) { + // disable the ui + conf.setInt("hbase.regionsever.info.port", -1); + // change the flush size to a small amount, regulating number of store files + conf.setInt("hbase.hregion.memstore.flush.size", 25000); + // so make sure we get a compaction when doing a load, but keep around some + // files in the store + conf.setInt("hbase.hstore.compaction.min", 3); + conf.setInt("hbase.hstore.compactionThreshold", 5); + // block writes if we get to 12 store files + conf.setInt("hbase.hstore.blockingStoreFiles", 12); + // drop the number of attempts for the hbase admin + conf.setInt("hbase.client.retries.number", 1); + // Ensure no extra cleaners on by default (e.g. TimeToLiveHFileCleaner) + conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, ""); + conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, ""); + // Enable snapshot + conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); + conf.setLong(SnapshotHFileCleaner.HFILE_CACHE_REFRESH_PERIOD_CONF_KEY, cacheRefreshPeriod); + + // prevent aggressive region split + conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, + ConstantSizeRegionSplitPolicy.class.getName()); + } + + @Before + public void setup() throws Exception { + UTIL.createTable(TABLE_NAME, TEST_FAM); + master.getSnapshotManagerForTesting().setSnapshotHandlerForTesting(null); + } + + @After + public void tearDown() throws Exception { + UTIL.deleteTable(TABLE_NAME); + + // delete the archive directory, if its exists + if (fs.exists(archiveDir)) { + if (!fs.delete(archiveDir, true)) { + throw new IOException("Couldn't delete archive directory (" + archiveDir + + " for an unknown reason"); + } + } + + // delete the snapshot directory, if its exists + if (fs.exists(snapshots)) { + if (!fs.delete(snapshots, true)) { + throw new IOException("Couldn't delete snapshots directory (" + snapshots + + " for an unknown reason"); + } + } + } + + @AfterClass + public static void cleanupTest() throws Exception { + try { + UTIL.shutdownMiniCluster(); + } catch (Exception e) { + // NOOP; + } + } + + /** + * Test that the contract from the master for checking on a snapshot are valid. + *

+ * <ol>
+ *   <li>If a snapshot fails with an error, we expect to get the source error.</li>
+ *   <li>If there is no snapshot name supplied, we should get an error.</li>
+ *   <li>If asking about a snapshot that hasn't occurred, you should get an error.</li>
+ * </ol>
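+ * <p>
+ * The contract is exercised with calls of this shape (a sketch using the same API as the
+ * body below):
+ * <pre>
+ *   SnapshotDescription desc = SnapshotDescription.newBuilder().setName(snapshotName).build();
+ *   master.isSnapshotDone(new HSnapshotDescription(desc));  // UnknownSnapshotException if unknown
+ * </pre>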
+ */ + @Test(timeout = 15000) + public void testIsDoneContract() throws Exception { + + String snapshotName = "asyncExpectedFailureTest"; + + // check that we get an exception when looking up snapshot where one hasn't happened + SnapshotTestingUtils.expectSnapshotDoneException(master, new HSnapshotDescription(), + UnknownSnapshotException.class); + + // and that we get the same issue, even if we specify a name + SnapshotDescription desc = SnapshotDescription.newBuilder() + .setName(snapshotName).build(); + SnapshotTestingUtils.expectSnapshotDoneException(master, new HSnapshotDescription(desc), + UnknownSnapshotException.class); + + // set a mock handler to simulate a snapshot + DisabledTableSnapshotHandler mockHandler = Mockito.mock(DisabledTableSnapshotHandler.class); + Mockito.when(mockHandler.getException()).thenReturn(null); + Mockito.when(mockHandler.getSnapshot()).thenReturn(desc); + Mockito.when(mockHandler.isFinished()).thenReturn(new Boolean(true)); + + master.getSnapshotManagerForTesting().setSnapshotHandlerForTesting(mockHandler); + + // if we do a lookup without a snapshot name, we should fail - you should always know your name + SnapshotTestingUtils.expectSnapshotDoneException(master, new HSnapshotDescription(), + UnknownSnapshotException.class); + + // then do the lookup for the snapshot that it is done + boolean isDone = master.isSnapshotDone(new HSnapshotDescription(desc)); + assertTrue("Snapshot didn't complete when it should have.", isDone); + + // now try the case where we are looking for a snapshot we didn't take + desc = SnapshotDescription.newBuilder().setName("Not A Snapshot").build(); + SnapshotTestingUtils.expectSnapshotDoneException(master, new HSnapshotDescription(desc), + UnknownSnapshotException.class); + + // then create a snapshot to the fs and make sure that we can find it when checking done + snapshotName = "completed"; + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + desc = desc.toBuilder().setName(snapshotName).build(); + SnapshotDescriptionUtils.writeSnapshotInfo(desc, snapshotDir, fs); + + isDone = master.isSnapshotDone(new HSnapshotDescription(desc)); + assertTrue("Completed, on-disk snapshot not found", isDone); + } + + @Test + public void testGetCompletedSnapshots() throws Exception { + // first check when there are no snapshots + List snapshots = master.getCompletedSnapshots(); + assertEquals("Found unexpected number of snapshots", 0, snapshots.size()); + + // write one snapshot to the fs + String snapshotName = "completed"; + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName(snapshotName).build(); + SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, snapshotDir, fs); + + // check that we get one snapshot + snapshots = master.getCompletedSnapshots(); + assertEquals("Found unexpected number of snapshots", 1, snapshots.size()); + List expected = Lists.newArrayList(new HSnapshotDescription(snapshot)); + assertEquals("Returned snapshots don't match created snapshots", expected, snapshots); + + // write a second snapshot + snapshotName = "completed_two"; + snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + snapshot = SnapshotDescription.newBuilder().setName(snapshotName).build(); + SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, snapshotDir, fs); + expected.add(new HSnapshotDescription(snapshot)); + + // check that we get one snapshot + snapshots = 
master.getCompletedSnapshots(); + assertEquals("Found unexpected number of snapshots", 2, snapshots.size()); + assertEquals("Returned snapshots don't match created snapshots", expected, snapshots); + } + + @Test + public void testDeleteSnapshot() throws Exception { + + String snapshotName = "completed"; + SnapshotDescription snapshot = SnapshotDescription.newBuilder().setName(snapshotName).build(); + + try { + master.deleteSnapshot(new HSnapshotDescription(snapshot)); + fail("Master didn't throw exception when attempting to delete snapshot that doesn't exist"); + } catch (IOException e) { + LOG.debug("Correctly failed delete of non-existant snapshot:" + e.getMessage()); + } + + // write one snapshot to the fs + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, snapshotDir, fs); + + // then delete the existing snapshot,which shouldn't cause an exception to be thrown + master.deleteSnapshot(new HSnapshotDescription(snapshot)); + } + + /** + * Test that the snapshot hfile archive cleaner works correctly. HFiles that are in snapshots + * should be retained, while those that are not in a snapshot should be deleted. + * @throws Exception on failure + */ + @Test + public void testSnapshotHFileArchiving() throws Exception { + HBaseAdmin admin = UTIL.getHBaseAdmin(); + // make sure we don't fail on listing snapshots + SnapshotTestingUtils.assertNoSnapshots(admin); + // load the table + UTIL.loadTable(new HTable(UTIL.getConfiguration(), TABLE_NAME), TEST_FAM); + + // disable the table so we can take a snapshot + admin.disableTable(TABLE_NAME); + + // take a snapshot of the table + String snapshotName = "snapshot"; + byte[] snapshotNameBytes = Bytes.toBytes(snapshotName); + admin.snapshot(snapshotNameBytes, TABLE_NAME); + + Configuration conf = master.getConfiguration(); + LOG.info("After snapshot File-System state"); + FSUtils.logFileSystemState(fs, rootDir, LOG); + + // ensure we only have one snapshot + SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotNameBytes, TABLE_NAME); + + // renable the table so we can compact the regions + admin.enableTable(TABLE_NAME); + + // compact the files so we get some archived files for the table we just snapshotted + List regions = UTIL.getHBaseCluster().getRegions(TABLE_NAME); + for (HRegion region : regions) { + region.waitForFlushesAndCompactions(); // enable can trigger a compaction, wait for it. 
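+      // compactStores() rewrites the store files; the files it replaces are expected to end up
+      // in the archive directory, which is what the snapshot-aware hfile cleaner is checked
+      // against below.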
+ region.compactStores(); + } + LOG.info("After compaction File-System state"); + FSUtils.logFileSystemState(fs, rootDir, LOG); + + // make sure the cleaner has run + LOG.debug("Running hfile cleaners"); + ensureHFileCleanersRun(); + LOG.info("After cleaners File-System state: " + rootDir); + FSUtils.logFileSystemState(fs, rootDir, LOG); + + // get the snapshot files for the table + Path snapshotTable = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + FileStatus[] snapshotHFiles = SnapshotTestingUtils.listHFiles(fs, snapshotTable); + // check that the files in the archive contain the ones that we need for the snapshot + LOG.debug("Have snapshot hfiles:"); + for (FileStatus file : snapshotHFiles) { + LOG.debug(file.getPath()); + } + // get the archived files for the table + Collection files = getArchivedHFiles(archiveDir, rootDir, fs, STRING_TABLE_NAME); + + // and make sure that there is a proper subset + for (FileStatus file : snapshotHFiles) { + assertTrue("Archived hfiles " + files + " is missing snapshot file:" + file.getPath(), + files.contains(file.getPath().getName())); + } + + // delete the existing snapshot + admin.deleteSnapshot(snapshotNameBytes); + SnapshotTestingUtils.assertNoSnapshots(admin); + + // make sure that we don't keep around the hfiles that aren't in a snapshot + // make sure we wait long enough to refresh the snapshot hfile + List delegates = UTIL.getMiniHBaseCluster().getMaster() + .getHFileCleaner().cleanersChain; + for (BaseHFileCleanerDelegate delegate: delegates) { + if (delegate instanceof SnapshotHFileCleaner) { + ((SnapshotHFileCleaner)delegate).getFileCacheForTesting().triggerCacheRefreshForTesting(); + } + } + // run the cleaner again + LOG.debug("Running hfile cleaners"); + ensureHFileCleanersRun(); + LOG.info("After delete snapshot cleaners run File-System state"); + FSUtils.logFileSystemState(fs, rootDir, LOG); + + files = getArchivedHFiles(archiveDir, rootDir, fs, STRING_TABLE_NAME); + assertEquals("Still have some hfiles in the archive, when their snapshot has been deleted.", 0, + files.size()); + } + + /** + * @return all the HFiles for a given table that have been archived + * @throws IOException on expected failure + */ + private final Collection getArchivedHFiles(Path archiveDir, Path rootDir, + FileSystem fs, String tableName) throws IOException { + Path tableArchive = new Path(archiveDir, tableName); + FileStatus[] archivedHFiles = SnapshotTestingUtils.listHFiles(fs, tableArchive); + List files = new ArrayList(archivedHFiles.length); + LOG.debug("Have archived hfiles: " + tableArchive); + for (FileStatus file : archivedHFiles) { + LOG.debug(file.getPath()); + files.add(file.getPath().getName()); + } + // sort the archived files + + Collections.sort(files); + return files; + } + + /** + * Make sure the {@link HFileCleaner HFileCleaners} run at least once + */ + private static void ensureHFileCleanersRun() { + UTIL.getHBaseCluster().getMaster().getHFileCleaner().chore(); + } +} Index: src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterObserver.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterObserver.java (revision 1451296) +++ src/test/java/org/apache/hadoop/hbase/coprocessor/TestMasterObserver.java (working copy) @@ -42,6 +42,8 @@ import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; +import 
org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; @@ -92,6 +94,14 @@ private boolean postStartMasterCalled; private boolean startCalled; private boolean stopCalled; + private boolean preSnapshotCalled; + private boolean postSnapshotCalled; + private boolean preCloneSnapshotCalled; + private boolean postCloneSnapshotCalled; + private boolean preRestoreSnapshotCalled; + private boolean postRestoreSnapshotCalled; + private boolean preDeleteSnapshotCalled; + private boolean postDeleteSnapshotCalled; public void enableBypass(boolean bypass) { this.bypass = bypass; @@ -124,6 +134,14 @@ postBalanceCalled = false; preBalanceSwitchCalled = false; postBalanceSwitchCalled = false; + preSnapshotCalled = false; + postSnapshotCalled = false; + preCloneSnapshotCalled = false; + postCloneSnapshotCalled = false; + preRestoreSnapshotCalled = false; + postRestoreSnapshotCalled = false; + preDeleteSnapshotCalled = false; + postDeleteSnapshotCalled = false; } @Override @@ -463,10 +481,82 @@ public boolean wasStarted() { return startCalled; } public boolean wasStopped() { return stopCalled; } + + @Override + public void preSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + preSnapshotCalled = true; + } + + @Override + public void postSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + postSnapshotCalled = true; + } + + public boolean wasSnapshotCalled() { + return preSnapshotCalled && postSnapshotCalled; + } + + @Override + public void preCloneSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + preCloneSnapshotCalled = true; + } + + @Override + public void postCloneSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + postCloneSnapshotCalled = true; + } + + public boolean wasCloneSnapshotCalled() { + return preCloneSnapshotCalled && postCloneSnapshotCalled; + } + + @Override + public void preRestoreSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + preRestoreSnapshotCalled = true; + } + + @Override + public void postRestoreSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + postRestoreSnapshotCalled = true; + } + + public boolean wasRestoreSnapshotCalled() { + return preRestoreSnapshotCalled && postRestoreSnapshotCalled; + } + + @Override + public void preDeleteSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot) throws IOException { + preDeleteSnapshotCalled = true; + } + + @Override + public void postDeleteSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot) throws IOException { + postDeleteSnapshotCalled = true; + } + + public boolean wasDeleteSnapshotCalled() { + return preDeleteSnapshotCalled && postDeleteSnapshotCalled; + } } private static HBaseTestingUtility UTIL = new HBaseTestingUtility(); + private static byte[] TEST_SNAPSHOT = Bytes.toBytes("observed_snapshot"); private static byte[] TEST_TABLE = 
Bytes.toBytes("observed_table"); + private static byte[] TEST_CLONE = Bytes.toBytes("observed_clone"); private static byte[] TEST_FAMILY = Bytes.toBytes("fam1"); private static byte[] TEST_FAMILY2 = Bytes.toBytes("fam2"); @@ -475,6 +565,8 @@ Configuration conf = UTIL.getConfiguration(); conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY, CPMasterObserver.class.getName()); + // Enable snapshot + conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true); // We need more than one data server on this test UTIL.startMiniCluster(2); } @@ -719,6 +811,63 @@ cp.wasBalanceCalled()); } + @Test + public void testSnapshotOperations() throws Exception { + MiniHBaseCluster cluster = UTIL.getHBaseCluster(); + HMaster master = cluster.getMaster(); + MasterCoprocessorHost host = master.getCoprocessorHost(); + CPMasterObserver cp = (CPMasterObserver)host.findCoprocessor( + CPMasterObserver.class.getName()); + cp.resetStates(); + + // create a table + HTableDescriptor htd = new HTableDescriptor(TEST_TABLE); + htd.addFamily(new HColumnDescriptor(TEST_FAMILY)); + HBaseAdmin admin = UTIL.getHBaseAdmin(); + + // delete table if exists + if (admin.tableExists(TEST_TABLE)) { + UTIL.deleteTable(TEST_TABLE); + } + + admin.createTable(htd); + admin.disableTable(TEST_TABLE); + assertTrue(admin.isTableDisabled(TEST_TABLE)); + + try { + // Test snapshot operation + assertFalse("Coprocessor should not have been called yet", + cp.wasSnapshotCalled()); + admin.snapshot(TEST_SNAPSHOT, TEST_TABLE); + assertTrue("Coprocessor should have been called on snapshot", + cp.wasSnapshotCalled()); + + // Test clone operation + admin.cloneSnapshot(TEST_SNAPSHOT, TEST_CLONE); + assertTrue("Coprocessor should have been called on snapshot clone", + cp.wasCloneSnapshotCalled()); + assertFalse("Coprocessor restore should not have been called on snapshot clone", + cp.wasRestoreSnapshotCalled()); + admin.disableTable(TEST_CLONE); + assertTrue(admin.isTableDisabled(TEST_TABLE)); + admin.deleteTable(TEST_CLONE); + + // Test restore operation + cp.resetStates(); + admin.restoreSnapshot(TEST_SNAPSHOT); + assertTrue("Coprocessor should have been called on snapshot restore", + cp.wasRestoreSnapshotCalled()); + assertFalse("Coprocessor clone should not have been called on snapshot restore", + cp.wasCloneSnapshotCalled()); + + admin.deleteSnapshot(TEST_SNAPSHOT); + assertTrue("Coprocessor should have been called on snapshot delete", + cp.wasDeleteSnapshotCalled()); + } finally { + admin.deleteTable(TEST_TABLE); + } + } + private void waitForRITtoBeZero(HMaster master) throws IOException { // wait for assignments to finish AssignmentManager mgr = master.getAssignmentManager(); Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java (revision 1451296) +++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java (working copy) @@ -19,6 +19,9 @@ */ package org.apache.hadoop.hbase.io.hfile; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + import java.nio.ByteBuffer; import java.util.Collection; import java.util.Map; @@ -28,19 +31,18 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.io.hfile.LruBlockCache.EvictionThread; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import 
org.apache.hadoop.hbase.regionserver.metrics.TestSchemaMetrics; import org.apache.hadoop.hbase.util.ClassSize; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; -import org.junit.experimental.categories.Category; -import static org.junit.Assert.*; - /** * Tests the concurrent LruBlockCache.

* @@ -77,7 +79,6 @@ @Test public void testBackgroundEvictionThread() throws Exception { - long maxSize = 100000; long blockSize = calculateBlockSizeDefault(maxSize, 9); // room for 9, will evict @@ -85,6 +86,14 @@ CachedItem [] blocks = generateFixedBlocks(10, blockSize, "block"); + EvictionThread evictionThread = cache.getEvictionThread(); + assertTrue(evictionThread != null); + + // Make sure eviction thread has entered run method + while (!evictionThread.isEnteringRun()) { + Thread.sleep(1); + } + // Add all the blocks for (CachedItem block : blocks) { cache.cacheBlock(block.cacheKey, block); Index: src/test/java/org/apache/hadoop/hbase/io/TestFileLink.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/io/TestFileLink.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/io/TestFileLink.java (revision 0) @@ -0,0 +1,244 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.io; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import junit.framework.TestCase; +import org.junit.experimental.categories.Category; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.io.FileLink; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Test that FileLink switches between alternate locations + * when the current location moves or gets deleted. + */ +@Category(MediumTests.class) +public class TestFileLink { + /** + * Test, on HDFS, that the FileLink is still readable + * even when the current file gets renamed. 
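+ * <p>
+ * The pattern under test, in sketch form (the same calls as the shared helper below):
+ * <pre>
+ *   // files = [originalPath, archivedPath], in preference order
+ *   FileLink link = new FileLink(files);
+ *   FSDataInputStream in = link.open(fs);  // reads keep working after originalPath is renamed
+ * </pre>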
+ */ + @Test + public void testHDFSLinkReadDuringRename() throws Exception { + HBaseTestingUtility testUtil = new HBaseTestingUtility(); + Configuration conf = testUtil.getConfiguration(); + conf.setInt("dfs.blocksize", 1024 * 1024); + conf.setInt("dfs.client.read.prefetch.size", 2 * 1024 * 1024); + + testUtil.startMiniDFSCluster(1); + MiniDFSCluster cluster = testUtil.getDFSCluster(); + FileSystem fs = cluster.getFileSystem(); + assertEquals("hdfs", fs.getUri().getScheme()); + + try { + testLinkReadDuringRename(fs, testUtil.getDefaultRootDirPath()); + } finally { + testUtil.shutdownMiniCluster(); + } + } + + /** + * Test, on a local filesystem, that the FileLink is still readable + * even when the current file gets renamed. + */ + @Test + public void testLocalLinkReadDuringRename() throws IOException { + HBaseTestingUtility testUtil = new HBaseTestingUtility(); + FileSystem fs = testUtil.getTestFileSystem(); + assertEquals("file", fs.getUri().getScheme()); + testLinkReadDuringRename(fs, testUtil.getDataTestDir()); + } + + /** + * Test that link is still readable even when the current file gets renamed. + */ + private void testLinkReadDuringRename(FileSystem fs, Path rootDir) throws IOException { + Path originalPath = new Path(rootDir, "test.file"); + Path archivedPath = new Path(rootDir, "archived.file"); + + writeSomeData(fs, originalPath, 256 << 20, (byte)2); + + List files = new ArrayList(); + files.add(originalPath); + files.add(archivedPath); + + FileLink link = new FileLink(files); + FSDataInputStream in = link.open(fs); + try { + byte[] data = new byte[8192]; + long size = 0; + + // Read from origin + int n = in.read(data); + dataVerify(data, n, (byte)2); + size += n; + + // Move origin to archive + assertFalse(fs.exists(archivedPath)); + fs.rename(originalPath, archivedPath); + assertFalse(fs.exists(originalPath)); + assertTrue(fs.exists(archivedPath)); + + // Try to read to the end + while ((n = in.read(data)) > 0) { + dataVerify(data, n, (byte)2); + size += n; + } + + assertEquals(256 << 20, size); + } finally { + in.close(); + if (fs.exists(originalPath)) fs.delete(originalPath); + if (fs.exists(archivedPath)) fs.delete(archivedPath); + } + } + + /** + * Test that link is still readable even when the current file gets deleted. + * + * NOTE: This test is valid only on HDFS. + * When a file is deleted from a local file-system, it is simply 'unlinked'. + * The inode, which contains the file's data, is not deleted until all + * processes have finished with it. + * In HDFS when the request exceed the cached block locations, + * a query to the namenode is performed, using the filename, + * and the deleted file doesn't exists anymore (FileNotFoundException). 
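+ * <p>
+ * Sketch of the switch-on-delete behaviour checked below (illustrative only):
+ * <pre>
+ *   in.read(data);            // served from files.get(0)
+ *   fs.delete(files.get(0));
+ *   in.read(data);            // once cached blocks run out, FileLink should fall back to files.get(1)
+ * </pre>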
+ */ + @Test + public void testHDFSLinkReadDuringDelete() throws Exception { + HBaseTestingUtility testUtil = new HBaseTestingUtility(); + Configuration conf = testUtil.getConfiguration(); + conf.setInt("dfs.blocksize", 1024 * 1024); + conf.setInt("dfs.client.read.prefetch.size", 2 * 1024 * 1024); + + testUtil.startMiniDFSCluster(1); + MiniDFSCluster cluster = testUtil.getDFSCluster(); + FileSystem fs = cluster.getFileSystem(); + assertEquals("hdfs", fs.getUri().getScheme()); + + try { + List files = new ArrayList(); + for (int i = 0; i < 3; i++) { + Path path = new Path(String.format("test-data-%d", i)); + writeSomeData(fs, path, 1 << 20, (byte)i); + files.add(path); + } + + FileLink link = new FileLink(files); + FSDataInputStream in = link.open(fs); + try { + byte[] data = new byte[8192]; + int n; + + // Switch to file 1 + n = in.read(data); + dataVerify(data, n, (byte)0); + fs.delete(files.get(0)); + skipBuffer(in, (byte)0); + + // Switch to file 2 + n = in.read(data); + dataVerify(data, n, (byte)1); + fs.delete(files.get(1)); + skipBuffer(in, (byte)1); + + // Switch to file 3 + n = in.read(data); + dataVerify(data, n, (byte)2); + fs.delete(files.get(2)); + skipBuffer(in, (byte)2); + + // No more files available + try { + n = in.read(data); + assert(n <= 0); + } catch (FileNotFoundException e) { + assertTrue(true); + } + } finally { + in.close(); + } + } finally { + testUtil.shutdownMiniCluster(); + } + } + + /** + * Write up to 'size' bytes with value 'v' into a new file called 'path'. + */ + private void writeSomeData (FileSystem fs, Path path, long size, byte v) throws IOException { + byte[] data = new byte[4096]; + for (int i = 0; i < data.length; i++) { + data[i] = v; + } + + FSDataOutputStream stream = fs.create(path); + try { + long written = 0; + while (written < size) { + stream.write(data, 0, data.length); + written += data.length; + } + } finally { + stream.close(); + } + } + + /** + * Verify that all bytes in 'data' have 'v' as value. + */ + private static void dataVerify(byte[] data, int n, byte v) { + for (int i = 0; i < n; ++i) { + assertEquals(v, data[i]); + } + } + + private static void skipBuffer(FSDataInputStream in, byte v) throws IOException { + byte[] data = new byte[8192]; + try { + int n; + while ((n = in.read(data)) == data.length) { + for (int i = 0; i < data.length; ++i) { + if (data[i] != v) + throw new Exception("File changed"); + } + } + } catch (Exception e) { + } + } +} Index: src/main/protobuf/ErrorHandling.proto =================================================================== --- src/main/protobuf/ErrorHandling.proto (revision 0) +++ src/main/protobuf/ErrorHandling.proto (revision 0) @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// This file contains protocol buffers that are used for error handling + +option java_package = "org.apache.hadoop.hbase.protobuf.generated"; +option java_outer_classname = "ErrorHandlingProtos"; +option java_generate_equals_and_hash = true; +option optimize_for = SPEED; + +/** + * Protobuf version of a java.lang.StackTraceElement + * so we can serialize exceptions. + */ +message StackTraceElementMessage { + optional string declaringClass = 1; + optional string methodName = 2; + optional string fileName = 3; + optional int32 lineNumber = 4; +} + +/** + * Cause of a remote failure for a generic exception. Contains + * all the information for a generic exception as well as + * optional info about the error for generic info passing + * (which should be another protobuffed class). + */ +message GenericExceptionMessage { + optional string className = 1; + optional string message = 2; + optional bytes errorInfo = 3; + repeated StackTraceElementMessage trace = 4; +} + +/** + * Exception sent across the wire when a remote task needs + * to notify other tasks that it failed and why + */ +message ForeignExceptionMessage { + optional string source = 1; + optional GenericExceptionMessage genericException = 2; + +} Index: src/main/protobuf/hbase.proto =================================================================== --- src/main/protobuf/hbase.proto (revision 0) +++ src/main/protobuf/hbase.proto (revision 0) @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This file contains protocol buffers that are shared throughout HBase + +option java_package = "org.apache.hadoop.hbase.protobuf.generated"; +option java_outer_classname = "HBaseProtos"; +option java_generate_equals_and_hash = true; +option optimize_for = SPEED; + +/** + * Description of the snapshot to take + */ +message SnapshotDescription { + required string name = 1; + optional string table = 2; // not needed for delete, but checked for in taking snapshot + optional int64 creationTime = 3 [default = 0]; + enum Type { + DISABLED = 0; + FLUSH = 1; + } + optional Type type = 4 [default = FLUSH]; + optional int32 version = 5; +} Index: src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionDispatcher.java (revision 0) @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.errorhandling;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * The dispatcher acts as the state-holding entity for foreign error handling. The first
+ * exception received by the dispatcher gets passed directly to the listeners. Subsequent
+ * exceptions are dropped.
+ *

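+ * A rough usage sketch (illustrative only; the listener and the wrapped IOException below are
+ * placeholders, not something this class prescribes):
+ * <pre>
+ *   ForeignExceptionDispatcher dispatcher = new ForeignExceptionDispatcher("snapshot-abc");
+ *   dispatcher.addListener(someListener);  // placeholder ForeignExceptionListener
+ *   dispatcher.receive(new ForeignException("rs-1", new IOException("flush failed")));
+ *   dispatcher.receive(new ForeignException("rs-2", new IOException("too late")));  // ignored
+ *   assert dispatcher.hasException();      // only the first exception is retained
+ * </pre>
+ *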
+ * If there are multiple dispatchers that are all in the same foreign exception monitoring group,
+ * ideally all these monitors are "peers" -- any error on one dispatcher should get propagated to
+ * all the others (via RPC, or some other mechanism). Due to racing error conditions, the exact
+ * reason for failure may differ between peers, but the fact that they are in an error state
+ * should eventually hold for all of them.
+ *

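+ * As a sketch of such wiring (the forwarding listeners here are hypothetical, e.g. thin wrappers
+ * around an RPC call; nothing here provides them):
+ * <pre>
+ *   localDispatcher.addListener(forwardToRemotePeer);   // pushes errors to the other side
+ *   remoteDispatcher.addListener(forwardToLocalPeer);   // and vice versa on the remote side
+ * </pre>
+ *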
+ * This is thread-safe and must be because this is expected to be used to propagate exceptions + * from foreign threads. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ForeignExceptionDispatcher implements ForeignExceptionListener, ForeignExceptionSnare { + public static final Log LOG = LogFactory.getLog(ForeignExceptionDispatcher.class); + protected final String name; + protected final List listeners = + new ArrayList(); + private ForeignException exception; + + public ForeignExceptionDispatcher(String name) { + this.name = name; + } + + public ForeignExceptionDispatcher() { + this(""); + } + + public String getName() { + return name; + } + + @Override + public synchronized void receive(ForeignException e) { + // if we already have an exception, then ignore it + if (exception != null) return; + + LOG.debug(name + " accepting received exception" , e); + // mark that we got the error + if (e != null) { + exception = e; + } else { + exception = new ForeignException(name, ""); + } + + // notify all the listeners + dispatch(e); + } + + @Override + public synchronized void rethrowException() throws ForeignException { + if (exception != null) { + // This gets the stack where this is caused, (instead of where it was deserialized). + // This is much more useful for debugging + throw new ForeignException(exception.getSource(), exception.getCause()); + } + } + + @Override + public synchronized boolean hasException() { + return exception != null; + } + + @Override + synchronized public ForeignException getException() { + return exception; + } + + /** + * Sends an exception to all listeners. + * @param message human readable message passed to the listener + * @param e {@link ForeignException} containing the cause. Can be null. + */ + private void dispatch(ForeignException e) { + // update all the listeners with the passed error + for (ForeignExceptionListener l: listeners) { + l.receive(e); + } + } + + /** + * Listen for failures to a given process. This method should only be used during + * initialization and not added to after exceptions are accepted. + * @param errorable listener for the errors. may be null. + */ + public synchronized void addListener(ForeignExceptionListener errorable) { + this.listeners.add(errorable); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionSnare.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionSnare.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionSnare.java (revision 0) @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hbase.errorhandling;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * This is an interface for a cooperative exception-throwing mechanism. Implementations are
+ * containers that hold an exception from a separate thread. This can be used to receive
+ * exceptions from 'foreign' threads or from separate 'foreign' processes.
+ *

+ * To use, one would pass an implementation of this object to a long running method and + * periodically check by calling {@link #rethrowException()}. If any foreign exceptions have + * been received, the calling thread is then responsible for handling the rethrown exception. + *

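+ * For example (an illustrative sketch; hasMoreWork() and doNextChunkOfWork() are placeholders):
+ * <pre>
+ *   void runLongTask(ForeignExceptionSnare snare) throws ForeignException {
+ *     while (hasMoreWork()) {
+ *       snare.rethrowException();   // abort promptly if a foreign error has been reported
+ *       doNextChunkOfWork();
+ *     }
+ *   }
+ * </pre>
+ *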
+ * One could also use the boolean {@link #hasException()} to determine if there is an exception.
+ *

+ * NOTE: This is very similar to the InterruptedException/interrupt/interrupted pattern. There,
+ * the notification state is bound to a Thread. Using this, applications receive Exceptions in
+ * the snare. The snare is referenced and checked by multiple threads, which enables exception
+ * notification in all the involved threads/processes.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public interface ForeignExceptionSnare {
+
+  /**
+   * Rethrow an exception currently held by the {@link ForeignExceptionSnare}. If there is
+   * no exception this is a no-op.
+   *
+   * @throws ForeignException
+   *           if an exception has been received from a remote or foreign source
+   */
+  public void rethrowException() throws ForeignException;
+
+  /**
+   * Non-exceptional form of {@link #rethrowException()}. Checks to see if any
+   * process to which the exception checker is bound has created an error that
+   * would cause a failure.
+   *
+   * @return true if there has been an error, false otherwise
+   */
+  public boolean hasException();
+
+  /**
+   * Get the value of the captured exception.
+   *
+   * @return the captured foreign exception or null if no exception was captured.
+   */
+  public ForeignException getException();
+}
Index: src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutExceptionInjector.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutExceptionInjector.java (revision 0)
+++ src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutExceptionInjector.java (revision 0)
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.errorhandling;
+
+import java.util.Timer;
+import java.util.TimerTask;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+
+/**
+ * Time a given process/operation and report a failure if the elapsed time exceeds the max allowed
+ * time.
+ *

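+ * A typical lifecycle, sketched for illustration (the dispatcher used as listener and the 60s
+ * timeout are arbitrary choices for the example):
+ * <pre>
+ *   ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher("snapshot");
+ *   TimeoutExceptionInjector timer = new TimeoutExceptionInjector(monitor, 60000);
+ *   timer.start();      // begin tracking elapsed time
+ *   // ... do the guarded work; on timeout the listener receives a ForeignException
+ *   timer.complete();   // finished in time, so suppress the timeout notification
+ * </pre>
+ *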
+ * The timer won't start tracking time until calling {@link #start()}. If {@link #complete()} or + * {@link #trigger()} is called before {@link #start()}, calls to {@link #start()} will fail. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class TimeoutExceptionInjector { + + private static final Log LOG = LogFactory.getLog(TimeoutExceptionInjector.class); + + private final long maxTime; + private volatile boolean complete; + private final Timer timer; + private final TimerTask timerTask; + private long start = -1; + + /** + * Create a generic timer for a task/process. + * @param listener listener to notify if the process times out + * @param maxTime max allowed running time for the process. Timer starts on calls to + * {@link #start()} + */ + public TimeoutExceptionInjector(final ForeignExceptionListener listener, final long maxTime) { + this.maxTime = maxTime; + timer = new Timer(); + timerTask = new TimerTask() { + @Override + public void run() { + // ensure we don't run this task multiple times + synchronized (this) { + // quick exit if we already marked the task complete + if (TimeoutExceptionInjector.this.complete) return; + // mark the task is run, to avoid repeats + TimeoutExceptionInjector.this.complete = true; + } + long end = EnvironmentEdgeManager.currentTimeMillis(); + TimeoutException tee = new TimeoutException( + "Timeout caused Foreign Exception", start, end, maxTime); + String source = "timer-" + timer; + listener.receive(new ForeignException(source, tee)); + } + }; + } + + public long getMaxTime() { + return maxTime; + } + + /** + * For all time forward, do not throw an error because the process has completed. + */ + public void complete() { + // warn if the timer is already marked complete. This isn't going to be thread-safe, but should + // be good enough and its not worth locking just for a warning. + if (this.complete) { + LOG.warn("Timer already marked completed, ignoring!"); + return; + } + LOG.debug("Marking timer as complete - no error notifications will be received for this timer."); + synchronized (this.timerTask) { + this.complete = true; + } + this.timer.cancel(); + } + + /** + * Start a timer to fail a process if it takes longer than the expected time to complete. + *

+ * Non-blocking. + * @throws IllegalStateException if the timer has already been marked done via {@link #complete()} + * or {@link #trigger()} + */ + public synchronized void start() throws IllegalStateException { + if (this.start >= 0) { + LOG.warn("Timer already started, can't be started again. Ignoring second request."); + return; + } + LOG.debug("Scheduling process timer to run in: " + maxTime + " ms"); + timer.schedule(timerTask, maxTime); + this.start = EnvironmentEdgeManager.currentTimeMillis(); + } + + /** + * Trigger the timer immediately. + *

+ * Exposed for testing. + */ + public void trigger() { + synchronized (timerTask) { + if (this.complete) { + LOG.warn("Timer already completed, not triggering."); + return; + } + LOG.debug("Triggering timer immediately!"); + this.timer.cancel(); + this.timerTask.run(); + } + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/errorhandling/TimeoutException.java (revision 0) @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.errorhandling; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Exception for timeout of a task. + * @see TimeoutExceptionInjector + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +@SuppressWarnings("serial") +public class TimeoutException extends Exception { + + private final String sourceName; + private final long start; + private final long end; + private final long expected; + + /** + * Exception indicating that an operation attempt has timed out + * @param start time the operation started (ms since epoch) + * @param end time the timeout was triggered (ms since epoch) + * @param expected expected amount of time for the operation to complete (ms) (ideally, expected <= end-start) + */ + public TimeoutException(String sourceName, long start, long end, long expected) { + super("Timeout elapsed! Source:" + sourceName + " Start:" + start + ", End:" + end + + ", diff:" + (end - start) + ", max:" + expected + " ms"); + this.sourceName = sourceName; + this.start = start; + this.end = end; + this.expected = expected; + } + + public long getStart() { + return start; + } + + public long getEnd() { + return end; + } + + public long getMaxAllowedOperationTime() { + return expected; + } + + public String getSourceName() { + return sourceName; + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignException.java (revision 0) @@ -0,0 +1,194 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.errorhandling; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage; +import org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage; +import org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage; + +import com.google.protobuf.InvalidProtocolBufferException; + +/** + * A ForeignException is an exception from another thread or process. + *

+ * ForeignExceptions are sent to 'remote' peers to signal an abort in the face of failures. + * When serialized for transmission we encode using Protobufs to ensure version compatibility. + *

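+ * A serialization round trip, sketched for illustration (the cause below is any local Throwable):
+ * <pre>
+ *   byte[] wire = ForeignException.serialize("region-server-1", cause);
+ *   ForeignException restored = ForeignException.deserialize(wire);
+ *   assert restored.isRemote();   // its cause is a ProxyThrowable rebuilt from the message
+ * </pre>
+ *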
+ * Foreign exceptions contain a Throwable as their cause. This can be a "regular" exception
+ * generated locally or a ProxyThrowable that is a representation of the original exception
+ * created on the original 'remote' source. These ProxyThrowables have their stack traces and
+ * messages overridden to reflect the original 'remote' exception. The only way these
+ * ProxyThrowables are generated is by this class's {@link #deserialize(byte[])} method.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+@SuppressWarnings("serial")
+public class ForeignException extends IOException {
+
+  /**
+   * Name of the throwable's source such as a host or thread name. Must be non-null.
+   */
+  private final String source;
+
+  /**
+   * Create a new ForeignException that can be serialized. It is assumed that this came from a
+   * local source.
+   * @param source name of the source (e.g. a host or thread name)
+   * @param cause the local exception to wrap
+   */
+  public ForeignException(String source, Throwable cause) {
+    super(cause);
+    assert source != null;
+    assert cause != null;
+    this.source = source;
+  }
+
+  /**
+   * Create a new ForeignException that can be serialized. It is assumed that this is locally
+   * generated.
+   * @param source name of the source (e.g. a host or thread name)
+   * @param msg message describing the error
+   */
+  public ForeignException(String source, String msg) {
+    super(new IllegalArgumentException(msg));
+    this.source = source;
+  }
+
+  public String getSource() {
+    return source;
+  }
+
+  /**
+   * The cause of a ForeignException can be an exception that was generated on a local in-process
+   * thread, or a thread from a 'remote' separate process.
+   *
+   * If the cause is a ProxyThrowable, we know it came from deserialization, which usually means
+   * it came from not only another thread, but also from a remote thread.
+   *
+   * @return true if it went through deserialization, false if locally generated
+   */
+  public boolean isRemote() {
+    return getCause() instanceof ProxyThrowable;
+  }
+
+  @Override
+  public String toString() {
+    String className = getCause().getClass().getName();
+    return className + " via " + getSource() + ":" + getLocalizedMessage();
+  }
+
+  /**
+   * Convert a stack trace to a list of {@link StackTraceElementMessage}s.
+   * @param trace the stack trace to convert to protobuf messages
+   * @return the protobuf representation of the stack trace, or null if the passed stack is null.
+   */
+  private static List<StackTraceElementMessage> toStackTraceElementMessages(
+      StackTraceElement[] trace) {
+    // if there is no stack trace, ignore it and just return the message
+    if (trace == null) return null;
+    // build the stack trace for the message
+    List<StackTraceElementMessage> pbTrace =
+        new ArrayList<StackTraceElementMessage>(trace.length);
+    for (StackTraceElement elem : trace) {
+      StackTraceElementMessage.Builder stackBuilder = StackTraceElementMessage.newBuilder();
+      stackBuilder.setDeclaringClass(elem.getClassName());
+      stackBuilder.setFileName(elem.getFileName());
+      stackBuilder.setLineNumber(elem.getLineNumber());
+      stackBuilder.setMethodName(elem.getMethodName());
+      pbTrace.add(stackBuilder.build());
+    }
+    return pbTrace;
+  }
+
+  /**
+   * This is a proxy Throwable that contains the information of the original remote exception.
+   */
+  private static class ProxyThrowable extends Throwable {
+    ProxyThrowable(String msg, StackTraceElement[] trace) {
+      super(msg);
+      this.setStackTrace(trace);
+    }
+  }
+
+  /**
+   * Converts a ForeignException to an array of bytes.
+   * @param source the name of the external exception source
+   * @param t the "local" external exception
+   * @return protobuf serialized version of the ForeignException
+   */
+  public static byte[] serialize(String source, Throwable t) {
+    GenericExceptionMessage.Builder gemBuilder = GenericExceptionMessage.newBuilder();
+    gemBuilder.setClassName(t.getClass().getName());
+    if (t.getMessage() != null) {
+      gemBuilder.setMessage(t.getMessage());
+    }
+    // set the stack trace, if there is one
+    List<StackTraceElementMessage> stack =
+        ForeignException.toStackTraceElementMessages(t.getStackTrace());
+    if (stack != null) {
+      gemBuilder.addAllTrace(stack);
+    }
+    GenericExceptionMessage payload = gemBuilder.build();
+    ForeignExceptionMessage.Builder exception = ForeignExceptionMessage.newBuilder();
+    exception.setGenericException(payload).setSource(source);
+    ForeignExceptionMessage eem = exception.build();
+    return eem.toByteArray();
+  }
+
+  /**
+   * Takes a series of bytes and tries to generate a ForeignException instance from it.
+   * @param bytes the serialized bytes of a ForeignExceptionMessage
+   * @return the ForeignException instance
+   * @throws InvalidProtocolBufferException if there was a deserialization problem.
+   */
+  public static ForeignException deserialize(byte[] bytes) throws InvalidProtocolBufferException {
+    // figure out the data we need to pass
+    ForeignExceptionMessage eem = ForeignExceptionMessage.parseFrom(bytes);
+    GenericExceptionMessage gem = eem.getGenericException();
+    StackTraceElement[] trace = ForeignException.toStackTrace(gem.getTraceList());
+    ProxyThrowable dfe = new ProxyThrowable(gem.getMessage(), trace);
+    ForeignException e = new ForeignException(eem.getSource(), dfe);
+    return e;
+  }
+
+  /**
+   * Unwind a serialized array of {@link StackTraceElementMessage}s to an array of
+   * {@link StackTraceElement}s.
+   * @param traceList list that was serialized
+   * @return the deserialized array, or an empty array if it couldn't be unwound (e.g. wasn't set
+   *         on the sender).
+   */
+  private static StackTraceElement[] toStackTrace(List<StackTraceElementMessage> traceList) {
+    if (traceList == null || traceList.size() == 0) {
+      return new StackTraceElement[0]; // empty array
+    }
+    StackTraceElement[] trace = new StackTraceElement[traceList.size()];
+    for (int i = 0; i < traceList.size(); i++) {
+      StackTraceElementMessage elem = traceList.get(i);
+      trace[i] = new StackTraceElement(
+          elem.getDeclaringClass(), elem.getMethodName(), elem.getFileName(), elem.getLineNumber());
+    }
+    return trace;
+  }
+}
\ No newline at end of file
Index: src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionListener.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionListener.java (revision 0)
+++ src/main/java/org/apache/hadoop/hbase/errorhandling/ForeignExceptionListener.java (revision 0)
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.errorhandling; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * The ForeignExceptionListener is an interface for objects that can receive a ForeignException. + *

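+ * A minimal implementation sketch (illustrative only; LOG is a placeholder logger):
+ * <pre>
+ *   class LoggingListener implements ForeignExceptionListener {
+ *     public void receive(ForeignException e) {
+ *       // note: e may be null, see receive(ForeignException) below
+ *       LOG.warn("Remote failure" + (e == null ? "" : " from " + e.getSource()), e);
+ *     }
+ *   }
+ * </pre>
+ *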
+ * Implementations must be thread-safe, because this is expected to be used to propagate exceptions + * from foreign threads. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface ForeignExceptionListener { + + /** + * Receive a ForeignException. + *

+ * Implementers must ensure that this method is thread-safe. + * @param e exception causing the error. Implementations must accept and handle null here. + */ + public void receive(ForeignException e); +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/protobuf/generated/ErrorHandlingProtos.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/protobuf/generated/ErrorHandlingProtos.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/protobuf/generated/ErrorHandlingProtos.java (revision 0) @@ -0,0 +1,2185 @@ +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: ErrorHandling.proto + +package org.apache.hadoop.hbase.protobuf.generated; + +public final class ErrorHandlingProtos { + private ErrorHandlingProtos() {} + public static void registerAllExtensions( + com.google.protobuf.ExtensionRegistry registry) { + } + public interface StackTraceElementMessageOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional string declaringClass = 1; + boolean hasDeclaringClass(); + String getDeclaringClass(); + + // optional string methodName = 2; + boolean hasMethodName(); + String getMethodName(); + + // optional string fileName = 3; + boolean hasFileName(); + String getFileName(); + + // optional int32 lineNumber = 4; + boolean hasLineNumber(); + int getLineNumber(); + } + public static final class StackTraceElementMessage extends + com.google.protobuf.GeneratedMessage + implements StackTraceElementMessageOrBuilder { + // Use StackTraceElementMessage.newBuilder() to construct. + private StackTraceElementMessage(Builder builder) { + super(builder); + } + private StackTraceElementMessage(boolean noInit) {} + + private static final StackTraceElementMessage defaultInstance; + public static StackTraceElementMessage getDefaultInstance() { + return defaultInstance; + } + + public StackTraceElementMessage getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_StackTraceElementMessage_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_StackTraceElementMessage_fieldAccessorTable; + } + + private int bitField0_; + // optional string declaringClass = 1; + public static final int DECLARINGCLASS_FIELD_NUMBER = 1; + private java.lang.Object declaringClass_; + public boolean hasDeclaringClass() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getDeclaringClass() { + java.lang.Object ref = declaringClass_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + declaringClass_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getDeclaringClassBytes() { + java.lang.Object ref = declaringClass_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + declaringClass_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional string methodName = 2; + public static final int METHODNAME_FIELD_NUMBER = 2; + private java.lang.Object 
methodName_; + public boolean hasMethodName() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public String getMethodName() { + java.lang.Object ref = methodName_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + methodName_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getMethodNameBytes() { + java.lang.Object ref = methodName_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + methodName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional string fileName = 3; + public static final int FILENAME_FIELD_NUMBER = 3; + private java.lang.Object fileName_; + public boolean hasFileName() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public String getFileName() { + java.lang.Object ref = fileName_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + fileName_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getFileNameBytes() { + java.lang.Object ref = fileName_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + fileName_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional int32 lineNumber = 4; + public static final int LINENUMBER_FIELD_NUMBER = 4; + private int lineNumber_; + public boolean hasLineNumber() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + public int getLineNumber() { + return lineNumber_; + } + + private void initFields() { + declaringClass_ = ""; + methodName_ = ""; + fileName_ = ""; + lineNumber_ = 0; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getDeclaringClassBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeBytes(2, getMethodNameBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeBytes(3, getFileNameBytes()); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + output.writeInt32(4, lineNumber_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getDeclaringClassBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(2, getMethodNameBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(3, getFileNameBytes()); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + size += com.google.protobuf.CodedOutputStream + 
.computeInt32Size(4, lineNumber_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage)) { + return super.equals(obj); + } + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage other = (org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage) obj; + + boolean result = true; + result = result && (hasDeclaringClass() == other.hasDeclaringClass()); + if (hasDeclaringClass()) { + result = result && getDeclaringClass() + .equals(other.getDeclaringClass()); + } + result = result && (hasMethodName() == other.hasMethodName()); + if (hasMethodName()) { + result = result && getMethodName() + .equals(other.getMethodName()); + } + result = result && (hasFileName() == other.hasFileName()); + if (hasFileName()) { + result = result && getFileName() + .equals(other.getFileName()); + } + result = result && (hasLineNumber() == other.hasLineNumber()); + if (hasLineNumber()) { + result = result && (getLineNumber() + == other.getLineNumber()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + @java.lang.Override + public int hashCode() { + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasDeclaringClass()) { + hash = (37 * hash) + DECLARINGCLASS_FIELD_NUMBER; + hash = (53 * hash) + getDeclaringClass().hashCode(); + } + if (hasMethodName()) { + hash = (37 * hash) + METHODNAME_FIELD_NUMBER; + hash = (53 * hash) + getMethodName().hashCode(); + } + if (hasFileName()) { + hash = (37 * hash) + FILENAME_FIELD_NUMBER; + hash = (53 * hash) + getFileName().hashCode(); + } + if (hasLineNumber()) { + hash = (37 * hash) + LINENUMBER_FIELD_NUMBER; + hash = (53 * hash) + getLineNumber(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + return hash; + } + + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static 
org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessageOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_StackTraceElementMessage_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_StackTraceElementMessage_fieldAccessorTable; + } + + // Construct using org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + 
+ public Builder clear() { + super.clear(); + declaringClass_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + methodName_ = ""; + bitField0_ = (bitField0_ & ~0x00000002); + fileName_ = ""; + bitField0_ = (bitField0_ & ~0x00000004); + lineNumber_ = 0; + bitField0_ = (bitField0_ & ~0x00000008); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.getDescriptor(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage getDefaultInstanceForType() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.getDefaultInstance(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage build() { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage buildPartial() { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage result = new org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.declaringClass_ = declaringClass_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.methodName_ = methodName_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.fileName_ = fileName_; + if (((from_bitField0_ & 0x00000008) == 0x00000008)) { + to_bitField0_ |= 0x00000008; + } + result.lineNumber_ = lineNumber_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage) { + return mergeFrom((org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage other) { + if (other == org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.getDefaultInstance()) return this; + if (other.hasDeclaringClass()) { + setDeclaringClass(other.getDeclaringClass()); + } + if (other.hasMethodName()) { + setMethodName(other.getMethodName()); + } + if (other.hasFileName()) { + setFileName(other.getFileName()); + } + if (other.hasLineNumber()) { + setLineNumber(other.getLineNumber()); + } + this.mergeUnknownFields(other.getUnknownFields()); + 
return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + declaringClass_ = input.readBytes(); + break; + } + case 18: { + bitField0_ |= 0x00000002; + methodName_ = input.readBytes(); + break; + } + case 26: { + bitField0_ |= 0x00000004; + fileName_ = input.readBytes(); + break; + } + case 32: { + bitField0_ |= 0x00000008; + lineNumber_ = input.readInt32(); + break; + } + } + } + } + + private int bitField0_; + + // optional string declaringClass = 1; + private java.lang.Object declaringClass_ = ""; + public boolean hasDeclaringClass() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getDeclaringClass() { + java.lang.Object ref = declaringClass_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + declaringClass_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setDeclaringClass(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + declaringClass_ = value; + onChanged(); + return this; + } + public Builder clearDeclaringClass() { + bitField0_ = (bitField0_ & ~0x00000001); + declaringClass_ = getDefaultInstance().getDeclaringClass(); + onChanged(); + return this; + } + void setDeclaringClass(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000001; + declaringClass_ = value; + onChanged(); + } + + // optional string methodName = 2; + private java.lang.Object methodName_ = ""; + public boolean hasMethodName() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public String getMethodName() { + java.lang.Object ref = methodName_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + methodName_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setMethodName(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + methodName_ = value; + onChanged(); + return this; + } + public Builder clearMethodName() { + bitField0_ = (bitField0_ & ~0x00000002); + methodName_ = getDefaultInstance().getMethodName(); + onChanged(); + return this; + } + void setMethodName(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000002; + methodName_ = value; + onChanged(); + } + + // optional string fileName = 3; + private java.lang.Object fileName_ = ""; + public boolean hasFileName() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public String getFileName() { + java.lang.Object ref = fileName_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + fileName_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setFileName(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000004; + 
fileName_ = value; + onChanged(); + return this; + } + public Builder clearFileName() { + bitField0_ = (bitField0_ & ~0x00000004); + fileName_ = getDefaultInstance().getFileName(); + onChanged(); + return this; + } + void setFileName(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000004; + fileName_ = value; + onChanged(); + } + + // optional int32 lineNumber = 4; + private int lineNumber_ ; + public boolean hasLineNumber() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + public int getLineNumber() { + return lineNumber_; + } + public Builder setLineNumber(int value) { + bitField0_ |= 0x00000008; + lineNumber_ = value; + onChanged(); + return this; + } + public Builder clearLineNumber() { + bitField0_ = (bitField0_ & ~0x00000008); + lineNumber_ = 0; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:StackTraceElementMessage) + } + + static { + defaultInstance = new StackTraceElementMessage(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:StackTraceElementMessage) + } + + public interface GenericExceptionMessageOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // optional string className = 1; + boolean hasClassName(); + String getClassName(); + + // optional string message = 2; + boolean hasMessage(); + String getMessage(); + + // optional bytes errorInfo = 3; + boolean hasErrorInfo(); + com.google.protobuf.ByteString getErrorInfo(); + + // repeated .StackTraceElementMessage trace = 4; + java.util.List + getTraceList(); + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage getTrace(int index); + int getTraceCount(); + java.util.List + getTraceOrBuilderList(); + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessageOrBuilder getTraceOrBuilder( + int index); + } + public static final class GenericExceptionMessage extends + com.google.protobuf.GeneratedMessage + implements GenericExceptionMessageOrBuilder { + // Use GenericExceptionMessage.newBuilder() to construct. 
+ private GenericExceptionMessage(Builder builder) { + super(builder); + } + private GenericExceptionMessage(boolean noInit) {} + + private static final GenericExceptionMessage defaultInstance; + public static GenericExceptionMessage getDefaultInstance() { + return defaultInstance; + } + + public GenericExceptionMessage getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_GenericExceptionMessage_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_GenericExceptionMessage_fieldAccessorTable; + } + + private int bitField0_; + // optional string className = 1; + public static final int CLASSNAME_FIELD_NUMBER = 1; + private java.lang.Object className_; + public boolean hasClassName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getClassName() { + java.lang.Object ref = className_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + className_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getClassNameBytes() { + java.lang.Object ref = className_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + className_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional string message = 2; + public static final int MESSAGE_FIELD_NUMBER = 2; + private java.lang.Object message_; + public boolean hasMessage() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public String getMessage() { + java.lang.Object ref = message_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + message_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getMessageBytes() { + java.lang.Object ref = message_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + message_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional bytes errorInfo = 3; + public static final int ERRORINFO_FIELD_NUMBER = 3; + private com.google.protobuf.ByteString errorInfo_; + public boolean hasErrorInfo() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public com.google.protobuf.ByteString getErrorInfo() { + return errorInfo_; + } + + // repeated .StackTraceElementMessage trace = 4; + public static final int TRACE_FIELD_NUMBER = 4; + private java.util.List trace_; + public java.util.List getTraceList() { + return trace_; + } + public java.util.List + getTraceOrBuilderList() { + return trace_; + } + public int getTraceCount() { + return trace_.size(); + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage getTrace(int index) { + return trace_.get(index); + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessageOrBuilder getTraceOrBuilder( + int index) 
{ + return trace_.get(index); + } + + private void initFields() { + className_ = ""; + message_ = ""; + errorInfo_ = com.google.protobuf.ByteString.EMPTY; + trace_ = java.util.Collections.emptyList(); + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getClassNameBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeBytes(2, getMessageBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeBytes(3, errorInfo_); + } + for (int i = 0; i < trace_.size(); i++) { + output.writeMessage(4, trace_.get(i)); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getClassNameBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(2, getMessageBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(3, errorInfo_); + } + for (int i = 0; i < trace_.size(); i++) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(4, trace_.get(i)); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage)) { + return super.equals(obj); + } + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage other = (org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage) obj; + + boolean result = true; + result = result && (hasClassName() == other.hasClassName()); + if (hasClassName()) { + result = result && getClassName() + .equals(other.getClassName()); + } + result = result && (hasMessage() == other.hasMessage()); + if (hasMessage()) { + result = result && getMessage() + .equals(other.getMessage()); + } + result = result && (hasErrorInfo() == other.hasErrorInfo()); + if (hasErrorInfo()) { + result = result && getErrorInfo() + .equals(other.getErrorInfo()); + } + result = result && getTraceList() + .equals(other.getTraceList()); + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + @java.lang.Override + public int hashCode() { + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasClassName()) { + hash = (37 * hash) + CLASSNAME_FIELD_NUMBER; + hash = (53 * hash) + getClassName().hashCode(); + } + if (hasMessage()) { + hash = (37 * hash) + MESSAGE_FIELD_NUMBER; + hash = (53 * hash) + getMessage().hashCode(); + } + if (hasErrorInfo()) { + hash = (37 * hash) + 
ERRORINFO_FIELD_NUMBER; + hash = (53 * hash) + getErrorInfo().hashCode(); + } + if (getTraceCount() > 0) { + hash = (37 * hash) + TRACE_FIELD_NUMBER; + hash = (53 * hash) + getTraceList().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + return hash; + } + + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder 
newBuilder(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessageOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_GenericExceptionMessage_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_GenericExceptionMessage_fieldAccessorTable; + } + + // Construct using org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + getTraceFieldBuilder(); + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + className_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + message_ = ""; + bitField0_ = (bitField0_ & ~0x00000002); + errorInfo_ = com.google.protobuf.ByteString.EMPTY; + bitField0_ = (bitField0_ & ~0x00000004); + if (traceBuilder_ == null) { + trace_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000008); + } else { + traceBuilder_.clear(); + } + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.getDescriptor(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage getDefaultInstanceForType() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.getDefaultInstance(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage build() { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage buildPartial() { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage result = new 
org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.className_ = className_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.message_ = message_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.errorInfo_ = errorInfo_; + if (traceBuilder_ == null) { + if (((bitField0_ & 0x00000008) == 0x00000008)) { + trace_ = java.util.Collections.unmodifiableList(trace_); + bitField0_ = (bitField0_ & ~0x00000008); + } + result.trace_ = trace_; + } else { + result.trace_ = traceBuilder_.build(); + } + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage) { + return mergeFrom((org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage other) { + if (other == org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.getDefaultInstance()) return this; + if (other.hasClassName()) { + setClassName(other.getClassName()); + } + if (other.hasMessage()) { + setMessage(other.getMessage()); + } + if (other.hasErrorInfo()) { + setErrorInfo(other.getErrorInfo()); + } + if (traceBuilder_ == null) { + if (!other.trace_.isEmpty()) { + if (trace_.isEmpty()) { + trace_ = other.trace_; + bitField0_ = (bitField0_ & ~0x00000008); + } else { + ensureTraceIsMutable(); + trace_.addAll(other.trace_); + } + onChanged(); + } + } else { + if (!other.trace_.isEmpty()) { + if (traceBuilder_.isEmpty()) { + traceBuilder_.dispose(); + traceBuilder_ = null; + trace_ = other.trace_; + bitField0_ = (bitField0_ & ~0x00000008); + traceBuilder_ = + com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders ? 
+ getTraceFieldBuilder() : null; + } else { + traceBuilder_.addAllMessages(other.trace_); + } + } + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + className_ = input.readBytes(); + break; + } + case 18: { + bitField0_ |= 0x00000002; + message_ = input.readBytes(); + break; + } + case 26: { + bitField0_ |= 0x00000004; + errorInfo_ = input.readBytes(); + break; + } + case 34: { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder subBuilder = org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.newBuilder(); + input.readMessage(subBuilder, extensionRegistry); + addTrace(subBuilder.buildPartial()); + break; + } + } + } + } + + private int bitField0_; + + // optional string className = 1; + private java.lang.Object className_ = ""; + public boolean hasClassName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getClassName() { + java.lang.Object ref = className_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + className_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setClassName(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + className_ = value; + onChanged(); + return this; + } + public Builder clearClassName() { + bitField0_ = (bitField0_ & ~0x00000001); + className_ = getDefaultInstance().getClassName(); + onChanged(); + return this; + } + void setClassName(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000001; + className_ = value; + onChanged(); + } + + // optional string message = 2; + private java.lang.Object message_ = ""; + public boolean hasMessage() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public String getMessage() { + java.lang.Object ref = message_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + message_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setMessage(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + message_ = value; + onChanged(); + return this; + } + public Builder clearMessage() { + bitField0_ = (bitField0_ & ~0x00000002); + message_ = getDefaultInstance().getMessage(); + onChanged(); + return this; + } + void setMessage(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000002; + message_ = value; + onChanged(); + } + + // optional bytes errorInfo = 3; + private com.google.protobuf.ByteString errorInfo_ = com.google.protobuf.ByteString.EMPTY; + public boolean hasErrorInfo() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public com.google.protobuf.ByteString 
getErrorInfo() { + return errorInfo_; + } + public Builder setErrorInfo(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000004; + errorInfo_ = value; + onChanged(); + return this; + } + public Builder clearErrorInfo() { + bitField0_ = (bitField0_ & ~0x00000004); + errorInfo_ = getDefaultInstance().getErrorInfo(); + onChanged(); + return this; + } + + // repeated .StackTraceElementMessage trace = 4; + private java.util.List trace_ = + java.util.Collections.emptyList(); + private void ensureTraceIsMutable() { + if (!((bitField0_ & 0x00000008) == 0x00000008)) { + trace_ = new java.util.ArrayList(trace_); + bitField0_ |= 0x00000008; + } + } + + private com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessageOrBuilder> traceBuilder_; + + public java.util.List getTraceList() { + if (traceBuilder_ == null) { + return java.util.Collections.unmodifiableList(trace_); + } else { + return traceBuilder_.getMessageList(); + } + } + public int getTraceCount() { + if (traceBuilder_ == null) { + return trace_.size(); + } else { + return traceBuilder_.getCount(); + } + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage getTrace(int index) { + if (traceBuilder_ == null) { + return trace_.get(index); + } else { + return traceBuilder_.getMessage(index); + } + } + public Builder setTrace( + int index, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage value) { + if (traceBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureTraceIsMutable(); + trace_.set(index, value); + onChanged(); + } else { + traceBuilder_.setMessage(index, value); + } + return this; + } + public Builder setTrace( + int index, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder builderForValue) { + if (traceBuilder_ == null) { + ensureTraceIsMutable(); + trace_.set(index, builderForValue.build()); + onChanged(); + } else { + traceBuilder_.setMessage(index, builderForValue.build()); + } + return this; + } + public Builder addTrace(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage value) { + if (traceBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureTraceIsMutable(); + trace_.add(value); + onChanged(); + } else { + traceBuilder_.addMessage(value); + } + return this; + } + public Builder addTrace( + int index, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage value) { + if (traceBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureTraceIsMutable(); + trace_.add(index, value); + onChanged(); + } else { + traceBuilder_.addMessage(index, value); + } + return this; + } + public Builder addTrace( + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder builderForValue) { + if (traceBuilder_ == null) { + ensureTraceIsMutable(); + trace_.add(builderForValue.build()); + onChanged(); + } else { + traceBuilder_.addMessage(builderForValue.build()); + } + return this; + } + public Builder addTrace( + int index, 
org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder builderForValue) { + if (traceBuilder_ == null) { + ensureTraceIsMutable(); + trace_.add(index, builderForValue.build()); + onChanged(); + } else { + traceBuilder_.addMessage(index, builderForValue.build()); + } + return this; + } + public Builder addAllTrace( + java.lang.Iterable values) { + if (traceBuilder_ == null) { + ensureTraceIsMutable(); + super.addAll(values, trace_); + onChanged(); + } else { + traceBuilder_.addAllMessages(values); + } + return this; + } + public Builder clearTrace() { + if (traceBuilder_ == null) { + trace_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000008); + onChanged(); + } else { + traceBuilder_.clear(); + } + return this; + } + public Builder removeTrace(int index) { + if (traceBuilder_ == null) { + ensureTraceIsMutable(); + trace_.remove(index); + onChanged(); + } else { + traceBuilder_.remove(index); + } + return this; + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder getTraceBuilder( + int index) { + return getTraceFieldBuilder().getBuilder(index); + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessageOrBuilder getTraceOrBuilder( + int index) { + if (traceBuilder_ == null) { + return trace_.get(index); } else { + return traceBuilder_.getMessageOrBuilder(index); + } + } + public java.util.List + getTraceOrBuilderList() { + if (traceBuilder_ != null) { + return traceBuilder_.getMessageOrBuilderList(); + } else { + return java.util.Collections.unmodifiableList(trace_); + } + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder addTraceBuilder() { + return getTraceFieldBuilder().addBuilder( + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.getDefaultInstance()); + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder addTraceBuilder( + int index) { + return getTraceFieldBuilder().addBuilder( + index, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.getDefaultInstance()); + } + public java.util.List + getTraceBuilderList() { + return getTraceFieldBuilder().getBuilderList(); + } + private com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessageOrBuilder> + getTraceFieldBuilder() { + if (traceBuilder_ == null) { + traceBuilder_ = new com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessageOrBuilder>( + trace_, + ((bitField0_ & 0x00000008) == 0x00000008), + getParentForChildren(), + isClean()); + trace_ = null; + } + return traceBuilder_; + } + + // @@protoc_insertion_point(builder_scope:GenericExceptionMessage) + } + + static { + defaultInstance = new GenericExceptionMessage(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:GenericExceptionMessage) + } + + public interface ForeignExceptionMessageOrBuilder 
+ extends com.google.protobuf.MessageOrBuilder { + + // optional string source = 1; + boolean hasSource(); + String getSource(); + + // optional .GenericExceptionMessage genericException = 2; + boolean hasGenericException(); + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage getGenericException(); + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessageOrBuilder getGenericExceptionOrBuilder(); + } + public static final class ForeignExceptionMessage extends + com.google.protobuf.GeneratedMessage + implements ForeignExceptionMessageOrBuilder { + // Use ForeignExceptionMessage.newBuilder() to construct. + private ForeignExceptionMessage(Builder builder) { + super(builder); + } + private ForeignExceptionMessage(boolean noInit) {} + + private static final ForeignExceptionMessage defaultInstance; + public static ForeignExceptionMessage getDefaultInstance() { + return defaultInstance; + } + + public ForeignExceptionMessage getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_ForeignExceptionMessage_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_ForeignExceptionMessage_fieldAccessorTable; + } + + private int bitField0_; + // optional string source = 1; + public static final int SOURCE_FIELD_NUMBER = 1; + private java.lang.Object source_; + public boolean hasSource() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getSource() { + java.lang.Object ref = source_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + source_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getSourceBytes() { + java.lang.Object ref = source_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + source_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional .GenericExceptionMessage genericException = 2; + public static final int GENERICEXCEPTION_FIELD_NUMBER = 2; + private org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage genericException_; + public boolean hasGenericException() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage getGenericException() { + return genericException_; + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessageOrBuilder getGenericExceptionOrBuilder() { + return genericException_; + } + + private void initFields() { + source_ = ""; + genericException_ = org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.getDefaultInstance(); + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws 
java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getSourceBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeMessage(2, genericException_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getSourceBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(2, genericException_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage)) { + return super.equals(obj); + } + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage other = (org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage) obj; + + boolean result = true; + result = result && (hasSource() == other.hasSource()); + if (hasSource()) { + result = result && getSource() + .equals(other.getSource()); + } + result = result && (hasGenericException() == other.hasGenericException()); + if (hasGenericException()) { + result = result && getGenericException() + .equals(other.getGenericException()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + @java.lang.Override + public int hashCode() { + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasSource()) { + hash = (37 * hash) + SOURCE_FIELD_NUMBER; + hash = (53 * hash) + getSource().hashCode(); + } + if (hasGenericException()) { + hash = (37 * hash) + GENERICEXCEPTION_FIELD_NUMBER; + hash = (53 * hash) + getGenericException().hashCode(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + return hash; + } + + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return 
newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessageOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_ForeignExceptionMessage_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.internal_static_ForeignExceptionMessage_fieldAccessorTable; + } + + // Construct using org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { 
+ getGenericExceptionFieldBuilder(); + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + source_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + if (genericExceptionBuilder_ == null) { + genericException_ = org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.getDefaultInstance(); + } else { + genericExceptionBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000002); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage.getDescriptor(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage getDefaultInstanceForType() { + return org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage.getDefaultInstance(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage build() { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage buildPartial() { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage result = new org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.source_ = source_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + if (genericExceptionBuilder_ == null) { + result.genericException_ = genericException_; + } else { + result.genericException_ = genericExceptionBuilder_.build(); + } + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage) { + return mergeFrom((org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage other) { + if (other == org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage.getDefaultInstance()) return this; + if (other.hasSource()) { + setSource(other.getSource()); + } + if (other.hasGenericException()) { + mergeGenericException(other.getGenericException()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + return true; + } + + public Builder mergeFrom( + 
com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + source_ = input.readBytes(); + break; + } + case 18: { + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.Builder subBuilder = org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.newBuilder(); + if (hasGenericException()) { + subBuilder.mergeFrom(getGenericException()); + } + input.readMessage(subBuilder, extensionRegistry); + setGenericException(subBuilder.buildPartial()); + break; + } + } + } + } + + private int bitField0_; + + // optional string source = 1; + private java.lang.Object source_ = ""; + public boolean hasSource() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getSource() { + java.lang.Object ref = source_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + source_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setSource(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + source_ = value; + onChanged(); + return this; + } + public Builder clearSource() { + bitField0_ = (bitField0_ & ~0x00000001); + source_ = getDefaultInstance().getSource(); + onChanged(); + return this; + } + void setSource(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000001; + source_ = value; + onChanged(); + } + + // optional .GenericExceptionMessage genericException = 2; + private org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage genericException_ = org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.getDefaultInstance(); + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.Builder, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessageOrBuilder> genericExceptionBuilder_; + public boolean hasGenericException() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage getGenericException() { + if (genericExceptionBuilder_ == null) { + return genericException_; + } else { + return genericExceptionBuilder_.getMessage(); + } + } + public Builder setGenericException(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage value) { + if (genericExceptionBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + genericException_ = value; + onChanged(); + } else { + genericExceptionBuilder_.setMessage(value); + } + bitField0_ |= 0x00000002; + return this; + } + public Builder setGenericException( + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.Builder 
builderForValue) { + if (genericExceptionBuilder_ == null) { + genericException_ = builderForValue.build(); + onChanged(); + } else { + genericExceptionBuilder_.setMessage(builderForValue.build()); + } + bitField0_ |= 0x00000002; + return this; + } + public Builder mergeGenericException(org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage value) { + if (genericExceptionBuilder_ == null) { + if (((bitField0_ & 0x00000002) == 0x00000002) && + genericException_ != org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.getDefaultInstance()) { + genericException_ = + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.newBuilder(genericException_).mergeFrom(value).buildPartial(); + } else { + genericException_ = value; + } + onChanged(); + } else { + genericExceptionBuilder_.mergeFrom(value); + } + bitField0_ |= 0x00000002; + return this; + } + public Builder clearGenericException() { + if (genericExceptionBuilder_ == null) { + genericException_ = org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.getDefaultInstance(); + onChanged(); + } else { + genericExceptionBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000002); + return this; + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.Builder getGenericExceptionBuilder() { + bitField0_ |= 0x00000002; + onChanged(); + return getGenericExceptionFieldBuilder().getBuilder(); + } + public org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessageOrBuilder getGenericExceptionOrBuilder() { + if (genericExceptionBuilder_ != null) { + return genericExceptionBuilder_.getMessageOrBuilder(); + } else { + return genericException_; + } + } + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.Builder, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessageOrBuilder> + getGenericExceptionFieldBuilder() { + if (genericExceptionBuilder_ == null) { + genericExceptionBuilder_ = new com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.Builder, org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessageOrBuilder>( + genericException_, + getParentForChildren(), + isClean()); + genericException_ = null; + } + return genericExceptionBuilder_; + } + + // @@protoc_insertion_point(builder_scope:ForeignExceptionMessage) + } + + static { + defaultInstance = new ForeignExceptionMessage(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:ForeignExceptionMessage) + } + + private static com.google.protobuf.Descriptors.Descriptor + internal_static_StackTraceElementMessage_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_StackTraceElementMessage_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_GenericExceptionMessage_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_GenericExceptionMessage_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + 
internal_static_ForeignExceptionMessage_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_ForeignExceptionMessage_fieldAccessorTable; + + public static com.google.protobuf.Descriptors.FileDescriptor + getDescriptor() { + return descriptor; + } + private static com.google.protobuf.Descriptors.FileDescriptor + descriptor; + static { + java.lang.String[] descriptorData = { + "\n\023ErrorHandling.proto\"l\n\030StackTraceEleme" + + "ntMessage\022\026\n\016declaringClass\030\001 \001(\t\022\022\n\nmet" + + "hodName\030\002 \001(\t\022\020\n\010fileName\030\003 \001(\t\022\022\n\nlineN" + + "umber\030\004 \001(\005\"z\n\027GenericExceptionMessage\022\021" + + "\n\tclassName\030\001 \001(\t\022\017\n\007message\030\002 \001(\t\022\021\n\ter" + + "rorInfo\030\003 \001(\014\022(\n\005trace\030\004 \003(\0132\031.StackTrac" + + "eElementMessage\"]\n\027ForeignExceptionMessa" + + "ge\022\016\n\006source\030\001 \001(\t\0222\n\020genericException\030\002" + + " \001(\0132\030.GenericExceptionMessageBF\n*org.ap" + + "ache.hadoop.hbase.protobuf.generatedB\023Er", + "rorHandlingProtosH\001\240\001\001" + }; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = + new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors( + com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + internal_static_StackTraceElementMessage_descriptor = + getDescriptor().getMessageTypes().get(0); + internal_static_StackTraceElementMessage_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_StackTraceElementMessage_descriptor, + new java.lang.String[] { "DeclaringClass", "MethodName", "FileName", "LineNumber", }, + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.class, + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.StackTraceElementMessage.Builder.class); + internal_static_GenericExceptionMessage_descriptor = + getDescriptor().getMessageTypes().get(1); + internal_static_GenericExceptionMessage_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_GenericExceptionMessage_descriptor, + new java.lang.String[] { "ClassName", "Message", "ErrorInfo", "Trace", }, + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.class, + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.GenericExceptionMessage.Builder.class); + internal_static_ForeignExceptionMessage_descriptor = + getDescriptor().getMessageTypes().get(2); + internal_static_ForeignExceptionMessage_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_ForeignExceptionMessage_descriptor, + new java.lang.String[] { "Source", "GenericException", }, + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage.class, + org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage.Builder.class); + return null; + } + }; + com.google.protobuf.Descriptors.FileDescriptor + .internalBuildGeneratedFileFrom(descriptorData, + new com.google.protobuf.Descriptors.FileDescriptor[] { + }, assigner); + } + + // @@protoc_insertion_point(outer_class_scope) +} Index: src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java =================================================================== --- 
src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java (revision 0) @@ -0,0 +1,851 @@ +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: hbase.proto + +package org.apache.hadoop.hbase.protobuf.generated; + +public final class HBaseProtos { + private HBaseProtos() {} + public static void registerAllExtensions( + com.google.protobuf.ExtensionRegistry registry) { + } + public interface SnapshotDescriptionOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required string name = 1; + boolean hasName(); + String getName(); + + // optional string table = 2; + boolean hasTable(); + String getTable(); + + // optional int64 creationTime = 3 [default = 0]; + boolean hasCreationTime(); + long getCreationTime(); + + // optional .SnapshotDescription.Type type = 4 [default = FLUSH]; + boolean hasType(); + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type getType(); + + // optional int32 version = 5; + boolean hasVersion(); + int getVersion(); + } + public static final class SnapshotDescription extends + com.google.protobuf.GeneratedMessage + implements SnapshotDescriptionOrBuilder { + // Use SnapshotDescription.newBuilder() to construct. + private SnapshotDescription(Builder builder) { + super(builder); + } + private SnapshotDescription(boolean noInit) {} + + private static final SnapshotDescription defaultInstance; + public static SnapshotDescription getDefaultInstance() { + return defaultInstance; + } + + public SnapshotDescription getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.internal_static_SnapshotDescription_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.internal_static_SnapshotDescription_fieldAccessorTable; + } + + public enum Type + implements com.google.protobuf.ProtocolMessageEnum { + DISABLED(0, 0), + FLUSH(1, 1), + ; + + public static final int DISABLED_VALUE = 0; + public static final int FLUSH_VALUE = 1; + + + public final int getNumber() { return value; } + + public static Type valueOf(int value) { + switch (value) { + case 0: return DISABLED; + case 1: return FLUSH; + default: return null; + } + } + + public static com.google.protobuf.Internal.EnumLiteMap + internalGetValueMap() { + return internalValueMap; + } + private static com.google.protobuf.Internal.EnumLiteMap + internalValueMap = + new com.google.protobuf.Internal.EnumLiteMap() { + public Type findValueByNumber(int number) { + return Type.valueOf(number); + } + }; + + public final com.google.protobuf.Descriptors.EnumValueDescriptor + getValueDescriptor() { + return getDescriptor().getValues().get(index); + } + public final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptorForType() { + return getDescriptor(); + } + public static final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.getDescriptor().getEnumTypes().get(0); + } + + private static final Type[] VALUES = { + DISABLED, FLUSH, + }; + + public static Type valueOf( + com.google.protobuf.Descriptors.EnumValueDescriptor desc) { + if (desc.getType() != getDescriptor()) { + throw new 
java.lang.IllegalArgumentException( + "EnumValueDescriptor is not for this type."); + } + return VALUES[desc.getIndex()]; + } + + private final int index; + private final int value; + + private Type(int index, int value) { + this.index = index; + this.value = value; + } + + // @@protoc_insertion_point(enum_scope:SnapshotDescription.Type) + } + + private int bitField0_; + // required string name = 1; + public static final int NAME_FIELD_NUMBER = 1; + private java.lang.Object name_; + public boolean hasName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getName() { + java.lang.Object ref = name_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + name_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getNameBytes() { + java.lang.Object ref = name_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + name_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional string table = 2; + public static final int TABLE_FIELD_NUMBER = 2; + private java.lang.Object table_; + public boolean hasTable() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public String getTable() { + java.lang.Object ref = table_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + table_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getTableBytes() { + java.lang.Object ref = table_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + table_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // optional int64 creationTime = 3 [default = 0]; + public static final int CREATIONTIME_FIELD_NUMBER = 3; + private long creationTime_; + public boolean hasCreationTime() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public long getCreationTime() { + return creationTime_; + } + + // optional .SnapshotDescription.Type type = 4 [default = FLUSH]; + public static final int TYPE_FIELD_NUMBER = 4; + private org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type type_; + public boolean hasType() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type getType() { + return type_; + } + + // optional int32 version = 5; + public static final int VERSION_FIELD_NUMBER = 5; + private int version_; + public boolean hasVersion() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + public int getVersion() { + return version_; + } + + private void initFields() { + name_ = ""; + table_ = ""; + creationTime_ = 0L; + type_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type.FLUSH; + version_ = 0; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasName()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void 
writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getNameBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeBytes(2, getTableBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeInt64(3, creationTime_); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + output.writeEnum(4, type_.getNumber()); + } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + output.writeInt32(5, version_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getNameBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(2, getTableBytes()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeInt64Size(3, creationTime_); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(4, type_.getNumber()); + } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(5, version_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription)) { + return super.equals(obj); + } + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription other = (org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription) obj; + + boolean result = true; + result = result && (hasName() == other.hasName()); + if (hasName()) { + result = result && getName() + .equals(other.getName()); + } + result = result && (hasTable() == other.hasTable()); + if (hasTable()) { + result = result && getTable() + .equals(other.getTable()); + } + result = result && (hasCreationTime() == other.hasCreationTime()); + if (hasCreationTime()) { + result = result && (getCreationTime() + == other.getCreationTime()); + } + result = result && (hasType() == other.hasType()); + if (hasType()) { + result = result && + (getType() == other.getType()); + } + result = result && (hasVersion() == other.hasVersion()); + if (hasVersion()) { + result = result && (getVersion() + == other.getVersion()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + @java.lang.Override + public int hashCode() { + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasName()) { + hash = (37 * hash) + NAME_FIELD_NUMBER; + hash = (53 * hash) + getName().hashCode(); + } + if (hasTable()) { + hash = (37 * hash) + TABLE_FIELD_NUMBER; + hash = (53 * hash) + getTable().hashCode(); + } + if (hasCreationTime()) { + hash = (37 * hash) + CREATIONTIME_FIELD_NUMBER; + hash = (53 * hash) + hashLong(getCreationTime()); + } + if (hasType()) 
{ + hash = (37 * hash) + TYPE_FIELD_NUMBER; + hash = (53 * hash) + hashEnum(getType()); + } + if (hasVersion()) { + hash = (37 * hash) + VERSION_FIELD_NUMBER; + hash = (53 * hash) + getVersion(); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + return hash; + } + + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { 
return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescriptionOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.internal_static_SnapshotDescription_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.internal_static_SnapshotDescription_fieldAccessorTable; + } + + // Construct using org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + name_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + table_ = ""; + bitField0_ = (bitField0_ & ~0x00000002); + creationTime_ = 0L; + bitField0_ = (bitField0_ & ~0x00000004); + type_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type.FLUSH; + bitField0_ = (bitField0_ & ~0x00000008); + version_ = 0; + bitField0_ = (bitField0_ & ~0x00000010); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.getDescriptor(); + } + + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription getDefaultInstanceForType() { + return org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.getDefaultInstance(); + } + + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription build() { + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription buildPartial() { + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription result = new org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.name_ = name_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.table_ = table_; + if (((from_bitField0_ & 
0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.creationTime_ = creationTime_; + if (((from_bitField0_ & 0x00000008) == 0x00000008)) { + to_bitField0_ |= 0x00000008; + } + result.type_ = type_; + if (((from_bitField0_ & 0x00000010) == 0x00000010)) { + to_bitField0_ |= 0x00000010; + } + result.version_ = version_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription) { + return mergeFrom((org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription other) { + if (other == org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.getDefaultInstance()) return this; + if (other.hasName()) { + setName(other.getName()); + } + if (other.hasTable()) { + setTable(other.getTable()); + } + if (other.hasCreationTime()) { + setCreationTime(other.getCreationTime()); + } + if (other.hasType()) { + setType(other.getType()); + } + if (other.hasVersion()) { + setVersion(other.getVersion()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasName()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + name_ = input.readBytes(); + break; + } + case 18: { + bitField0_ |= 0x00000002; + table_ = input.readBytes(); + break; + } + case 24: { + bitField0_ |= 0x00000004; + creationTime_ = input.readInt64(); + break; + } + case 32: { + int rawValue = input.readEnum(); + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type value = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(4, rawValue); + } else { + bitField0_ |= 0x00000008; + type_ = value; + } + break; + } + case 40: { + bitField0_ |= 0x00000010; + version_ = input.readInt32(); + break; + } + } + } + } + + private int bitField0_; + + // required string name = 1; + private java.lang.Object name_ = ""; + public boolean hasName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getName() { + java.lang.Object ref = name_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + name_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setName(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + name_ = value; + onChanged(); + return this; + } + public Builder clearName() { + bitField0_ = (bitField0_ & ~0x00000001); + name_ = 
getDefaultInstance().getName(); + onChanged(); + return this; + } + void setName(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000001; + name_ = value; + onChanged(); + } + + // optional string table = 2; + private java.lang.Object table_ = ""; + public boolean hasTable() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public String getTable() { + java.lang.Object ref = table_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + table_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setTable(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + table_ = value; + onChanged(); + return this; + } + public Builder clearTable() { + bitField0_ = (bitField0_ & ~0x00000002); + table_ = getDefaultInstance().getTable(); + onChanged(); + return this; + } + void setTable(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000002; + table_ = value; + onChanged(); + } + + // optional int64 creationTime = 3 [default = 0]; + private long creationTime_ ; + public boolean hasCreationTime() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public long getCreationTime() { + return creationTime_; + } + public Builder setCreationTime(long value) { + bitField0_ |= 0x00000004; + creationTime_ = value; + onChanged(); + return this; + } + public Builder clearCreationTime() { + bitField0_ = (bitField0_ & ~0x00000004); + creationTime_ = 0L; + onChanged(); + return this; + } + + // optional .SnapshotDescription.Type type = 4 [default = FLUSH]; + private org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type type_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type.FLUSH; + public boolean hasType() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type getType() { + return type_; + } + public Builder setType(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000008; + type_ = value; + onChanged(); + return this; + } + public Builder clearType() { + bitField0_ = (bitField0_ & ~0x00000008); + type_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type.FLUSH; + onChanged(); + return this; + } + + // optional int32 version = 5; + private int version_ ; + public boolean hasVersion() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + public int getVersion() { + return version_; + } + public Builder setVersion(int value) { + bitField0_ |= 0x00000010; + version_ = value; + onChanged(); + return this; + } + public Builder clearVersion() { + bitField0_ = (bitField0_ & ~0x00000010); + version_ = 0; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:SnapshotDescription) + } + + static { + defaultInstance = new SnapshotDescription(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:SnapshotDescription) + } + + private static com.google.protobuf.Descriptors.Descriptor + internal_static_SnapshotDescription_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_SnapshotDescription_fieldAccessorTable; + + public static com.google.protobuf.Descriptors.FileDescriptor + getDescriptor() { + return descriptor; + } + private static 
com.google.protobuf.Descriptors.FileDescriptor + descriptor; + static { + java.lang.String[] descriptorData = { + "\n\013hbase.proto\"\255\001\n\023SnapshotDescription\022\014\n" + + "\004name\030\001 \002(\t\022\r\n\005table\030\002 \001(\t\022\027\n\014creationTi" + + "me\030\003 \001(\003:\0010\022.\n\004type\030\004 \001(\0162\031.SnapshotDesc" + + "ription.Type:\005FLUSH\022\017\n\007version\030\005 \001(\005\"\037\n\004" + + "Type\022\014\n\010DISABLED\020\000\022\t\n\005FLUSH\020\001B>\n*org.apa" + + "che.hadoop.hbase.protobuf.generatedB\013HBa" + + "seProtosH\001\240\001\001" + }; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = + new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors( + com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + internal_static_SnapshotDescription_descriptor = + getDescriptor().getMessageTypes().get(0); + internal_static_SnapshotDescription_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_SnapshotDescription_descriptor, + new java.lang.String[] { "Name", "Table", "CreationTime", "Type", "Version", }, + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.class, + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Builder.class); + return null; + } + }; + com.google.protobuf.Descriptors.FileDescriptor + .internalBuildGeneratedFileFrom(descriptorData, + new com.google.protobuf.Descriptors.FileDescriptor[] { + }, assigner); + } + + // @@protoc_insertion_point(outer_class_scope) +} Index: src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java (revision 0) @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.protobuf; + +import java.io.IOException; + +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Protobufs utility. + */ +@SuppressWarnings("deprecation") +public final class ProtobufUtil { + + private ProtobufUtil() { + } + + /** + * Magic we put ahead of a serialized protobuf message. + * For example, all znode content is protobuf messages with the below magic + * for preamble. + */ + public static final byte [] PB_MAGIC = new byte [] {'P', 'B', 'U', 'F'}; + private static final String PB_MAGIC_STR = Bytes.toString(PB_MAGIC); + + /** + * Prepend the passed bytes with four bytes of magic, {@link #PB_MAGIC}, to flag what + * follows as a protobuf in hbase. 
Prepend these bytes to all content written to znodes, etc. + * @param bytes Bytes to decorate + * @return The passed bytes with magic prepended (Creates a new + * byte array that is bytes.length plus {@link #PB_MAGIC}.length. + */ + public static byte [] prependPBMagic(final byte [] bytes) { + return Bytes.add(PB_MAGIC, bytes); + } + + /** + * @param bytes Bytes to check. + * @return True if passed bytes has {@link #PB_MAGIC} for a prefix. + */ + public static boolean isPBMagicPrefix(final byte [] bytes) { + if (bytes == null || bytes.length < PB_MAGIC.length) return false; + return Bytes.compareTo(PB_MAGIC, 0, PB_MAGIC.length, bytes, 0, PB_MAGIC.length) == 0; + } + + /** + * @return Length of {@link #PB_MAGIC} + */ + public static int lengthOfPBMagic() { + return PB_MAGIC.length; + } +} Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java (working copy) @@ -61,9 +61,9 @@ import org.apache.zookeeper.Watcher; import org.apache.zookeeper.ZooDefs.Ids; import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.client.ZooKeeperSaslClient; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Stat; -import org.apache.zookeeper.client.ZooKeeperSaslClient; import org.apache.zookeeper.server.ZooKeeperSaslServer; import org.apache.zookeeper.proto.CreateRequest; import org.apache.zookeeper.proto.DeleteRequest; @@ -855,6 +855,10 @@ /** * Set data into node creating node if it doesn't yet exist. * Does not set watch. + * + * WARNING: this is not atomic -- it is possible to get a 0-byte data value in the znode before + * data is written + * * @param zkw zk reference * @param znode path of node * @param data data to set for node @@ -1067,7 +1071,7 @@ } /** - * Creates the specified node, if the node does not exist. Does not set a + * Creates the specified node, iff the node does not exist. Does not set a * watch and fails silently if the node already exists. * * The node created is persistent and open access. @@ -1078,8 +1082,24 @@ */ public static void createAndFailSilent(ZooKeeperWatcher zkw, String znode) throws KeeperException { + createAndFailSilent(zkw, znode, new byte[0]); + } + + /** + * Creates the specified node containing specified data, iff the node does not exist. Does + * not set a watch and fails silently if the node already exists. + * + * The node created is persistent and open access. + * + * @param zkw zk reference + * @param znode path of node + * @param data a byte array data to store in the znode + * @throws KeeperException if unexpected zookeeper exception + */ + public static void createAndFailSilent(ZooKeeperWatcher zkw, + String znode, byte[] data) throws KeeperException { createAndFailSilent(zkw, - (CreateAndFailSilent)ZKUtilOp.createAndFailSilent(znode, new byte[0])); + (CreateAndFailSilent)ZKUtilOp.createAndFailSilent(znode, data)); } private static void createAndFailSilent(ZooKeeperWatcher zkw, CreateAndFailSilent cafs) @@ -1120,11 +1140,29 @@ */ public static void createWithParents(ZooKeeperWatcher zkw, String znode) throws KeeperException { + createWithParents(zkw, znode, new byte[0]); + } + + /** + * Creates the specified node and all parent nodes required for it to exist. The creation of + * parent znodes is not atomic with the leafe znode creation but the data is written atomically + * when the leaf node is created. 
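As an aside, a minimal sketch of how the new ProtobufUtil helpers and the data-carrying createWithParents() overload might be combined when publishing a protobuf-encoded SnapshotDescription to a znode. The znode path and the helper method name below are illustrative only, not part of this patch:

    // Illustrative helper, not in the patch: writes a PBUF-tagged SnapshotDescription to ZK.
    void publishSnapshotNode(ZooKeeperWatcher zkw) throws KeeperException {
      SnapshotDescription snapshot = SnapshotDescription.newBuilder()
          .setName("snapshot_demo")
          .setTable("demo_table")
          .setType(SnapshotDescription.Type.FLUSH)
          .build();

      // Tag the serialized message with the PBUF magic before it goes into the znode.
      byte[] data = ProtobufUtil.prependPBMagic(snapshot.toByteArray());
      ZKUtil.createWithParents(zkw, "/hbase/online-snapshot/acquired/snapshot_demo", data);
    }

    // Reader side ('stored' is whatever was read back from the znode): strip the magic
    // before handing the bytes to the protobuf parser.
    if (ProtobufUtil.isPBMagicPrefix(stored)) {
      SnapshotDescription parsed = SnapshotDescription.parseFrom(
          java.util.Arrays.copyOfRange(stored, ProtobufUtil.lengthOfPBMagic(), stored.length));
    }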
+ * + * No watches are set and no errors are thrown if the node already exists. + * + * The nodes created are persistent and open access. + * + * @param zkw zk reference + * @param znode path of node + * @throws KeeperException if unexpected zookeeper exception + */ + public static void createWithParents(ZooKeeperWatcher zkw, String znode, byte[] data) + throws KeeperException { try { if(znode == null) { return; } - zkw.getRecoverableZooKeeper().create(znode, new byte[0], createACL(zkw, znode), + zkw.getRecoverableZooKeeper().create(znode, data, createACL(zkw, znode), CreateMode.PERSISTENT); } catch(KeeperException.NodeExistsException nee) { return; @@ -1606,4 +1644,37 @@ throw new IOException(keeperEx); } } + + /** + * Recursively print the current state of ZK (non-transactional) + * @param root name of the root directory in zk to print + * @throws KeeperException + */ + public static void logZKTree(ZooKeeperWatcher zkw, String root) { + if (!LOG.isDebugEnabled()) return; + LOG.debug("Current zk system:"); + String prefix = "|-"; + LOG.debug(prefix + root); + try { + logZKTree(zkw, root, prefix); + } catch (KeeperException e) { + throw new RuntimeException(e); + } + } + + /** + * Helper method to print the current state of the ZK tree. + * @see #logZKTree(ZooKeeperWatcher, String) + * @throws KeeperException if an unexpected exception occurs + */ + protected static void logZKTree(ZooKeeperWatcher zkw, String root, String prefix) throws KeeperException { + List children = ZKUtil.listChildrenNoWatch(zkw, root); + if (children == null) return; + for (String child : children) { + LOG.debug(prefix + child); + String node = ZKUtil.joinZNode(root.equals("/") ? "" : root, child); + logZKTree(zkw, node, prefix + "---"); + } + } + } Index: src/main/java/org/apache/hadoop/hbase/HConstants.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -671,11 +671,21 @@ /** Directory under /hbase where archived hfiles are stored */ public static final String HFILE_ARCHIVE_DIRECTORY = ".archive"; + /** + * Name of the directory to store all snapshots. See SnapshotDescriptionUtils for + * remaining snapshot constants; this is here to keep HConstants dependencies at a minimum and + * uni-directional. + */ + public static final String SNAPSHOT_DIR_NAME = ".snapshot"; + + /** Temporary directory used for table creation and deletion */ + public static final String HBASE_TEMP_DIRECTORY = ".tmp"; + /** Directories that are not HBase table directories */ public static final List HBASE_NON_TABLE_DIRS = Collections.unmodifiableList(Arrays.asList(new String[] { HREGION_LOGDIR_NAME, HREGION_OLDLOGDIR_NAME, CORRUPT_DIR_NAME, SPLIT_LOGDIR_NAME, - HBCK_SIDELINEDIR_NAME, HFILE_ARCHIVE_DIRECTORY })); + HBCK_SIDELINEDIR_NAME, HFILE_ARCHIVE_DIRECTORY, SNAPSHOT_DIR_NAME, HBASE_TEMP_DIRECTORY })); /** Directories that are not HBase user table directories */ public static final List HBASE_NON_USER_TABLE_DIRS = Index: src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java (working copy) @@ -358,6 +358,9 @@ Bytes.equals(tableName, HConstants.META_TABLE_NAME); } + // A non-capture group so that this can be embedded. 
+ public static final String VALID_USER_TABLE_REGEX = "(?:[a-zA-Z_0-9][a-zA-Z_0-9.-]*)"; + /** * Check passed byte buffer, "tableName", is legal user-space table name. * @return Returns passed tableName param Index: src/main/java/org/apache/hadoop/hbase/Chore.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/Chore.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/Chore.java (working copy) @@ -91,6 +91,14 @@ this.sleeper.skipSleepCycle(); } + /* + * Exposed for TESTING! + * calls directly the chore method, from the current thread. + */ + public void choreForTesting() { + chore(); + } + /** * Override to run a task before we start looping. * @return true if initial chore was successful Index: src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMember.java (revision 0) @@ -0,0 +1,232 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collection; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.DaemonThreadFactory; +import org.apache.hadoop.hbase.errorhandling.ForeignException; + +import com.google.common.collect.MapMaker; + +/** + * Process to kick off and manage a running {@link Subprocedure} on a member. This is the + * specialized part of a {@link Procedure} that actually does procedure type-specific work + * and reports back to the coordinator as it completes each phase. + *
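Returning to the HTableDescriptor change above: a tiny, hypothetical illustration (using java.util.regex, not part of the patch) of why VALID_USER_TABLE_REGEX is a non-capturing group, so it can be embedded in larger patterns without shifting capture-group numbering:

    // Hypothetical composite pattern: "<tableName>.<id>"
    Pattern p = Pattern.compile("^(" + HTableDescriptor.VALID_USER_TABLE_REGEX + ")\\.([0-9]+)$");
    Matcher m = p.matcher("usertable.42");
    if (m.matches()) {
      String table = m.group(1);  // "usertable" -- group 1 is still the whole table name
      String id = m.group(2);     // "42"
    }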

+ * If there is a connection error ({@link #controllerConnectionFailure(String, IOException)}), all
+ * currently running subprocedures are notified as failed, since there is no longer a way to reach
+ * any other members or coordinators once the rpcs are down.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class ProcedureMember implements Closeable {
+  private static final Log LOG = LogFactory.getLog(ProcedureMember.class);
+
+  private final SubprocedureFactory builder;
+  private final ProcedureMemberRpcs rpcs;
+
+  private final ConcurrentMap subprocs =
+      new MapMaker().concurrencyLevel(4).weakValues().makeMap();
+  private final ExecutorService pool;
+
+  /**
+   * Instantiate a new ProcedureMember. This is a slave that executes subprocedures.
+   *
+   * @param rpcs controller used to send notifications to the procedure coordinator
+   * @param pool thread pool to submit subprocedures
+   * @param factory class that creates instances of a subprocedure.
+   */
+  public ProcedureMember(ProcedureMemberRpcs rpcs, ThreadPoolExecutor pool,
+      SubprocedureFactory factory) {
+    this.pool = pool;
+    this.rpcs = rpcs;
+    this.builder = factory;
+  }
+
+  public static ThreadPoolExecutor defaultPool(long wakeFrequency, long keepAlive,
+      int procThreads, String memberName) {
+    return new ThreadPoolExecutor(1, procThreads, keepAlive, TimeUnit.SECONDS,
+        new SynchronousQueue(),
+        new DaemonThreadFactory("member: '" + memberName + "' subprocedure-pool"));
+  }
+
+  /**
+   * Package exposed. Not for public use.
+   *
+   * @return reference to the Procedure member's rpcs object
+   */
+  ProcedureMemberRpcs getRpcs() {
+    return rpcs;
+  }
+
+  /**
+   * This is separated from execution so that we can detect and handle the case where the
+   * subprocedure is invalid and not actionable due to bad info (like a DISABLED snapshot type
+   * being sent here).
+   * @param opName name of the operation the subprocedure belongs to
+   * @param data data passed along to the new subprocedure
+   * @return the new subprocedure
+   */
+  public Subprocedure createSubprocedure(String opName, byte[] data) {
+    return builder.buildSubprocedure(opName, data);
+  }
+
+  /**
+   * Submit a subprocedure for execution. This starts the local acquire phase.
+   * @param subproc the subprocedure to execute.
+   * @return true if the subprocedure was started correctly, false if it
+   *         could not be started. In the latter case, the subprocedure holds a reference to
+   *         the exception that caused the failure.
+   */
+  public boolean submitSubprocedure(Subprocedure subproc) {
+    // if the submitted subprocedure was null, bail.
+    if (subproc == null) {
+      LOG.warn("Submitted null subprocedure, nothing to run here.");
+      return false;
+    }
+
+    String procName = subproc.getName();
+    if (procName == null || procName.length() == 0) {
+      LOG.error("Subproc name cannot be null or the empty string");
+      return false;
+    }
+
+    // make sure we aren't already running a subprocedure of that name
+    Subprocedure rsub;
+    synchronized (subprocs) {
+      rsub = subprocs.get(procName);
+    }
+    if (rsub != null) {
+      if (!rsub.isComplete()) {
+        LOG.error("Subproc '" + procName + "' is already running. Bailing out");
+        return false;
+      }
+      LOG.warn("A completed old subproc " + procName + " is still present, removing");
+      subprocs.remove(procName);
+    }
+
+    LOG.debug("Submitting new Subprocedure:" + procName);
+
+    // kick off the subprocedure
+    Future future = null;
+    try {
+      future = this.pool.submit(subproc);
+      synchronized (subprocs) {
+        subprocs.put(procName, subproc);
+      }
+      return true;
+    } catch (RejectedExecutionException e) {
+      // the thread pool is full and we can't run the subprocedure
+      String msg = "Subprocedure pool is full!";
+      subproc.cancel(msg, e.getCause());
+
+      // cancel all subprocedures proactively
+      if (future != null) {
+        future.cancel(true);
+      }
+    }
+
+    LOG.error("Failed to start subprocedure '" + procName + "'");
+    return false;
+  }
+
+  /**
+   * Notification that the procedure coordinator has reached the global barrier
+   * @param procName name of the subprocedure that should start running the in-barrier phase
+   */
+  public void receivedReachedGlobalBarrier(String procName) {
+    Subprocedure subproc = subprocs.get(procName);
+    if (subproc == null) {
+      LOG.warn("Unexpected reached global barrier message for Sub-Procedure '" + procName + "'");
+      return;
+    }
+    subproc.receiveReachedGlobalBarrier();
+  }
+
+  /**
+   * Best effort attempt to close the threadpool via Thread.interrupt.
+   */
+  @Override
+  public void close() throws IOException {
+    // have to use shutdown now to break any latch waiting
+    pool.shutdownNow();
+  }
+
+  /**
+   * Shutdown the threadpool, and wait for up to timeoutMs millis before bailing
+   * @param timeoutMs timeout limit in millis
+   * @return true if the pool shut down cleanly, false if we bailed due to timeout.
+   * @throws InterruptedException
+   */
+  boolean closeAndWait(long timeoutMs) throws InterruptedException {
+    pool.shutdown();
+    return pool.awaitTermination(timeoutMs, TimeUnit.MILLISECONDS);
+  }
+
+  /**
+   * The connection to the rest of the procedure group (member and coordinator) has been
+   * broken/lost/failed. This should fail any interested subprocedure, but not attempt to notify
+   * other members since we cannot reach them anymore.
+   * @param message description of the error
+   * @param cause the actual cause of the failure
+   *
+   * TODO I'm tempted to just remove this code completely and treat it like any other abort.
+   * Implementation wise, if this happens it is a ZK failure which means the RS will abort.
+   */
+  public void controllerConnectionFailure(final String message, final IOException cause) {
+    Collection toNotify = subprocs.values();
+    LOG.error(message, cause);
+    for (Subprocedure sub : toNotify) {
+      // TODO notify the elements, if they aren't null
+      sub.cancel(message, cause);
+    }
+  }
+
+  /**
+   * Send an abort to the specified procedure
+   * @param procName name of the procedure to abort
+   * @param ee exception information about the abort
+   */
+  public void receiveAbortProcedure(String procName, ForeignException ee) {
+    LOG.debug("Request received to abort procedure " + procName, ee);
+    // if we know about the procedure, notify it
+    Subprocedure sub = subprocs.get(procName);
+    if (sub == null) {
+      LOG.info("Received abort on procedure with no local subprocedure " + procName +
+          ", ignoring it.", ee);
+      return; // Procedure has already completed
+    }
+    LOG.error("Propagating foreign exception to subprocedure " + sub.getName(), ee);
+    sub.monitor.receive(ee);
+  }
+}
\ No newline at end of file
Index: src/main/java/org/apache/hadoop/hbase/procedure/Subprocedure.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/procedure/Subprocedure.java (revision 0)
+++ src/main/java/org/apache/hadoop/hbase/procedure/Subprocedure.java (revision 0)
@@ -0,0 +1,331 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.procedure;
+
+import java.io.IOException;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CountDownLatch;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.errorhandling.ForeignException;
+import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
+import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
+import org.apache.hadoop.hbase.errorhandling.ForeignExceptionListener;
+import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector;
+
+/**
+ * Distributed procedure member's Subprocedure. A procedure is started on a ProcedureCoordinator,
+ * which communicates with ProcedureMembers who create and start its part of the Procedure. This
+ * sub part is called a Subprocedure.
+ *
+ * Users should subclass this and implement {@link #acquireBarrier()} (get local barrier for this
+ * member), {@link #insideBarrier()} (execute while globally barriered and release barrier) and
+ * {@link #cleanup(Exception)} (release state associated with the subprocedure).
+ *
+ * When submitted to a ProcedureMember, the call method is executed in a separate thread.
+ * Latches are used to block its progress and trigger continuations when barrier conditions are
+ * met.
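To make the member-side wiring concrete, here is a hedged sketch (the rpcs implementation, timeouts and names are placeholders, and SubprocedureFactory is assumed to be the single-method factory interface used by ProcedureMember above) showing a trivial Subprocedure subclass being submitted through a ProcedureMember:

    // A do-nothing barrier participant; real subclasses take locks / flush regions here.
    class DemoSubprocedure extends Subprocedure {
      DemoSubprocedure(ProcedureMember member, String opName) {
        super(member, opName, new ForeignExceptionDispatcher(), 500, 60000);
      }
      @Override
      public void acquireBarrier() throws ForeignException {
        // acquire this member's piece of the global barrier (locks, quiescing, ...)
      }
      @Override
      public void insideBarrier() throws ForeignException {
        // work that must happen while every member is inside the barrier
      }
      @Override
      public void cleanup(Exception e) {
        // release anything acquireBarrier() grabbed
      }
    }

    // Factory the member uses to build subprocedures for incoming operations.
    class DemoSubprocedureFactory implements SubprocedureFactory {
      ProcedureMember member;  // set once the member has been constructed
      @Override
      public Subprocedure buildSubprocedure(String opName, byte[] data) {
        return new DemoSubprocedure(member, opName);
      }
    }

    // Wiring: 'memberRpcs' is some ProcedureMemberRpcs implementation (e.g. ZooKeeper based).
    DemoSubprocedureFactory factory = new DemoSubprocedureFactory();
    ProcedureMember member = new ProcedureMember(memberRpcs,
        ProcedureMember.defaultPool(500, 60, 1, "rs-1"), factory);
    factory.member = member;
    memberRpcs.start(member);
    member.submitSubprocedure(member.createSubprocedure("demo-proc", new byte[0]));

The submit call runs acquireBarrier() on the pool thread, votes to the coordinator via sendMemberAcquired(), and only runs insideBarrier() once receivedReachedGlobalBarrier() is delivered by the rpcs.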
+ * + * Exception that makes it out of calls to {@link #acquireBarrier()} or {@link #insideBarrier()} + * gets converted into {@link ForeignException}, which will get propagated to the + * {@link ProcedureCoordinator}. + * + * There is a category of procedure (ex: online-snapshots), and a user-specified instance-specific + * barrierName. (ex: snapshot121126). + */ +abstract public class Subprocedure implements Callable { + private static final Log LOG = LogFactory.getLog(Subprocedure.class); + + // Name of the procedure + final private String barrierName; + + // + // Execution state + // + + /** wait on before allowing the in barrier phase to proceed */ + private final CountDownLatch inGlobalBarrier; + /** counted down when the Subprocedure has completed */ + private final CountDownLatch releasedLocalBarrier; + + // + // Error handling + // + /** monitor to check for errors */ + protected final ForeignExceptionDispatcher monitor; + /** frequency to check for errors (ms) */ + protected final long wakeFrequency; + protected final TimeoutExceptionInjector executionTimeoutTimer; + protected final ProcedureMemberRpcs rpcs; + + private volatile boolean complete = false; + + /** + * @param member reference to the member managing this subprocedure + * @param procName name of the procedure this subprocedure is associated with + * @param monitor notified if there is an error in the subprocedure + * @param wakeFrequency time in millis to wake to check if there is an error via the monitor (in + * milliseconds). + * @param timeout time in millis that will trigger a subprocedure abort if it has not completed + */ + public Subprocedure(ProcedureMember member, String procName, ForeignExceptionDispatcher monitor, + long wakeFrequency, long timeout) { + // Asserts should be caught during unit testing + assert member != null : "procedure member should be non-null"; + assert member.getRpcs() != null : "rpc handlers should be non-null"; + assert procName != null : "procedure name should be non-null"; + assert monitor != null : "monitor should be non-null"; + + // Default to a very large timeout + this.rpcs = member.getRpcs(); + this.barrierName = procName; + this.monitor = monitor; + // forward any failures to coordinator. Since this is a dispatcher, resend loops should not be + // possible. + this.monitor.addListener(new ForeignExceptionListener() { + @Override + public void receive(ForeignException ee) { + // if this is a notification from a remote source, just log + if (ee.isRemote()) { + LOG.debug("Was remote foreign exception, not redispatching error", ee); + return; + } + + // if it is local, then send it to the coordinator + try { + rpcs.sendMemberAborted(Subprocedure.this, ee); + } catch (IOException e) { + // this will fail all the running procedures, since the connection is down + LOG.error("Can't reach controller, not propagating error", e); + } + } + }); + + this.wakeFrequency = wakeFrequency; + this.inGlobalBarrier = new CountDownLatch(1); + this.releasedLocalBarrier = new CountDownLatch(1); + + // accept error from timer thread, this needs to be started. 
+    this.executionTimeoutTimer = new TimeoutExceptionInjector(monitor, timeout);
+  }
+
+  public String getName() {
+    return barrierName;
+  }
+
+  public String getMemberName() {
+    return rpcs.getMemberName();
+  }
+
+  private void rethrowException() throws ForeignException {
+    monitor.rethrowException();
+  }
+
+  /**
+   * Execute the Subprocedure {@link #acquireBarrier()} and {@link #insideBarrier()} methods
+   * while keeping some state for other threads to access.
+   *
+   * This would normally be executed by the ProcedureMember when an acquire message comes from the
+   * coordinator. Rpcs are used to send messages back to the coordinator after different phases
+   * are executed. Any exceptions caught during the execution (except for InterruptedException) get
+   * converted and propagated to the coordinator via {@link ProcedureMemberRpcs#sendMemberAborted(
+   * Subprocedure, ForeignException)}.
+   */
+  @SuppressWarnings("finally")
+  final public Void call() {
+    LOG.debug("Starting subprocedure '" + barrierName + "' with timeout " +
+        executionTimeoutTimer.getMaxTime() + "ms");
+    // start the execution timeout timer
+    executionTimeoutTimer.start();
+
+    try {
+      // start by checking for error first
+      rethrowException();
+      LOG.debug("Subprocedure '" + barrierName + "' starting 'acquire' stage");
+      acquireBarrier();
+      LOG.debug("Subprocedure '" + barrierName + "' locally acquired");
+
+      // vote yes to coordinator about being prepared
+      rpcs.sendMemberAcquired(this);
+      LOG.debug("Subprocedure '" + barrierName + "' coordinator notified of 'acquire', waiting on" +
+          " 'reached' or 'abort' from coordinator");
+
+      // wait for the procedure to reach global barrier before proceeding
+      waitForReachedGlobalBarrier();
+      rethrowException(); // if Coordinator aborts, will bail from here with exception
+
+      // In traditional 2PC, if a member reaches this state the TX has been committed and the
+      // member is responsible for rolling forward and recovering and completing the subsequent
+      // operations in the case of failure. It cannot rollback.
+      //
+      // This implementation is not 2PC since it can still rollback here, and thus has different
+      // semantics.
+
+      LOG.debug("Subprocedure '" + barrierName + "' received 'reached' from coordinator.");
+      insideBarrier();
+      LOG.debug("Subprocedure '" + barrierName + "' locally completed");
+
+      // Ack that the member has executed and released local barrier
+      rpcs.sendMemberCompleted(this);
+      LOG.debug("Subprocedure '" + barrierName + "' has notified controller of completion");
+
+      // make sure we didn't get an external exception
+      rethrowException();
+    } catch (Exception e) {
+      String msg = null;
+      if (e instanceof InterruptedException) {
+        msg = "Procedure '" + barrierName + "' aborting due to interrupt!" +
+            " Likely due to pool shutdown.";
+        Thread.currentThread().interrupt();
+      } else if (e instanceof ForeignException) {
+        msg = "Subprocedure '" + barrierName + "' aborting due to a ForeignException!";
+      } else {
+        msg = "Subprocedure '" + barrierName + "' failed!";
+      }
+      LOG.error(msg, e);
+      cancel(msg, e);
+
+      LOG.debug("Subprocedure '" + barrierName + "' running cleanup.");
+      cleanup(e);
+    } finally {
+      releasedLocalBarrier.countDown();
+
+      // tell the timer we are done, if we get here successfully
+      executionTimeoutTimer.complete();
+      complete = true;
+      LOG.debug("Subprocedure '" + barrierName + "' completed.");
+      return null;
+    }
+  }
+
+  boolean isComplete() {
+    return complete;
+  }
+
+  /**
+   * Exposed for testing.
+   */
+  ForeignExceptionSnare getErrorCheckable() {
+    return this.monitor;
+  }
+
+  /**
+   * The implementation of this method should gather and hold required resources (locks, disk
+   * space, etc) to satisfy the Procedure's barrier condition. For example, this would be where
+   * to make all the regions on a RS quiescent for a procedure that requires all regions
+   * to be globally quiesced.
+   *
+   * Users should override this method. If a quiescent state is not required, this is overkill but
+   * can still be used to execute a procedure on all members and to propagate any exceptions.
+   *
+   * @throws ForeignException
+   */
+  abstract public void acquireBarrier() throws ForeignException;
+
+  /**
+   * The implementation of this method should act with the assumption that the barrier condition
+   * has been satisfied. Continuing the previous example, a condition could be that all RS's
+   * globally have been quiesced, and procedures that require this precondition could be
+   * implemented here.
+   *
+   * Users should override this method. If quiescence is not required, this can be a no-op.
+   *
+   * @throws ForeignException
+   */
+  abstract public void insideBarrier() throws ForeignException;
+
+  /**
+   * Users should override this method. The implementation of this method should roll back and
+   * clean up any temporary or partially completed state that {@link #acquireBarrier()} may have
+   * created.
+   * @param e the exception that triggered the cleanup
+   */
+  abstract public void cleanup(Exception e);
+
+  /**
+   * Method to cancel the Subprocedure by injecting an exception from an external source.
+   * @param msg reason for the cancellation
+   * @param cause the root cause of the failure
+   */
+  public void cancel(String msg, Throwable cause) {
+    LOG.error(msg, cause);
+    if (cause instanceof ForeignException) {
+      monitor.receive((ForeignException) cause);
+    } else {
+      monitor.receive(new ForeignException(getMemberName(), cause));
+    }
+  }
+
+  /**
+   * Callback for the member rpcs to call when the global barrier has been reached. This
+   * unblocks the main subprocedure execution thread so that the Subprocedure's
+   * {@link #insideBarrier()} method can be run.
+   */
+  public void receiveReachedGlobalBarrier() {
+    inGlobalBarrier.countDown();
+  }
+
+  //
+  // Subprocedure Internal State interface
+  //
+
+  /**
+   * Wait for the reached global barrier notification.
+   *
+   * Package visibility for testing
+   *
+   * @throws ForeignException
+   * @throws InterruptedException
+   */
+  void waitForReachedGlobalBarrier() throws ForeignException, InterruptedException {
+    Procedure.waitForLatch(inGlobalBarrier, monitor, wakeFrequency,
+        barrierName + ":remote acquired");
+  }
+
+  /**
+   * Waits until this subprocedure has locally completed, or has been aborted.
+   * @throws ForeignException
+   * @throws InterruptedException
+   */
+  public void waitForLocallyCompleted() throws ForeignException, InterruptedException {
+    Procedure.waitForLatch(releasedLocalBarrier, monitor, wakeFrequency,
+        barrierName + ":completed");
+  }
+
+  /**
+   * Empty Subprocedure for testing.
+   *
+   * Must be public for stubbing used in testing to work.
+ */ + public static class SubprocedureImpl extends Subprocedure { + + public SubprocedureImpl(ProcedureMember member, String opName, + ForeignExceptionDispatcher monitor, long wakeFrequency, long timeout) { + super(member, opName, monitor, wakeFrequency, timeout); + } + + @Override + public void acquireBarrier() throws ForeignException {} + + @Override + public void insideBarrier() throws ForeignException {} + + @Override + public void cleanup(Exception e) {} + }; +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinatorRpcs.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinatorRpcs.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinatorRpcs.java (revision 0) @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.errorhandling.ForeignException; + +/** + * RPCs for the coordinator to run a barriered procedure with subprocedures executed at + * distributed members. + * @see ProcedureCoordinator + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface ProcedureCoordinatorRpcs extends Closeable { + + /** + * Initialize and start threads necessary to connect an implementation's rpc mechanisms. + * @param listener + * @return true if succeed, false if encountered initialization errors. + */ + public boolean start(final ProcedureCoordinator listener); + + /** + * Notify the members that the coordinator has aborted the procedure and that it should release + * barrier resources. + * + * @param procName name of the procedure that was aborted + * @param cause the reason why the procedure needs to be aborted + * @throws IOException if the rpcs can't reach the other members of the procedure (and can't + * recover). 
+ */ + public void sendAbortToMembers(Procedure procName, ForeignException cause) throws IOException; + + /** + * Notify the members to acquire barrier for the procedure + * + * @param procName name of the procedure to start + * @param info information that should be passed to all members + * @param members names of the members requested to reach the acquired phase + * @throws IllegalArgumentException if the procedure was already marked as failed + * @throws IOException if we can't reach the remote notification mechanism + */ + public void sendGlobalBarrierAcquire(Procedure procName, byte[] info, List members) + throws IOException, IllegalArgumentException; + + /** + * Notify members that all members have acquired their parts of the barrier and that they can + * now execute under the global barrier. + * + * Must come after calling {@link #sendGlobalBarrierAcquire(Procedure, byte[], List)} + * + * @param procName name of the procedure to start + * @param members members to tell we have reached in-barrier phase + * @throws IOException if we can't reach the remote notification mechanism + */ + public void sendGlobalBarrierReached(Procedure procName, List members) throws IOException; + + /** + * Notify Members to reset the distributed state for procedure + * @param procName name of the procedure to reset + * @throws IOException if the remote notification mechanism cannot be reached + */ + public void resetMembers(Procedure procName) throws IOException; +} Index: src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/Procedure.java (revision 0) @@ -0,0 +1,377 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionListener; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare; +import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector; + +import com.google.common.collect.Lists; + +/** + * A globally-barriered distributed procedure. 
This class encapsulates state and methods for + * tracking and managing a distributed procedure, as well as aborting if any member encounters + * a problem or if a cancellation is requested. + *

+ * All procedures first attempt to reach a barrier point with the {@link #sendGlobalBarrierStart()} + * method. The procedure contacts all members and waits for all subprocedures to execute + * {@link Subprocedure#acquireBarrier} to acquire its local piece of the global barrier and then + * send acquisition info back to the coordinator. If all acquisitions at subprocedures succeed, + * the coordinator then will call {@link #sendGlobalBarrierReached()}. This notifies members to + * execute the {@link Subprocedure#insideBarrier()} method. The procedure is blocked until all + * {@link Subprocedure#insideBarrier} executions complete at the members. When + * {@link Subprocedure#insideBarrier} completes at each member, the member sends notification to + * the coordinator. Once all members complete, the coordinator calls + * {@link #sendGlobalBarrierComplete()}. + *
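A rough coordinator-side counterpart of the paragraph above (hedged: ProcedureCoordinator and the return type of startProcedure are only referenced, not defined, in this patch, so the shape of that call is an assumption; exception handling is omitted):

    // 'coordinator' is an already-started ProcedureCoordinator; members are the expected
    // participant names.
    List<String> members = java.util.Arrays.asList("rs-1", "rs-2", "rs-3");
    ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher();

    // Assumed to hand back the running Procedure so the caller can block on it.
    Procedure proc = coordinator.startProcedure(monitor, "demo-proc", new byte[0], members);
    proc.waitForCompleted();   // returns once every member has acquired, executed and released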

+ * If errors are encountered remotely, they are forwarded to the coordinator, and + * {@link Subprocedure#cleanup(Exception)} is called. + *

+ * Each Procedure and each Subprocedure enforces a time limit on the execution time. If the time + * limit expires before the procedure completes the {@link TimeoutExceptionInjector} will trigger + * an {@link ForeignException} to abort the procedure. This is particularly useful for situations + * when running a distributed {@link Subprocedure} so participants can avoid blocking for extreme + * amounts of time if one of the participants fails or takes a really long time (e.g. GC pause). + *

+ * Users should generally not directly create or subclass instances of this. They are created + * for them implicitly via {@link ProcedureCoordinator#startProcedure(ForeignExceptionDispatcher, + * String, byte[], List)}} + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Procedure implements Callable, ForeignExceptionListener { + private static final Log LOG = LogFactory.getLog(Procedure.class); + + // + // Arguments and naming + // + + // Name of the procedure + final private String procName; + // Arguments for this procedure execution + final private byte[] args; + + // + // Execution State + // + /** latch for waiting until all members have acquire in barrier state */ + final CountDownLatch acquiredBarrierLatch; + /** latch for waiting until all members have executed and released their in barrier state */ + final CountDownLatch releasedBarrierLatch; + /** latch for waiting until a procedure has completed */ + final CountDownLatch completedLatch; + /** monitor to check for errors */ + private final ForeignExceptionDispatcher monitor; + + // + // Execution Timeout Handling. + // + + /** frequency to check for errors (ms) */ + protected final long wakeFrequency; + protected final TimeoutExceptionInjector timeoutInjector; + + // + // Members' and Coordinator's state + // + + /** lock to prevent nodes from acquiring and then releasing before we can track them */ + private Object joinBarrierLock = new Object(); + private final List acquiringMembers; + private final List inBarrierMembers; + private ProcedureCoordinator coord; + + /** + * Creates a procedure. (FOR TESTING) + * + * {@link Procedure} state to be run by a {@link ProcedureCoordinator}. + * @param coord coordinator to call back to for general errors (e.g. + * {@link ProcedureCoordinator#rpcConnectionFailure(String, IOException)}). + * @param monitor error monitor to check for external errors + * @param wakeFreq frequency to check for errors while waiting + * @param timeout amount of time to allow the procedure to run before cancelling + * @param procName name of the procedure instance + * @param args argument data associated with the procedure instance + * @param expectedMembers names of the expected members + */ + public Procedure(ProcedureCoordinator coord, ForeignExceptionDispatcher monitor, long wakeFreq, + long timeout, String procName, byte[] args, List expectedMembers) { + this.coord = coord; + this.acquiringMembers = new ArrayList(expectedMembers); + this.inBarrierMembers = new ArrayList(acquiringMembers.size()); + this.procName = procName; + this.args = args; + this.monitor = monitor; + this.wakeFrequency = wakeFreq; + + int count = expectedMembers.size(); + this.acquiredBarrierLatch = new CountDownLatch(count); + this.releasedBarrierLatch = new CountDownLatch(count); + this.completedLatch = new CountDownLatch(1); + this.timeoutInjector = new TimeoutExceptionInjector(monitor, timeout); + } + + /** + * Create a procedure. + * + * Users should generally not directly create instances of this. They are created them + * implicitly via {@link ProcedureCoordinator#createProcedure(ForeignExceptionDispatcher, + * String, byte[], List)}} + * + * @param coord coordinator to call back to for general errors (e.g. + * {@link ProcedureCoordinator#rpcConnectionFailure(String, IOException)}). 
+ * @param wakeFreq frequency to check for errors while waiting + * @param timeout amount of time to allow the procedure to run before cancelling + * @param procName name of the procedure instance + * @param args argument data associated with the procedure instance + * @param expectedMembers names of the expected members + */ + public Procedure(ProcedureCoordinator coord, long wakeFreq, long timeout, + String procName, byte[] args, List expectedMembers) { + this(coord, new ForeignExceptionDispatcher(), wakeFreq, timeout, procName, args, + expectedMembers); + } + + public String getName() { + return procName; + } + + /** + * @return String of the procedure members both trying to enter the barrier and already in barrier + */ + public String getStatus() { + String waiting, done; + synchronized (joinBarrierLock) { + waiting = acquiringMembers.toString(); + done = inBarrierMembers.toString(); + } + return "Procedure " + procName + " { waiting=" + waiting + " done="+ done + " }"; + } + + /** + * Get the ForeignExceptionDispatcher + * @return the Procedure's monitor. + */ + public ForeignExceptionDispatcher getErrorMonitor() { + return monitor; + } + + /** + * This call is the main execution thread of the barriered procedure. It sends messages and + * essentially blocks until all procedure members acquire or later complete but periodically + * checks for foreign exceptions. + */ + @Override + @SuppressWarnings("finally") + final public Void call() { + LOG.info("Starting procedure '" + procName + "'"); + // start the timer + timeoutInjector.start(); + + // run the procedure + try { + // start by checking for error first + monitor.rethrowException(); + LOG.debug("Procedure '" + procName + "' starting 'acquire'"); + sendGlobalBarrierStart(); + + // wait for all the members to report acquisition + LOG.debug("Waiting for all members to 'acquire'"); + waitForLatch(acquiredBarrierLatch, monitor, wakeFrequency, "acquired"); + monitor.rethrowException(); + + LOG.debug("Procedure '" + procName + "' starting 'in-barrier' execution."); + sendGlobalBarrierReached(); + + // wait for all members to report barrier release + waitForLatch(releasedBarrierLatch, monitor, wakeFrequency, "released"); + + // make sure we didn't get an error during in barrier execution and release + monitor.rethrowException(); + LOG.info("Procedure '" + procName + "' execution completed"); + } catch (Exception e) { + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + String msg = "Procedure '" + procName +"' execution failed!"; + LOG.error(msg, e); + receive(new ForeignException(getName(), e)); + } finally { + LOG.debug("Running finish phase."); + sendGlobalBarrierComplete(); + completedLatch.countDown(); + + // tell the timer we are done, if we get here successfully + timeoutInjector.complete(); + return null; + } + } + + /** + * Sends a message to Members to create a new {@link Subprocedure} for this Procedure and execute + * the {@link Subprocedure#acquireBarrier} step. + * @throws ForeignException + */ + public void sendGlobalBarrierStart() throws ForeignException { + // start the procedure + LOG.debug("Starting procedure '" + procName + "', kicking off acquire phase on members."); + try { + // send procedure barrier start to specified list of members. 
cloning the list to avoid + // concurrent modification from the controller setting the prepared nodes + coord.getRpcs().sendGlobalBarrierAcquire(this, args, Lists.newArrayList(this.acquiringMembers)); + } catch (IOException e) { + coord.rpcConnectionFailure("Can't reach controller.", e); + } catch (IllegalArgumentException e) { + throw new ForeignException(getName(), e); + } + } + + /** + * Sends a message to all members that the global barrier condition has been satisfied. This + * should only be executed after all members have completed its + * {@link Subprocedure#acquireBarrier()} call successfully. This triggers the member + * {@link Subprocedure#insideBarrier} method. + * @throws ForeignException + */ + public void sendGlobalBarrierReached() throws ForeignException { + try { + // trigger to have member run {@link Subprocedure#insideBarrier} + coord.getRpcs().sendGlobalBarrierReached(this, Lists.newArrayList(inBarrierMembers)); + } catch (IOException e) { + coord.rpcConnectionFailure("Can't reach controller.", e); + } + } + + /** + * Sends a message to members that all {@link Subprocedure#insideBarrier} calls have completed. + * After this executes, the coordinator can assume that any state resources about this barrier + * procedure state has been released. + */ + public void sendGlobalBarrierComplete() { + LOG.debug("Finished coordinator procedure - removing self from list of running procedures"); + try { + coord.getRpcs().resetMembers(this); + } catch (IOException e) { + coord.rpcConnectionFailure("Failed to reset procedure:" + procName, e); + } + } + + // + // Call backs from other external processes. + // + + /** + * Call back triggered by an individual member upon successful local barrier acquisition + * @param member + */ + public void barrierAcquiredByMember(String member) { + LOG.debug("member: '" + member + "' joining prepared barrier for procedure '" + procName + + "' on coordinator"); + if (this.acquiringMembers.contains(member)) { + synchronized (joinBarrierLock) { + if (this.acquiringMembers.remove(member)) { + this.inBarrierMembers.add(member); + acquiredBarrierLatch.countDown(); + } + } + LOG.debug("Waiting on: " + acquiredBarrierLatch + " remaining members to acquire global barrier"); + } else { + LOG.warn("Member " + member + " joined barrier, but we weren't waiting on it to join." + + " Continuing on."); + } + } + + /** + * Call back triggered by a individual member upon successful local in-barrier execution and + * release + * @param member + */ + public void barrierReleasedByMember(String member) { + boolean removed = false; + synchronized (joinBarrierLock) { + removed = this.inBarrierMembers.remove(member); + if (removed) { + releasedBarrierLatch.countDown(); + } + } + if (removed) { + LOG.debug("Member: '" + member + "' released barrier for procedure'" + procName + + "', counting down latch. Waiting for " + releasedBarrierLatch.getCount() + + " more"); + } else { + LOG.warn("Member: '" + member + "' released barrier for procedure'" + procName + + "', but we weren't waiting on it to release!"); + } + } + + /** + * Waits until the entire procedure has globally completed, or has been aborted. + * @throws ForeignException + * @throws InterruptedException + */ + public void waitForCompleted() throws ForeignException, InterruptedException { + waitForLatch(completedLatch, monitor, wakeFrequency, procName + " completed"); + } + + /** + * A callback that handles incoming ForeignExceptions. 
+ */ + @Override + public void receive(ForeignException e) { + monitor.receive(e); + } + + /** + * Wait for latch to count to zero, ignoring any spurious wake-ups, but waking periodically to + * check for errors + * @param latch latch to wait on + * @param monitor monitor to check for errors while waiting + * @param wakeFrequency frequency to wake up and check for errors (in + * {@link TimeUnit#MILLISECONDS}) + * @param latchDescription description of the latch, for logging + * @throws ForeignException type of error the monitor can throw, if the task fails + * @throws InterruptedException if we are interrupted while waiting on latch + */ + public static void waitForLatch(CountDownLatch latch, ForeignExceptionSnare monitor, + long wakeFrequency, String latchDescription) throws ForeignException, + InterruptedException { + boolean released = false; + while (!released) { + if (monitor != null) { + monitor.rethrowException(); + } + /* + ForeignExceptionDispatcher.LOG.debug("Waiting for '" + latchDescription + "' latch. (sleep:" + + wakeFrequency + " ms)"); */ + released = latch.await(wakeFrequency, TimeUnit.MILLISECONDS); + } + } +} Index: src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMemberRpcs.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMemberRpcs.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/ProcedureMemberRpcs.java (revision 0) @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import java.io.Closeable; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.errorhandling.ForeignException; + +/** + * This is the notification interface for Procedures that encapsulates message passing from + * members to a coordinator. Each of these calls should send a message to the coordinator. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface ProcedureMemberRpcs extends Closeable { + + /** + * Initialize and start any threads or connections the member needs. + */ + public void start(ProcedureMember member); + + /** + * Each subprocedure is being executed on a member. This is the identifier for the member. + * @return the member name + */ + public String getMemberName(); + + /** + * Notify the coordinator that we aborted the specified {@link Subprocedure} + * + * @param sub the {@link Subprocedure} we are aborting + * @param cause the reason why the member's subprocedure aborted + * @throws IOException thrown when the rpcs can't reach the other members of the procedure (and + * thus can't recover). 
+ */ + public void sendMemberAborted(Subprocedure sub, ForeignException cause) throws IOException; + + /** + * Notify the coordinator that the specified {@link Subprocedure} has acquired the locally required + * barrier condition. + * + * @param sub the specified {@link Subprocedure} + * @throws IOException if we can't reach the coordinator + */ + public void sendMemberAcquired(Subprocedure sub) throws IOException; + + /** + * Notify the coordinator that the specified {@link Subprocedure} has completed the work that + * needed to be done under the global barrier. + * + * @param sub the specified {@link Subprocedure} + * @throws IOException if we can't reach the coordinator + */ + public void sendMemberCompleted(Subprocedure sub) throws IOException; +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java (revision 0) @@ -0,0 +1,267 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.zookeeper.KeeperException; + +import com.google.protobuf.InvalidProtocolBufferException; + +/** + * ZooKeeper based {@link ProcedureCoordinatorRpcs} for a {@link ProcedureCoordinator} + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs { + public static final Log LOG = LogFactory.getLog(ZKProcedureCoordinatorRpcs.class); + private ZKProcedureUtil zkProc = null; + protected ProcedureCoordinator coordinator = null; // if started this should be non-null + + ZooKeeperWatcher watcher; + String procedureType; + String coordName; + + /** + * @param watcher zookeeper watcher. 
Owned by this and closed via {@link #close()} + * @param procedureClass procedure type name is a category for when there are multiple kinds of + * procedures.-- this becomes a znode so be aware of the naming restrictions + * @param coordName name of the node running the coordinator + * @throws KeeperException if an unexpected zk error occurs + */ + public ZKProcedureCoordinatorRpcs(ZooKeeperWatcher watcher, + String procedureClass, String coordName) throws KeeperException { + this.watcher = watcher; + this.procedureType = procedureClass; + this.coordName = coordName; + } + + /** + * The "acquire" phase. The coordinator creates a new procType/acquired/ znode dir. If znodes + * appear, first acquire to relevant listener or sets watch waiting for notification of + * the acquire node + * + * @param proc the Procedure + * @param info data to be stored in the acquire node + * @param nodeNames children of the acquire phase + * @throws IOException if any failure occurs. + */ + @Override + final public void sendGlobalBarrierAcquire(Procedure proc, byte[] info, List nodeNames) + throws IOException, IllegalArgumentException { + String procName = proc.getName(); + // start watching for the abort node + String abortNode = zkProc.getAbortZNode(procName); + try { + // check to see if the abort node already exists + if (ZKUtil.watchAndCheckExists(zkProc.getWatcher(), abortNode)) { + abort(abortNode); + } + // If we get an abort node watch triggered here, we'll go complete creating the acquired + // znode but then handle the acquire znode and bail out + } catch (KeeperException e) { + LOG.error("Failed to watch abort", e); + throw new IOException("Failed while watching abort node:" + abortNode, e); + } + + // create the acquire barrier + String acquire = zkProc.getAcquiredBarrierNode(procName); + LOG.debug("Creating acquire znode:" + acquire); + try { + // notify all the procedure listeners to look for the acquire node + byte[] data = ProtobufUtil.prependPBMagic(info); + ZKUtil.createWithParents(zkProc.getWatcher(), acquire, data); + // loop through all the children of the acquire phase and watch for them + for (String node : nodeNames) { + String znode = ZKUtil.joinZNode(acquire, node); + LOG.debug("Watching for acquire node:" + znode); + if (ZKUtil.watchAndCheckExists(zkProc.getWatcher(), znode)) { + coordinator.memberAcquiredBarrier(procName, node); + } + } + } catch (KeeperException e) { + throw new IOException("Failed while creating acquire node:" + acquire, e); + } + } + + @Override + public void sendGlobalBarrierReached(Procedure proc, List nodeNames) throws IOException { + String procName = proc.getName(); + String reachedNode = zkProc.getReachedBarrierNode(procName); + LOG.debug("Creating reached barrier zk node:" + reachedNode); + try { + // create the reached znode and watch for the reached znodes + ZKUtil.createWithParents(zkProc.getWatcher(), reachedNode); + // loop through all the children of the acquire phase and watch for them + for (String node : nodeNames) { + String znode = ZKUtil.joinZNode(reachedNode, node); + if (ZKUtil.watchAndCheckExists(zkProc.getWatcher(), znode)) { + coordinator.memberFinishedBarrier(procName, node); + } + } + } catch (KeeperException e) { + throw new IOException("Failed while creating reached node:" + reachedNode, e); + } + } + + + /** + * Delete znodes that are no longer in use. 
+ */ + @Override + final public void resetMembers(Procedure proc) throws IOException { + String procName = proc.getName(); + boolean stillGettingNotifications = false; + do { + try { + LOG.debug("Attempting to clean out zk node for op:" + procName); + zkProc.clearZNodes(procName); + stillGettingNotifications = false; + } catch (KeeperException.NotEmptyException e) { + // recursive delete isn't transactional (yet) so we need to deal with cases where we get + // children trickling in + stillGettingNotifications = true; + } catch (KeeperException e) { + throw new IOException("Failed to complete reset procedure " + procName, e); + } + } while (stillGettingNotifications); + } + + /** + * Start monitoring znodes in ZK - subclass hook to start monitoring znodes they are about. + * @return true if succeed, false if encountered initialization errors. + */ + final public boolean start(final ProcedureCoordinator coordinator) { + if (this.coordinator != null) { + throw new IllegalStateException( + "ZKProcedureCoordinator already started and already has listener installed"); + } + this.coordinator = coordinator; + + try { + this.zkProc = new ZKProcedureUtil(watcher, procedureType, coordName) { + @Override + public void nodeCreated(String path) { + if (!isInProcedurePath(path)) return; + LOG.debug("Node created: " + path); + logZKTree(this.baseZNode); + if (isAcquiredPathNode(path)) { + // node wasn't present when we created the watch so zk event triggers acquire + coordinator.memberAcquiredBarrier(ZKUtil.getNodeName(ZKUtil.getParent(path)), + ZKUtil.getNodeName(path)); + } else if (isReachedPathNode(path)) { + // node was absent when we created the watch so zk event triggers the finished barrier. + + // TODO Nothing enforces that acquire and reached znodes from showing up in wrong order. + coordinator.memberFinishedBarrier(ZKUtil.getNodeName(ZKUtil.getParent(path)), + ZKUtil.getNodeName(path)); + } else if (isAbortPathNode(path)) { + abort(path); + } + } + }; + zkProc.clearChildZNodes(); + } catch (KeeperException e) { + LOG.error("Unable to start the ZK-based Procedure Coordinator rpcs.", e); + return false; + } + + LOG.debug("Starting the controller for procedure member:" + zkProc.getMemberName()); + return true; + } + + /** + * This is the abort message being sent by the coordinator to member + * + * TODO this code isn't actually used but can be used to issue a cancellation from the + * coordinator. + */ + @Override + final public void sendAbortToMembers(Procedure proc, ForeignException ee) { + String procName = proc.getName(); + LOG.debug("Aborting procedure '" + procName + "' in zk"); + String procAbortNode = zkProc.getAbortZNode(procName); + try { + LOG.debug("Creating abort znode:" + procAbortNode); + String source = (ee.getSource() == null) ? 
coordName : ee.getSource(); + byte[] errorInfo = ProtobufUtil.prependPBMagic(ForeignException.serialize(source, ee)); + // first create the znode for the procedure + ZKUtil.createAndFailSilent(zkProc.getWatcher(), procAbortNode, errorInfo); + LOG.debug("Finished creating abort node:" + procAbortNode); + } catch (KeeperException e) { + // possible that we get this error for the procedure if we already reset the zk state, but in + // that case we should still get an error for that procedure anyways + zkProc.logZKTree(zkProc.baseZNode); + coordinator.rpcConnectionFailure("Failed to post zk node:" + procAbortNode + + " to abort procedure '" + procName + "'", new IOException(e)); + } + } + + /** + * Receive a notification and propagate it to the local coordinator + * @param abortNode full znode path to the failed procedure information + */ + protected void abort(String abortNode) { + String procName = ZKUtil.getNodeName(abortNode); + ForeignException ee = null; + try { + byte[] data = ZKUtil.getData(zkProc.getWatcher(), abortNode); + if (!ProtobufUtil.isPBMagicPrefix(data)) { + LOG.warn("Got an error notification for op:" + abortNode + + " but we can't read the information. Killing the procedure."); + // we got a remote exception, but we can't describe it + ee = new ForeignException(coordName, "Data in abort node is illegally formatted. ignoring content."); + } else { + + data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); + ee = ForeignException.deserialize(data); + } + } catch (InvalidProtocolBufferException e) { + LOG.warn("Got an error notification for op:" + abortNode + + " but we can't read the information. Killing the procedure."); + // we got a remote exception, but we can't describe it + ee = new ForeignException(coordName, e); + } catch (KeeperException e) { + coordinator.rpcConnectionFailure("Failed to get data for abort node:" + abortNode + + zkProc.getAbortZnode(), new IOException(e)); + } + coordinator.abortProcedure(procName, ee); + } + + @Override + final public void close() throws IOException { + zkProc.close(); + } + + /** + * Used in testing + */ + final ZKProcedureUtil getZkProcedureUtil() { + return zkProc; + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/procedure/SubprocedureFactory.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/SubprocedureFactory.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/SubprocedureFactory.java (revision 0) @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.procedure; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Task builder to build instances of a {@link ProcedureMember}'s {@link Subprocedure}s. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface SubprocedureFactory { + + /** + * Build {@link Subprocedure} when requested. + * @param procName name of the procedure associated with this subprocedure + * @param procArgs arguments passed from the coordinator about the procedure + * @return {@link Subprocedure} to run or null if the no operation should be run + * @throws IllegalArgumentException if the operation could not be run because of errors in the + * request + * @throws IllegalStateException if the current runner cannot accept any more new requests + */ + public Subprocedure buildSubprocedure(String procName, byte[] procArgs); +} Index: src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureUtil.java (revision 0) @@ -0,0 +1,286 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.zookeeper.KeeperException; + +/** + * This is a shared ZooKeeper-based znode management utils for distributed procedure. All znode + * operations should go through the provided methods in coordinators and members. + * + * Layout of nodes in ZK is + * /hbase/[op name]/acquired/ + * [op instance] - op data/ + * /[nodes that have acquired] + * /reached/ + * [op instance]/ + * /[nodes that have completed] + * /abort/ + * [op instance] - failure data + * + * NOTE: while acquired and completed are znode dirs, abort is actually just a znode. 
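+ * <p>
+ * For example (all names below are purely illustrative, not taken from this patch), a procedure
+ * type "myProcType" with a running instance "instance1" and two members would produce a tree
+ * along the lines of:
+ * <pre>
+ * /hbase/myProcType/acquired/instance1           (procedure arguments stored as node data)
+ * /hbase/myProcType/acquired/instance1/member1
+ * /hbase/myProcType/acquired/instance1/member2
+ * /hbase/myProcType/reached/instance1
+ * /hbase/myProcType/reached/instance1/member1
+ * /hbase/myProcType/reached/instance1/member2
+ * /hbase/myProcType/abort/instance1              (only present on failure, with error data)
+ * </pre>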
+ * + * Assumption here that procedure names are unique + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class ZKProcedureUtil + extends ZooKeeperListener implements Closeable { + + private static final Log LOG = LogFactory.getLog(ZKProcedureUtil.class); + + public static final String ACQUIRED_BARRIER_ZNODE_DEFAULT = "acquired"; + public static final String REACHED_BARRIER_ZNODE_DEFAULT = "reached"; + public static final String ABORT_ZNODE_DEFAULT = "abort"; + + public final String baseZNode; + protected final String acquiredZnode; + protected final String reachedZnode; + protected final String abortZnode; + + protected final String memberName; + + /** + * Top-level watcher/controller for procedures across the cluster. + *
+ * <p>
+ * On instantiation, this ensures the procedure znodes exist. This however requires the passed in + * watcher has been started. + * @param watcher watcher for the cluster ZK. Owned by this and closed via + * {@link #close()} + * @param procDescription name of the znode describing the procedure to run + * @param memberName name of the member from which we are interacting with running procedures + * @throws KeeperException when the procedure znodes cannot be created + */ + public ZKProcedureUtil(ZooKeeperWatcher watcher, String procDescription, + String memberName) throws KeeperException { + super(watcher); + this.memberName = memberName; + // make sure we are listening for events + watcher.registerListener(this); + // setup paths for the zknodes used in procedures + this.baseZNode = ZKUtil.joinZNode(watcher.baseZNode, procDescription); + acquiredZnode = ZKUtil.joinZNode(baseZNode, ACQUIRED_BARRIER_ZNODE_DEFAULT); + reachedZnode = ZKUtil.joinZNode(baseZNode, REACHED_BARRIER_ZNODE_DEFAULT); + abortZnode = ZKUtil.joinZNode(baseZNode, ABORT_ZNODE_DEFAULT); + + // first make sure all the ZK nodes exist + // make sure all the parents exist (sometimes not the case in tests) + ZKUtil.createWithParents(watcher, acquiredZnode); + // regular create because all the parents exist + ZKUtil.createAndFailSilent(watcher, reachedZnode); + ZKUtil.createAndFailSilent(watcher, abortZnode); + } + + @Override + public void close() throws IOException { + if (watcher != null) { + watcher.close(); + } + } + + public String getAcquiredBarrierNode(String opInstanceName) { + return ZKProcedureUtil.getAcquireBarrierNode(this, opInstanceName); + } + + public String getReachedBarrierNode(String opInstanceName) { + return ZKProcedureUtil.getReachedBarrierNode(this, opInstanceName); + } + + public String getAbortZNode(String opInstanceName) { + return ZKProcedureUtil.getAbortNode(this, opInstanceName); + } + + public String getAbortZnode() { + return abortZnode; + } + + public String getBaseZnode() { + return baseZNode; + } + + public String getAcquiredBarrier() { + return acquiredZnode; + } + + public String getMemberName() { + return memberName; + } + + /** + * Get the full znode path for the node used by the coordinator to trigger a global barrier + * acquire on each subprocedure. + * @param controller controller running the procedure + * @param opInstanceName name of the running procedure instance (not the procedure description). + * @return full znode path to the prepare barrier/start node + */ + public static String getAcquireBarrierNode(ZKProcedureUtil controller, + String opInstanceName) { + return ZKUtil.joinZNode(controller.acquiredZnode, opInstanceName); + } + + /** + * Get the full znode path for the node used by the coordinator to trigger a global barrier + * execution and release on each subprocedure. + * @param controller controller running the procedure + * @param opInstanceName name of the running procedure instance (not the procedure description). + * @return full znode path to the commit barrier + */ + public static String getReachedBarrierNode(ZKProcedureUtil controller, + String opInstanceName) { + return ZKUtil.joinZNode(controller.reachedZnode, opInstanceName); + } + + /** + * Get the full znode path for the node used by the coordinator or member to trigger an abort + * of the global barrier acquisition or execution in subprocedures. + * @param controller controller running the procedure + * @param opInstanceName name of the running procedure instance (not the procedure description). 
+ * @return full znode path to the abort znode + */ + public static String getAbortNode(ZKProcedureUtil controller, String opInstanceName) { + return ZKUtil.joinZNode(controller.abortZnode, opInstanceName); + } + + public ZooKeeperWatcher getWatcher() { + return watcher; + } + + /** + * Is this a procedure related znode path? + * + * TODO: this is not strict, can return true if had name just starts with same prefix but is + * different zdir. + * + * @return true if starts with baseZnode + */ + boolean isInProcedurePath(String path) { + return path.startsWith(baseZNode); + } + + /** + * Is this the exact procedure barrier acquired znode + */ + boolean isAcquiredNode(String path) { + return path.equals(acquiredZnode); + } + + + /** + * Is this in the procedure barrier acquired znode path + */ + boolean isAcquiredPathNode(String path) { + return path.startsWith(this.acquiredZnode) && !path.equals(acquiredZnode); + } + + /** + * Is this the exact procedure barrier reached znode + */ + boolean isReachedNode(String path) { + return path.equals(reachedZnode); + } + + /** + * Is this in the procedure barrier reached znode path + */ + boolean isReachedPathNode(String path) { + return path.startsWith(this.reachedZnode) && !path.equals(reachedZnode); + } + + + /** + * Is this in the procedure barrier abort znode path + */ + boolean isAbortNode(String path) { + return path.equals(abortZnode); + } + + /** + * Is this in the procedure barrier abort znode path + */ + public boolean isAbortPathNode(String path) { + return path.startsWith(this.abortZnode) && !path.equals(abortZnode); + } + + // -------------------------------------------------------------------------- + // internal debugging methods + // -------------------------------------------------------------------------- + /** + * Recursively print the current state of ZK (non-transactional) + * @param root name of the root directory in zk to print + * @throws KeeperException + */ + void logZKTree(String root) { + if (!LOG.isDebugEnabled()) return; + LOG.debug("Current zk system:"); + String prefix = "|-"; + LOG.debug(prefix + root); + try { + logZKTree(root, prefix); + } catch (KeeperException e) { + throw new RuntimeException(e); + } + } + + /** + * Helper method to print the current state of the ZK tree. + * @see #logZKTree(String) + * @throws KeeperException if an unexpected exception occurs + */ + protected void logZKTree(String root, String prefix) throws KeeperException { + List children = ZKUtil.listChildrenNoWatch(watcher, root); + if (children == null) return; + for (String child : children) { + LOG.debug(prefix + child); + String node = ZKUtil.joinZNode(root.equals("/") ? "" : root, child); + logZKTree(node, prefix + "---"); + } + } + + public void clearChildZNodes() throws KeeperException { + // TODO This is potentially racy since not atomic. update when we support zk that has multi + LOG.info("Clearing all procedure znodes: " + acquiredZnode + " " + reachedZnode + " " + + abortZnode); + + // If the coordinator was shutdown mid-procedure, then we are going to lose + // an procedure that was previously started by cleaning out all the previous state. Its much + // harder to figure out how to keep an procedure going and the subject of HBASE-5487. 
+ ZKUtil.deleteChildrenRecursively(watcher, acquiredZnode); + ZKUtil.deleteChildrenRecursively(watcher, reachedZnode); + ZKUtil.deleteChildrenRecursively(watcher, abortZnode); + } + + public void clearZNodes(String procedureName) throws KeeperException { + // TODO This is potentially racy since not atomic. update when we support zk that has multi + LOG.info("Clearing all znodes for procedure " + procedureName + "including nodes " + + acquiredZnode + " " + reachedZnode + " " + abortZnode); + ZKUtil.deleteNodeRecursively(watcher, getAcquiredBarrierNode(procedureName)); + ZKUtil.deleteNodeRecursively(watcher, getReachedBarrierNode(procedureName)); + ZKUtil.deleteNodeRecursively(watcher, getAbortZNode(procedureName)); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/ProcedureCoordinator.java (revision 0) @@ -0,0 +1,268 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.SynchronousQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.DaemonThreadFactory; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; + +import com.google.common.collect.MapMaker; + +/** + * This is the master side of a distributed complex procedure execution. + *
+ * <p>
+ * The {@link Procedure} is generic and subclassing or customization shouldn't be + * necessary -- any customization should happen just in {@link Subprocedure}s. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ProcedureCoordinator { + private static final Log LOG = LogFactory.getLog(ProcedureCoordinator.class); + + final static long TIMEOUT_MILLIS_DEFAULT = 60000; + final static long WAKE_MILLIS_DEFAULT = 500; + + private final ProcedureCoordinatorRpcs rpcs; + private final ExecutorService pool; + + // Running procedure table. Maps procedure name to running procedure reference + private final ConcurrentMap procedures = + new MapMaker().concurrencyLevel(4).weakValues().makeMap(); + + /** + * Create and start a ProcedureCoordinator. + * + * The rpc object registers the ProcedureCoordinator and starts any threads in this + * constructor. + * + * @param rpcs + * @param pool Used for executing procedures. + */ + public ProcedureCoordinator(ProcedureCoordinatorRpcs rpcs, ThreadPoolExecutor pool) { + this.rpcs = rpcs; + this.pool = pool; + this.rpcs.start(this); + } + + /** + * Default thread pool for the procedure + */ + public static ThreadPoolExecutor defaultPool(String coordName, long keepAliveTime, int opThreads, + long wakeFrequency) { + return new ThreadPoolExecutor(1, opThreads, keepAliveTime, TimeUnit.SECONDS, + new SynchronousQueue(), + new DaemonThreadFactory("(" + coordName + ")-proc-coordinator-pool")); + } + + /** + * Shutdown the thread pools and release rpc resources + * @throws IOException + */ + public void close() throws IOException { + // have to use shutdown now to break any latch waiting + pool.shutdownNow(); + rpcs.close(); + } + + /** + * Submit an procedure to kick off its dependent subprocedures. + * @param proc Procedure to execute + * @return true if the procedure was started correctly, false if the + * procedure or any subprocedures could not be started. Failure could be due to + * submitting a procedure multiple times (or one with the same name), or some sort + * of IO problem. On errors, the procedure's monitor holds a reference to the exception + * that caused the failure. + */ + boolean submitProcedure(Procedure proc) { + // if the submitted procedure was null, then we don't want to run it + if (proc == null) { + return false; + } + String procName = proc.getName(); + + // make sure we aren't already running a procedure of that name + synchronized (procedures) { + Procedure oldProc = procedures.get(procName); + if (oldProc != null) { + // procedures are always eventually completed on both successful and failed execution + if (oldProc.completedLatch.getCount() != 0) { + LOG.warn("Procedure " + procName + " currently running. Rejecting new request"); + return false; + } + LOG.debug("Procedure " + procName + " was in running list but was completed. Accepting new attempt."); + procedures.remove(procName); + } + } + + // kick off the procedure's execution in a separate thread + Future f = null; + try { + synchronized (procedures) { + f = this.pool.submit(proc); + // if everything got started properly, we can add it known running procedures + this.procedures.put(procName, proc); + } + return true; + } catch (RejectedExecutionException e) { + LOG.warn("Procedure " + procName + " rejected by execution pool. 
Propagating error and " + + "cancelling operation.", e); + // the thread pool is full and we can't run the procedure + proc.receive(new ForeignException(procName, e)); + + // cancel procedure proactively + if (f != null) { + f.cancel(true); + } + } + return false; + } + + /** + * The connection to the rest of the procedure group (members and coordinator) has been + * broken/lost/failed. This should fail any interested procedures, but not attempt to notify other + * members since we cannot reach them anymore. + * @param message description of the error + * @param cause the actual cause of the failure + */ + void rpcConnectionFailure(final String message, final IOException cause) { + Collection toNotify = procedures.values(); + + for (Procedure proc : toNotify) { + if (proc == null) { + continue; + } + // notify the elements, if they aren't null + proc.receive(new ForeignException(proc.getName(), cause)); + } + } + + /** + * Abort the procedure with the given name + * @param procName name of the procedure to abort + * @param reason serialized information about the abort + */ + public void abortProcedure(String procName, ForeignException reason) { + // if we know about the Procedure, notify it + synchronized(procedures) { + Procedure proc = procedures.get(procName); + if (proc == null) { + return; + } + proc.receive(reason); + } + } + + /** + * Exposed for hooking with unit tests. + * @param procName + * @param procArgs + * @param expectedMembers + * @return + */ + Procedure createProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs, + List expectedMembers) { + // build the procedure + return new Procedure(this, fed, WAKE_MILLIS_DEFAULT, TIMEOUT_MILLIS_DEFAULT, + procName, procArgs, expectedMembers); + } + + /** + * Kick off the named procedure + * @param procName name of the procedure to start + * @param procArgs arguments for the procedure + * @param expectedMembers expected members to start + * @return handle to the running procedure, if it was started correctly, null otherwise + * @throws RejectedExecutionException if there are no more available threads to run the procedure + */ + public Procedure startProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs, + List expectedMembers) throws RejectedExecutionException { + Procedure proc = createProcedure(fed, procName, procArgs, expectedMembers); + if (!this.submitProcedure(proc)) { + LOG.error("Failed to submit procedure '" + procName + "'"); + return null; + } + return proc; + } + + /** + * Notification that the procedure had the specified member acquired its part of the barrier + * via {@link Subprocedure#acquireBarrier()}. + * @param procName name of the procedure that acquired + * @param member name of the member that acquired + */ + void memberAcquiredBarrier(String procName, final String member) { + Procedure proc = procedures.get(procName); + if (proc != null) { + proc.barrierAcquiredByMember(member); + } + } + + /** + * Notification that the procedure had another member finished executing its in-barrier subproc + * via {@link Subprocedure#insideBarrier()}. 
+ * @param procName name of the subprocedure that finished + * @param member name of the member that executed and released its barrier + */ + void memberFinishedBarrier(String procName, final String member) { + Procedure proc = procedures.get(procName); + if (proc != null) { + proc.barrierReleasedByMember(member); + } + } + + /** + * @return the rpcs implementation for all current procedures + */ + ProcedureCoordinatorRpcs getRpcs() { + return rpcs; + } + + /** + * Returns the procedure. This Procedure is a live instance so should not be modified but can + * be inspected. + * @param name Name of the procedure + * @return Procedure or null if not present any more + */ + public Procedure getProcedure(String name) { + return procedures.get(name); + } + + /** + * @return Return set of all procedure names. + */ + public Set getProcedureNames() { + return new HashSet(procedures.keySet()); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java (revision 0) @@ -0,0 +1,350 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.procedure; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.zookeeper.KeeperException; + +import com.google.protobuf.InvalidProtocolBufferException; + +/** + * ZooKeeper based controller for a procedure member. + *
+ * <p>
+ * There can only be one {@link ZKProcedureMemberRpcs} per procedure type per member,
+ * since each procedure type is bound to a single set of znodes. You can have multiple
+ * {@link ZKProcedureMemberRpcs} on the same server, each serving a different member
+ * name, but each individual rpcs instance is still bound to a single member name (and since
+ * these names are used to determine global progress, it's important not to get this wrong).
+ * <p>
+ * Note that you can also run multiple, concurrent procedures at the same time (as long as they
+ * have different types) from the same controller, but the same node name must be used for each
+ * procedure; there is no conflict between two procedures as long as they have distinct names.
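+ * <p>
+ * A minimal wiring sketch (the watcher, the type and member names, and the way the
+ * ProcedureMember is built are assumptions for illustration only):
+ * <pre>
+ * ZooKeeperWatcher watcher = ...;      // the server's shared cluster watcher
+ * ZKProcedureMemberRpcs rpcs = new ZKProcedureMemberRpcs(watcher, "myProcType", "member1");
+ * ProcedureMember member = ...;        // built around a SubprocedureFactory for "myProcType"
+ * rpcs.start(member);                  // registers watches and starts serving new procedures
+ * </pre>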
+ * <p>
+ * There is no real error recovery with this mechanism currently -- if any the coordinator fails, + * its re-initialization will delete the znodes and require all in progress subprocedures to start + * anew. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs { + + private static final Log LOG = LogFactory.getLog(ZKProcedureMemberRpcs.class); + private final String memberName; + + protected ProcedureMember member; + private ZKProcedureUtil zkController; + + /** + * Must call {@link #start(ProcedureMember)} before this can be used. + * @param watcher {@link ZooKeeperWatcher} to be owned by this. Closed via + * {@link #close()}. + * @param procType name of the znode describing the procedure type + * @param memberName name of the member to join the procedure + * @throws KeeperException if we can't reach zookeeper + */ + public ZKProcedureMemberRpcs(ZooKeeperWatcher watcher, + String procType, String memberName) throws KeeperException { + this.zkController = new ZKProcedureUtil(watcher, procType, memberName) { + @Override + public void nodeCreated(String path) { + if (!isInProcedurePath(path)) { + return; + } + + LOG.info("Received created event:" + path); + // if it is a simple start/end/abort then we just rewatch the node + if (isAcquiredNode(path)) { + waitForNewProcedures(); + return; + } else if (isAbortNode(path)) { + watchForAbortedProcedures(); + return; + } + String parent = ZKUtil.getParent(path); + // if its the end barrier, the procedure can be completed + if (isReachedNode(parent)) { + receivedReachedGlobalBarrier(path); + return; + } else if (isAbortNode(parent)) { + abort(path); + return; + } else if (isAcquiredNode(parent)) { + startNewSubprocedure(path); + } else { + LOG.debug("Ignoring created notification for node:" + path); + } + } + + @Override + public void nodeChildrenChanged(String path) { + if (path.equals(this.acquiredZnode)) { + LOG.info("Received procedure start children changed event: " + path); + waitForNewProcedures(); + } else if (path.equals(this.abortZnode)) { + LOG.info("Received procedure abort children changed event: " + path); + watchForAbortedProcedures(); + } + } + }; + this.memberName = memberName; + } + + public ZKProcedureUtil getZkController() { + return zkController; + } + + @Override + public String getMemberName() { + return memberName; + } + + /** + * Pass along the procedure global barrier notification to any listeners + * @param path full znode path that cause the notification + */ + private void receivedReachedGlobalBarrier(String path) { + LOG.debug("Recieved reached global barrier:" + path); + String procName = ZKUtil.getNodeName(path); + this.member.receivedReachedGlobalBarrier(procName); + } + + private void watchForAbortedProcedures() { + LOG.debug("Checking for aborted procedures on node: '" + zkController.getAbortZnode() + "'"); + try { + // this is the list of the currently aborted procedues + for (String node : ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), + zkController.getAbortZnode())) { + String abortNode = ZKUtil.joinZNode(zkController.getAbortZnode(), node); + abort(abortNode); + } + } catch (KeeperException e) { + member.controllerConnectionFailure("Failed to list children for abort node:" + + zkController.getAbortZnode(), new IOException(e)); + } + } + + private void waitForNewProcedures() { + // watch for new procedues that we need to start subprocedures for + LOG.debug("Looking for new procedures under znode:'" + 
zkController.getAcquiredBarrier() + "'"); + List runningProcedures = null; + try { + runningProcedures = ZKUtil.listChildrenAndWatchForNewChildren(zkController.getWatcher(), + zkController.getAcquiredBarrier()); + if (runningProcedures == null) { + LOG.debug("No running procedures."); + return; + } + } catch (KeeperException e) { + member.controllerConnectionFailure("General failure when watching for new procedures", + new IOException(e)); + } + if (runningProcedures == null) { + LOG.debug("No running procedures."); + return; + } + for (String procName : runningProcedures) { + // then read in the procedure information + String path = ZKUtil.joinZNode(zkController.getAcquiredBarrier(), procName); + startNewSubprocedure(path); + } + } + + /** + * Kick off a new sub-procedure on the listener with the data stored in the passed znode. + *
+ * <p>
+ * Will attempt to create the same procedure multiple times if an procedure znode with the same + * name is created. It is left up the coordinator to ensure this doesn't occur. + * @param path full path to the znode for the procedure to start + */ + private synchronized void startNewSubprocedure(String path) { + LOG.debug("Found procedure znode: " + path); + String opName = ZKUtil.getNodeName(path); + // start watching for an abort notification for the procedure + String abortZNode = zkController.getAbortZNode(opName); + try { + if (ZKUtil.watchAndCheckExists(zkController.getWatcher(), abortZNode)) { + LOG.debug("Not starting:" + opName + " because we already have an abort notification."); + return; + } + } catch (KeeperException e) { + member.controllerConnectionFailure("Failed to get the abort znode (" + abortZNode + + ") for procedure :" + opName, new IOException(e)); + return; + } + + // get the data for the procedure + Subprocedure subproc = null; + try { + byte[] data = ZKUtil.getData(zkController.getWatcher(), path); + LOG.debug("start proc data length is " + data.length); + if (!ProtobufUtil.isPBMagicPrefix(data)) { + String msg = "Data in for starting procuedure " + opName + " is illegally formatted. " + + "Killing the procedure."; + LOG.error(msg); + throw new IllegalArgumentException(msg); + } + data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); + LOG.debug("Found data for znode:" + path); + subproc = member.createSubprocedure(opName, data); + member.submitSubprocedure(subproc); + } catch (IllegalArgumentException iae ) { + LOG.error("Illegal argument exception", iae); + sendMemberAborted(subproc, new ForeignException(getMemberName(), iae)); + } catch (IllegalStateException ise) { + LOG.error("Illegal state exception ", ise); + sendMemberAborted(subproc, new ForeignException(getMemberName(), ise)); + } catch (KeeperException e) { + member.controllerConnectionFailure("Failed to get data for new procedure:" + opName, + new IOException(e)); + } + } + + /** + * This attempts to create an acquired state znode for the procedure (snapshot name). + * + * It then looks for the reached znode to trigger in-barrier execution. If not present we + * have a watcher, if present then trigger the in-barrier action. 
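+ * <p>
+ * For example (names illustrative only), member "member1" acquiring procedure "instance1"
+ * touches znodes like:
+ * <pre>
+ * .../acquired/instance1/member1   // created by this member to signal local acquisition
+ * .../reached/instance1            // watched; its appearance triggers the in-barrier phase
+ * </pre>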
+ */ + @Override + public void sendMemberAcquired(Subprocedure sub) throws IOException { + String procName = sub.getName(); + try { + LOG.debug("Member: '" + memberName + "' joining acquired barrier for procedure (" + procName + + ") in zk"); + String acquiredZNode = ZKUtil.joinZNode(ZKProcedureUtil.getAcquireBarrierNode( + zkController, procName), memberName); + ZKUtil.createAndFailSilent(zkController.getWatcher(), acquiredZNode); + + // watch for the complete node for this snapshot + String reachedBarrier = zkController.getReachedBarrierNode(procName); + LOG.debug("Watch for global barrier reached:" + reachedBarrier); + if (ZKUtil.watchAndCheckExists(zkController.getWatcher(), reachedBarrier)) { + receivedReachedGlobalBarrier(reachedBarrier); + } + } catch (KeeperException e) { + member.controllerConnectionFailure("Failed to acquire barrier for procedure: " + + procName + " and member: " + memberName, new IOException(e)); + } + } + + /** + * This acts as the ack for a completed snapshot + */ + @Override + public void sendMemberCompleted(Subprocedure sub) throws IOException { + String procName = sub.getName(); + LOG.debug("Marking procedure '" + procName + "' completed for member '" + memberName + + "' in zk"); + String joinPath = ZKUtil.joinZNode(zkController.getReachedBarrierNode(procName), memberName); + try { + ZKUtil.createAndFailSilent(zkController.getWatcher(), joinPath); + } catch (KeeperException e) { + member.controllerConnectionFailure("Failed to post zk node:" + joinPath + + " to join procedure barrier.", new IOException(e)); + } + } + + /** + * This should be called by the member and should write a serialized root cause exception as + * to the abort znode. + */ + @Override + public void sendMemberAborted(Subprocedure sub, ForeignException ee) { + if (sub == null) { + LOG.error("Failed due to null subprocedure", ee); + return; + } + String procName = sub.getName(); + LOG.debug("Aborting procedure (" + procName + ") in zk"); + String procAbortZNode = zkController.getAbortZNode(procName); + try { + String source = (ee.getSource() == null) ? memberName: ee.getSource(); + byte[] errorInfo = ProtobufUtil.prependPBMagic(ForeignException.serialize(source, ee)); + ZKUtil.createAndFailSilent(zkController.getWatcher(), procAbortZNode, errorInfo); + LOG.debug("Finished creating abort znode:" + procAbortZNode); + } catch (KeeperException e) { + // possible that we get this error for the procedure if we already reset the zk state, but in + // that case we should still get an error for that procedure anyways + zkController.logZKTree(zkController.getBaseZnode()); + member.controllerConnectionFailure("Failed to post zk node:" + procAbortZNode + + " to abort procedure", new IOException(e)); + } + } + + /** + * Pass along the found abort notification to the listener + * @param abortZNode full znode path to the failed procedure information + */ + protected void abort(String abortZNode) { + LOG.debug("Aborting procedure member for znode " + abortZNode); + String opName = ZKUtil.getNodeName(abortZNode); + try { + byte[] data = ZKUtil.getData(zkController.getWatcher(), abortZNode); + + // figure out the data we need to pass + ForeignException ee; + try { + if (!ProtobufUtil.isPBMagicPrefix(data)) { + String msg = "Illegally formatted data in abort node for proc " + opName + + ". 
Killing the procedure."; + LOG.error(msg); + // we got a remote exception, but we can't describe it so just return exn from here + ee = new ForeignException(getMemberName(), new IllegalArgumentException(msg)); + } else { + data = Arrays.copyOfRange(data, ProtobufUtil.lengthOfPBMagic(), data.length); + ee = ForeignException.deserialize(data); + } + } catch (InvalidProtocolBufferException e) { + LOG.warn("Got an error notification for op:" + opName + + " but we can't read the information. Killing the procedure."); + // we got a remote exception, but we can't describe it so just return exn from here + ee = new ForeignException(getMemberName(), e); + } + + this.member.receiveAbortProcedure(opName, ee); + } catch (KeeperException e) { + member.controllerConnectionFailure("Failed to get data for abort znode:" + abortZNode + + zkController.getAbortZnode(), new IOException(e)); + } + } + + public void start(ProcedureMember listener) { + LOG.debug("Starting procedure member '" + this.memberName + "'"); + this.member = listener; + watchForAbortedProcedures(); + waitForNewProcedures(); + } + + @Override + public void close() throws IOException { + zkController.close(); + } + +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (working copy) @@ -128,6 +128,8 @@ C_M_DELETE_FAMILY (45), // Client asking Master to delete family of table C_M_MODIFY_FAMILY (46), // Client asking Master to modify family of table C_M_CREATE_TABLE (47), // Client asking Master to create a table + C_M_SNAPSHOT_TABLE (48), // Client asking Master to snapshot an offline table + C_M_RESTORE_SNAPSHOT (49), // Client asking Master to snapshot an offline table // Updates from master to ZK. This is done by the master and there is // nothing to process by either Master or RS Index: src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java (working copy) @@ -136,6 +136,8 @@ case C_M_ENABLE_TABLE: case C_M_MODIFY_TABLE: case C_M_CREATE_TABLE: + case C_M_SNAPSHOT_TABLE: + case C_M_RESTORE_SNAPSHOT: return ExecutorType.MASTER_TABLE_OPERATIONS; // RegionServer executor services Index: src/main/java/org/apache/hadoop/hbase/snapshot/ReferenceServerWALsTask.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/ReferenceServerWALsTask.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/ReferenceServerWALsTask.java (revision 0) @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * Reference all the WAL files under a server's WAL directory + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class ReferenceServerWALsTask extends SnapshotTask { + private static final Log LOG = LogFactory.getLog(ReferenceServerWALsTask.class); + private final FileSystem fs; + private final Configuration conf; + private final String serverName; + private Path logDir; + + /** + * @param snapshot snapshot being run + * @param failureListener listener to check for errors while running the operation and to + * propagate errors found while running the task + * @param logDir log directory for the server. Name of the directory is taken as the name of the + * server + * @param conf {@link Configuration} to extract filesystem information + * @param fs filesystem where the log files are stored and should be referenced + */ + public ReferenceServerWALsTask(SnapshotDescription snapshot, + ForeignExceptionDispatcher failureListener, final Path logDir, final Configuration conf, + final FileSystem fs) { + super(snapshot, failureListener); + this.fs = fs; + this.conf = conf; + this.serverName = logDir.getName(); + this.logDir = logDir; + } + + /** + * Create reference files (empty files with the same path and file name as original). + * @throws IOException exception from hdfs or network problems + * @throws ForeignException exception from an external procedure + */ + @Override + public Void call() throws IOException, ForeignException { + // TODO switch to using a single file to reference all required WAL files + + // Iterate through each of the log files and add a reference to it. + // assumes that all the files under the server's logs directory is a log + FileStatus[] serverLogs = FSUtils.listStatus(fs, logDir, null); + if (serverLogs == null) { + LOG.debug("No logs for server directory:" + logDir + ", done referencing files."); + return null; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Adding references for WAL files:" + Arrays.toString(serverLogs)); + } + + for (FileStatus file : serverLogs) { + this.rethrowException(); + + // add the reference to the file. 
ex: hbase/.snapshots/.logs// + Path rootDir = FSUtils.getRootDir(conf); + Path snapshotDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(this.snapshot, rootDir); + Path snapshotLogDir = TakeSnapshotUtils.getSnapshotHLogsDir(snapshotDir, serverName); + // actually store the reference on disk (small file) + Path ref = new Path(snapshotLogDir, file.getPath().getName()); + if (!fs.createNewFile(ref)) { + if (!fs.exists(ref)) { + throw new IOException("Couldn't create reference for:" + file.getPath()); + } + } + LOG.debug("Completed WAL referencing for: " + file.getPath() + " to " + ref); + } + + LOG.debug("Successfully completed WAL referencing for ALL files"); + return null; + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java (revision 0) @@ -0,0 +1,701 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
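As a minimal sketch of the WAL referencing step above (the WalReferenceSketch class and its referenceWal method are hypothetical, not part of the patch): a WAL "reference" is simply an empty marker file, with the same name as the original log, created under the snapshot's per-server log directory, mirroring what ReferenceServerWALsTask.call() does for every file it lists.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical illustration of a single WAL reference: an empty file in the snapshot
// working directory whose name matches the original log file.
public class WalReferenceSketch {
  public static void referenceWal(FileSystem fs, Path snapshotLogDir, Path wal) throws IOException {
    Path ref = new Path(snapshotLogDir, wal.getName());
    // createNewFile returns false if the file already exists; that is fine as long as it is there
    if (!fs.createNewFile(ref) && !fs.exists(ref)) {
      throw new IOException("Couldn't create reference for: " + wal);
    }
  }
}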
+ */ + +package org.apache.hadoop.hbase.snapshot; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileChecksum; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.io.HLogLink; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.ExportSnapshotException; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +/** + * Export the specified snapshot to a given FileSystem. + * + * The .snapshot/name folder is copied to the destination cluster + * and then all the hfiles/hlogs are copied using a Map-Reduce Job in the .archive/ location. + * When everything is done, the second cluster can restore the snapshot. 
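For a sense of how the export flow described above is driven, a minimal invocation via ToolRunner might look like the sketch below; the ExportSnapshotUsage wrapper class, the snapshot name and the destination URI are illustrative assumptions, while the -snapshot, -copy-to and -mappers flags come from the option parsing later in this class.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.snapshot.ExportSnapshot;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical driver: exports MySnapshot to another cluster's HBase root directory.
public class ExportSnapshotUsage {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    int rc = ToolRunner.run(conf, new ExportSnapshot(), new String[] {
        "-snapshot", "MySnapshot",
        "-copy-to", "hdfs://srv2:8082/hbase",
        "-mappers", "16" });
    System.exit(rc);
  }
}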
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class ExportSnapshot extends Configured implements Tool { + private static final Log LOG = LogFactory.getLog(ExportSnapshot.class); + + private static final String CONF_TMP_DIR = "hbase.tmp.dir"; + private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user"; + private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group"; + private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode"; + private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify"; + private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root"; + private static final String CONF_INPUT_ROOT = "snapshot.export.input.root"; + + private static final String INPUT_FOLDER_PREFIX = "export-files."; + + // Export Map-Reduce Counters, to keep track of the progress + public enum Counter { MISSING_FILES, COPY_FAILED, BYTES_EXPECTED, BYTES_COPIED }; + + private static class ExportMapper extends Mapper { + final static int REPORT_SIZE = 1 * 1024 * 1024; + final static int BUFFER_SIZE = 64 * 1024; + + private boolean verifyChecksum; + private String filesGroup; + private String filesUser; + private short filesMode; + + private FileSystem outputFs; + private Path outputArchive; + private Path outputRoot; + + private FileSystem inputFs; + private Path inputArchive; + private Path inputRoot; + + @Override + public void setup(Context context) { + Configuration conf = context.getConfiguration(); + verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true); + + filesGroup = conf.get(CONF_FILES_GROUP); + filesUser = conf.get(CONF_FILES_USER); + filesMode = (short)conf.getInt(CONF_FILES_MODE, 0); + outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT)); + inputRoot = new Path(conf.get(CONF_INPUT_ROOT)); + + inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY); + outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY); + + try { + inputFs = FileSystem.get(inputRoot.toUri(), conf); + } catch (IOException e) { + throw new RuntimeException("Could not get the input FileSystem with root=" + inputRoot, e); + } + + try { + outputFs = FileSystem.get(outputRoot.toUri(), conf); + } catch (IOException e) { + throw new RuntimeException("Could not get the output FileSystem with root="+ outputRoot, e); + } + } + + @Override + public void map(Text key, NullWritable value, Context context) + throws InterruptedException, IOException { + Path inputPath = new Path(key.toString()); + Path outputPath = getOutputPath(inputPath); + + LOG.info("copy file input=" + inputPath + " output=" + outputPath); + if (copyFile(context, inputPath, outputPath)) { + LOG.info("copy completed for input=" + inputPath + " output=" + outputPath); + } + } + + /** + * Returns the location where the inputPath will be copied. 
+ * - hfiles are encoded as hfile links hfile-region-table + * - logs are encoded as serverName/logName + */ + private Path getOutputPath(final Path inputPath) throws IOException { + Path path; + if (HFileLink.isHFileLink(inputPath)) { + String family = inputPath.getParent().getName(); + String table = HFileLink.getReferencedTableName(inputPath.getName()); + String region = HFileLink.getReferencedRegionName(inputPath.getName()); + String hfile = HFileLink.getReferencedHFileName(inputPath.getName()); + path = new Path(table, new Path(region, new Path(family, hfile))); + } else if (isHLogLinkPath(inputPath)) { + String logName = inputPath.getName(); + path = new Path(new Path(outputRoot, HConstants.HREGION_OLDLOGDIR_NAME), logName); + } else { + path = inputPath; + } + return new Path(outputArchive, path); + } + + private boolean copyFile(final Context context, final Path inputPath, final Path outputPath) + throws IOException { + FSDataInputStream in = openSourceFile(inputPath); + if (in == null) { + context.getCounter(Counter.MISSING_FILES).increment(1); + return false; + } + + try { + // Verify if the input file exists + FileStatus inputStat = getFileStatus(inputFs, inputPath); + if (inputStat == null) return false; + + // Verify if the output file exists and is the same that we want to copy + FileStatus outputStat = getFileStatus(outputFs, outputPath); + if (outputStat != null && sameFile(inputStat, outputStat)) { + LOG.info("Skip copy " + inputPath + " to " + outputPath + ", same file."); + return true; + } + + context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen()); + + // Ensure that the output folder is there and copy the file + outputFs.mkdirs(outputPath.getParent()); + FSDataOutputStream out = outputFs.create(outputPath, true); + try { + if (!copyData(context, inputPath, in, outputPath, out, inputStat.getLen())) + return false; + } finally { + out.close(); + } + + // Preserve attributes + return preserveAttributes(outputPath, inputStat); + } finally { + in.close(); + } + } + + /** + * Preserve the files attribute selected by the user copying them from the source file + */ + private boolean preserveAttributes(final Path path, final FileStatus refStat) { + FileStatus stat; + try { + stat = outputFs.getFileStatus(path); + } catch (IOException e) { + LOG.warn("Unable to get the status for file=" + path); + return false; + } + + try { + if (filesMode > 0 && stat.getPermission().toShort() != filesMode) { + outputFs.setPermission(path, new FsPermission(filesMode)); + } else if (!stat.getPermission().equals(refStat.getPermission())) { + outputFs.setPermission(path, refStat.getPermission()); + } + } catch (IOException e) { + LOG.error("Unable to set the permission for file=" + path, e); + return false; + } + + try { + String user = (filesUser != null) ? filesUser : refStat.getOwner(); + String group = (filesGroup != null) ? 
filesGroup : refStat.getGroup(); + if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) { + outputFs.setOwner(path, user, group); + } + } catch (IOException e) { + LOG.error("Unable to set the owner/group for file=" + path, e); + return false; + } + + return true; + } + + private boolean copyData(final Context context, + final Path inputPath, final FSDataInputStream in, + final Path outputPath, final FSDataOutputStream out, + final long inputFileSize) { + final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) + + " (%.3f%%) from " + inputPath + " to " + outputPath; + + try { + byte[] buffer = new byte[BUFFER_SIZE]; + long totalBytesWritten = 0; + int reportBytes = 0; + int bytesRead; + + while ((bytesRead = in.read(buffer)) > 0) { + out.write(buffer, 0, bytesRead); + totalBytesWritten += bytesRead; + reportBytes += bytesRead; + + if (reportBytes >= REPORT_SIZE) { + context.getCounter(Counter.BYTES_COPIED).increment(reportBytes); + context.setStatus(String.format(statusMessage, + StringUtils.humanReadableInt(totalBytesWritten), + reportBytes/(float)inputFileSize)); + reportBytes = 0; + } + } + + context.getCounter(Counter.BYTES_COPIED).increment(reportBytes); + context.setStatus(String.format(statusMessage, + StringUtils.humanReadableInt(totalBytesWritten), + reportBytes/(float)inputFileSize)); + + // Verify that the written size match + if (totalBytesWritten != inputFileSize) { + LOG.error("number of bytes copied not matching copied=" + totalBytesWritten + + " expected=" + inputFileSize + " for file=" + inputPath); + context.getCounter(Counter.COPY_FAILED).increment(1); + return false; + } + + return true; + } catch (IOException e) { + LOG.error("Error copying " + inputPath + " to " + outputPath, e); + context.getCounter(Counter.COPY_FAILED).increment(1); + return false; + } + } + + private FSDataInputStream openSourceFile(final Path path) { + try { + if (HFileLink.isHFileLink(path)) { + return new HFileLink(inputRoot, inputArchive, path).open(inputFs); + } else if (isHLogLinkPath(path)) { + String serverName = path.getParent().getName(); + String logName = path.getName(); + return new HLogLink(inputRoot, serverName, logName).open(inputFs); + } + return inputFs.open(path); + } catch (IOException e) { + LOG.error("Unable to open source file=" + path, e); + return null; + } + } + + private FileStatus getFileStatus(final FileSystem fs, final Path path) { + try { + if (HFileLink.isHFileLink(path)) { + HFileLink link = new HFileLink(inputRoot, inputArchive, path); + return link.getFileStatus(fs); + } else if (isHLogLinkPath(path)) { + String serverName = path.getParent().getName(); + String logName = path.getName(); + return new HLogLink(inputRoot, serverName, logName).getFileStatus(fs); + } + return fs.getFileStatus(path); + } catch (IOException e) { + LOG.warn("Unable to get the status for file=" + path); + return null; + } + } + + private FileChecksum getFileChecksum(final FileSystem fs, final Path path) { + try { + return fs.getFileChecksum(path); + } catch (IOException e) { + LOG.warn("Unable to get checksum for file=" + path, e); + return null; + } + } + + /** + * Check if the two files are equal by looking at the file length, + * and at the checksum (if user has specified the verifyChecksum flag). 
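The length-then-checksum comparison described above can be summarized by the simplified sketch below (the ChecksumCompareSketch class is hypothetical and ignores the verifyChecksum shortcut). Note that FileSystem.getFileChecksum may return null, e.g. on the local filesystem, in which case the files are treated as different and re-copied.

import java.io.IOException;

import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Simplified equality test: same length is mandatory, checksums are compared only when both exist.
public class ChecksumCompareSketch {
  public static boolean probablySame(FileSystem srcFs, Path src, FileSystem dstFs, Path dst)
      throws IOException {
    if (srcFs.getFileStatus(src).getLen() != dstFs.getFileStatus(dst).getLen()) {
      return false;
    }
    FileChecksum srcSum = srcFs.getFileChecksum(src); // may be null, e.g. on the local filesystem
    FileChecksum dstSum = dstFs.getFileChecksum(dst);
    return srcSum != null && srcSum.equals(dstSum);
  }
}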
+ */ + private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) { + // Not matching length + if (inputStat.getLen() != outputStat.getLen()) return false; + + // Mark files as equals, since user asked for no checksum verification + if (!verifyChecksum) return true; + + // If checksums are not available, files are not the same. + FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath()); + if (inChecksum == null) return false; + + FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath()); + if (outChecksum == null) return false; + + return inChecksum.equals(outChecksum); + } + + /** + * HLog files are encoded as serverName/logName + * and since all the other files should be in /hbase/table/..path.. + * we can rely on the depth, for now. + */ + private static boolean isHLogLinkPath(final Path path) { + return path.depth() == 2; + } + } + + /** + * Extract the list of files (HFiles/HLogs) to copy using Map-Reduce. + * @return list of files referenced by the snapshot (pair of path and size) + */ + private List> getSnapshotFiles(final FileSystem fs, final Path snapshotDir) + throws IOException { + SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); + + final List> files = new ArrayList>(); + final String table = snapshotDesc.getTable(); + final Configuration conf = getConf(); + + // Get snapshot files + SnapshotReferenceUtil.visitReferencedFiles(fs, snapshotDir, + new SnapshotReferenceUtil.FileVisitor() { + public void storeFile (final String region, final String family, final String hfile) + throws IOException { + Path path = new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); + long size = new HFileLink(conf, path).getFileStatus(fs).getLen(); + files.add(new Pair(path, size)); + } + + public void recoveredEdits (final String region, final String logfile) + throws IOException { + // copied with the snapshot referenecs + } + + public void logFile (final String server, final String logfile) + throws IOException { + long size = new HLogLink(conf, server, logfile).getFileStatus(fs).getLen(); + files.add(new Pair(new Path(server, logfile), size)); + } + }); + + return files; + } + + /** + * Given a list of file paths and sizes, create around ngroups in as balanced a way as possible. + * The groups created will have similar amounts of bytes. + *
+ * The algorithm used is pretty straightforward; the file list is sorted by size, + * and then each group fetch the bigger file available, iterating through groups + * alternating the direction. + */ + static List> getBalancedSplits(final List> files, int ngroups) { + // Sort files by size, from small to big + Collections.sort(files, new Comparator>() { + public int compare(Pair a, Pair b) { + long r = a.getSecond() - b.getSecond(); + return (r < 0) ? -1 : ((r > 0) ? 1 : 0); + } + }); + + // create balanced groups + List> fileGroups = new LinkedList>(); + long[] sizeGroups = new long[ngroups]; + int hi = files.size() - 1; + int lo = 0; + + List group; + int dir = 1; + int g = 0; + + while (hi >= lo) { + if (g == fileGroups.size()) { + group = new LinkedList(); + fileGroups.add(group); + } else { + group = fileGroups.get(g); + } + + Pair fileInfo = files.get(hi--); + + // add the hi one + sizeGroups[g] += fileInfo.getSecond(); + group.add(fileInfo.getFirst()); + + // change direction when at the end or the beginning + g += dir; + if (g == ngroups) { + dir = -1; + g = ngroups - 1; + } else if (g < 0) { + dir = 1; + g = 0; + } + } + + if (LOG.isDebugEnabled()) { + for (int i = 0; i < sizeGroups.length; ++i) { + LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i])); + } + } + + return fileGroups; + } + + private static Path getInputFolderPath(final FileSystem fs, final Configuration conf) + throws IOException, InterruptedException { + String stagingName = "exportSnapshot-" + EnvironmentEdgeManager.currentTimeMillis(); + Path stagingDir = new Path(conf.get(CONF_TMP_DIR), stagingName); + fs.mkdirs(stagingDir); + return new Path(stagingDir, INPUT_FOLDER_PREFIX + + String.valueOf(EnvironmentEdgeManager.currentTimeMillis())); + } + + /** + * Create the input files, with the path to copy, for the MR job. + * Each input files contains n files, and each input file has a similar amount data to copy. + * The number of input files created are based on the number of mappers provided as argument + * and the number of the files to copy. + */ + private static Path[] createInputFiles(final Configuration conf, + final List> snapshotFiles, int mappers) + throws IOException, InterruptedException { + FileSystem fs = FileSystem.get(conf); + Path inputFolderPath = getInputFolderPath(fs, conf); + LOG.debug("Input folder location: " + inputFolderPath); + + List> splits = getBalancedSplits(snapshotFiles, mappers); + Path[] inputFiles = new Path[splits.size()]; + + Text key = new Text(); + for (int i = 0; i < inputFiles.length; i++) { + List files = splits.get(i); + inputFiles[i] = new Path(inputFolderPath, String.format("export-%d.seq", i)); + SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inputFiles[i], + Text.class, NullWritable.class); + LOG.debug("Input split: " + i); + try { + for (Path file: files) { + LOG.debug(file.toString()); + key.set(file.toString()); + writer.append(key, NullWritable.get()); + } + } finally { + writer.close(); + } + } + + return inputFiles; + } + + /** + * Run Map-Reduce Job to perform the files copy. 
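To make the zig-zag balancing described above concrete: with file sizes 10, 9, 7, 5, 3, 1 and two groups, the largest remaining file is always handed to the group the zig-zag currently points at, which yields {10, 5, 3} and {9, 7, 1} (totals 18 and 17). The standalone sketch below (BalancedSplitsSketch is hypothetical; it drops the Path/Pair plumbing of getBalancedSplits) reproduces that behaviour on plain sizes.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Standalone illustration of the zig-zag balancing: sort ascending, then hand out the biggest
// remaining size while sweeping the group index back and forth.
public class BalancedSplitsSketch {
  static List<List<Long>> balance(List<Long> sizes, int ngroups) {
    Collections.sort(sizes); // small to big
    List<List<Long>> groups = new ArrayList<List<Long>>();
    for (int i = 0; i < ngroups; i++) {
      groups.add(new ArrayList<Long>());
    }
    int g = 0, dir = 1;
    for (int hi = sizes.size() - 1; hi >= 0; hi--) {
      groups.get(g).add(sizes.get(hi));
      g += dir;
      if (g == ngroups) { dir = -1; g = ngroups - 1; }
      else if (g < 0) { dir = 1; g = 0; }
    }
    return groups;
  }

  public static void main(String[] args) {
    List<Long> sizes = new ArrayList<Long>(Arrays.asList(10L, 9L, 7L, 5L, 3L, 1L));
    System.out.println(balance(sizes, 2)); // [[10, 5, 3], [9, 7, 1]]
  }
}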
+ */ + private boolean runCopyJob(final Path inputRoot, final Path outputRoot, + final List> snapshotFiles, final boolean verifyChecksum, + final String filesUser, final String filesGroup, final int filesMode, + final int mappers) throws IOException, InterruptedException, ClassNotFoundException { + Configuration conf = getConf(); + if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup); + if (filesUser != null) conf.set(CONF_FILES_USER, filesUser); + conf.setInt(CONF_FILES_MODE, filesMode); + conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum); + conf.set(CONF_OUTPUT_ROOT, outputRoot.toString()); + conf.set(CONF_INPUT_ROOT, inputRoot.toString()); + conf.setInt("mapreduce.job.maps", mappers); + + // job.setMapSpeculativeExecution(false) + conf.setBoolean("mapreduce.map.speculative", false); + conf.setBoolean("mapreduce.reduce.speculative", false); + conf.setBoolean("mapred.map.tasks.speculative.execution", false); + conf.setBoolean("mapred.reduce.tasks.speculative.execution", false); + + Job job = new Job(conf); + job.setJobName("ExportSnapshot"); + job.setJarByClass(ExportSnapshot.class); + job.setMapperClass(ExportMapper.class); + job.setInputFormatClass(SequenceFileInputFormat.class); + job.setOutputFormatClass(NullOutputFormat.class); + job.setNumReduceTasks(0); + for (Path path: createInputFiles(conf, snapshotFiles, mappers)) { + LOG.debug("Add Input Path=" + path); + SequenceFileInputFormat.addInputPath(job, path); + } + + return job.waitForCompletion(true); + } + + /** + * Execute the export snapshot by copying the snapshot metadata, hfiles and hlogs. + * @return 0 on success, and != 0 upon failure. + */ + @Override + public int run(String[] args) throws Exception { + boolean verifyChecksum = true; + String snapshotName = null; + String filesGroup = null; + String filesUser = null; + Path outputRoot = null; + int filesMode = 0; + int mappers = getConf().getInt("mapreduce.job.maps", 1); + + // Process command line args + for (int i = 0; i < args.length; i++) { + String cmd = args[i]; + try { + if (cmd.equals("-snapshot")) { + snapshotName = args[++i]; + } else if (cmd.equals("-copy-to")) { + outputRoot = new Path(args[++i]); + } else if (cmd.equals("-no-checksum-verify")) { + verifyChecksum = false; + } else if (cmd.equals("-mappers")) { + mappers = Integer.parseInt(args[++i]); + } else if (cmd.equals("-chuser")) { + filesUser = args[++i]; + } else if (cmd.equals("-chgroup")) { + filesGroup = args[++i]; + } else if (cmd.equals("-chmod")) { + filesMode = Integer.parseInt(args[++i], 8); + } else if (cmd.equals("-h") || cmd.equals("--help")) { + printUsageAndExit(); + } else { + System.err.println("UNEXPECTED: " + cmd); + printUsageAndExit(); + } + } catch (Exception e) { + printUsageAndExit(); + } + } + + // Check user options + if (snapshotName == null) { + System.err.println("Snapshot name not provided."); + printUsageAndExit(); + } + + if (outputRoot == null) { + System.err.println("Destination file-system not provided."); + printUsageAndExit(); + } + + Configuration conf = getConf(); + Path inputRoot = FSUtils.getRootDir(conf); + FileSystem inputFs = FileSystem.get(conf); + FileSystem outputFs = FileSystem.get(outputRoot.toUri(), conf); + + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot); + Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshotName, outputRoot); + Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, outputRoot); + + // Check if the snapshot already exists 
+ if (outputFs.exists(outputSnapshotDir)) { + System.err.println("The snapshot '" + snapshotName + + "' already exists in the destination: " + outputSnapshotDir); + return 1; + } + + // Check if the snapshot already in-progress + if (outputFs.exists(snapshotTmpDir)) { + System.err.println("A snapshot with the same name '" + snapshotName + "' is in-progress"); + return 1; + } + + // Step 0 - Extract snapshot files to copy + final List> files = getSnapshotFiles(inputFs, snapshotDir); + + // Step 1 - Copy fs1:/.snapshot/ to fs2:/.snapshot/.tmp/ + // The snapshot references must be copied before the hfiles otherwise the cleaner + // will remove them because they are unreferenced. + try { + FileUtil.copy(inputFs, snapshotDir, outputFs, snapshotTmpDir, false, false, conf); + } catch (IOException e) { + System.err.println("Failed to copy the snapshot directory: from=" + snapshotDir + + " to=" + snapshotTmpDir); + e.printStackTrace(System.err); + return 1; + } + + // Step 2 - Start MR Job to copy files + // The snapshot references must be copied before the files otherwise the files gets removed + // by the HFileArchiver, since they have no references. + try { + if (!runCopyJob(inputRoot, outputRoot, files, verifyChecksum, + filesUser, filesGroup, filesMode, mappers)) { + throw new ExportSnapshotException("Snapshot export failed!"); + } + + // Step 3 - Rename fs2:/.snapshot/.tmp/ fs2:/.snapshot/ + if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) { + System.err.println("Snapshot export failed!"); + System.err.println("Unable to rename snapshot directory from=" + + snapshotTmpDir + " to=" + outputSnapshotDir); + return 1; + } + + return 0; + } catch (Exception e) { + System.err.println("Snapshot export failed!"); + e.printStackTrace(System.err); + outputFs.delete(outputSnapshotDir, true); + return 1; + } + } + + // ExportSnapshot + private void printUsageAndExit() { + System.err.printf("Usage: bin/hbase %s [options]%n", getClass().getName()); + System.err.println(" where [options] are:"); + System.err.println(" -h|-help Show this help and exit."); + System.err.println(" -snapshot NAME Snapshot to restore."); + System.err.println(" -copy-to NAME Remote destination hdfs://"); + System.err.println(" -no-checksum-verify Do not verify checksum."); + System.err.println(" -chuser USERNAME Change the owner of the files to the specified one."); + System.err.println(" -chgroup GROUP Change the group of the files to the specified one."); + System.err.println(" -chmod MODE Change the permission of the files to the specified one."); + System.err.println(" -mappers Number of mappers to use during the copy (mapreduce.job.maps)."); + System.err.println(); + System.err.println("Examples:"); + System.err.println(" hbase " + getClass() + " \\"); + System.err.println(" -snapshot MySnapshot -copy-to hdfs:///srv2:8082/hbase \\"); + System.err.println(" -chuser MyUser -chgroup MyGroup -chmod 700 -mappers 16"); + System.exit(1); + } + + /** + * The guts of the {@link #main} method. + * Call this method to avoid the {@link #main(String[])} System.exit. 
+ * @param args + * @return errCode + * @throws Exception + */ + static int innerMain(final Configuration conf, final String [] args) throws Exception { + return ToolRunner.run(conf, new ExportSnapshot(), args); + } + + public static void main(String[] args) throws Exception { + System.exit(innerMain(HBaseConfiguration.create(), args)); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (revision 0) @@ -0,0 +1,588 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.snapshot; + +import java.io.InputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.backup.HFileArchiver; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.FSVisitor; +import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.apache.hadoop.io.IOUtils; + +/** + * Helper to Restore/Clone a Snapshot + * + *
+ * The helper assumes that a table is already created, and by calling restore()
+ * the content present in the snapshot will be restored as the new content of the table.
+ *
+ * Clone from Snapshot: If the target table is empty, the restore operation
+ * is just a "clone operation", where the only operations are:
+ *   - for each region in the snapshot create a new region
+ *     (note that the region will have a different name, since the encoding contains the table name)
+ *   - for each file in the region create a new HFileLink to point to the original file.
+ *   - restore the logs, if any
+ *
+ * Restore from Snapshot:
+ *   - for each region in the table verify which are available in the snapshot and which are not
+ *     - if the region is not present in the snapshot, remove it.
+ *     - if the region is present in the snapshot
+ *       - for each file in the table region verify which are available in the snapshot
+ *         - if the hfile is not present in the snapshot, remove it
+ *         - if the hfile is present, keep it (nothing to do)
+ *       - for each file in the snapshot region but not in the table
+ *         - create a new HFileLink that points to the original file
+ *   - for each region in the snapshot not present in the current table state
+ *     - create a new region and for each file in the region create a new HFileLink
+ *       (This is the same as the clone operation)
+ *   - restore the logs, if any
+ */ +@InterfaceAudience.Private +public class RestoreSnapshotHelper { + private static final Log LOG = LogFactory.getLog(RestoreSnapshotHelper.class); + + private final Map regionsMap = + new TreeMap(Bytes.BYTES_COMPARATOR); + + private final ForeignExceptionDispatcher monitor; + + private final SnapshotDescription snapshotDesc; + private final Path snapshotDir; + + private final HTableDescriptor tableDesc; + private final Path tableDir; + + private final Configuration conf; + private final FileSystem fs; + + public RestoreSnapshotHelper(final Configuration conf, final FileSystem fs, + final SnapshotDescription snapshotDescription, final Path snapshotDir, + final HTableDescriptor tableDescriptor, final Path tableDir, + final ForeignExceptionDispatcher monitor) + { + this.fs = fs; + this.conf = conf; + this.snapshotDesc = snapshotDescription; + this.snapshotDir = snapshotDir; + this.tableDesc = tableDescriptor; + this.tableDir = tableDir; + this.monitor = monitor; + } + + /** + * Restore the on-disk table to a specified snapshot state. + * @return the set of regions touched by the restore operation + */ + public RestoreMetaChanges restoreHdfsRegions() throws IOException { + LOG.debug("starting restore"); + Set snapshotRegionNames = SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir); + if (snapshotRegionNames == null) { + LOG.warn("Nothing to restore. Snapshot " + snapshotDesc + " looks empty"); + return null; + } + + RestoreMetaChanges metaChanges = new RestoreMetaChanges(); + + // Identify which region are still available and which not. + // NOTE: we rely upon the region name as: "table name, start key, end key" + List tableRegions = getTableRegions(); + if (tableRegions != null) { + monitor.rethrowException(); + for (HRegionInfo regionInfo: tableRegions) { + String regionName = regionInfo.getEncodedName(); + if (snapshotRegionNames.contains(regionName)) { + LOG.info("region to restore: " + regionName); + snapshotRegionNames.remove(regionName); + metaChanges.addRegionToRestore(regionInfo); + } else { + LOG.info("region to remove: " + regionName); + metaChanges.addRegionToRemove(regionInfo); + } + } + + // Restore regions using the snapshot data + monitor.rethrowException(); + restoreHdfsRegions(metaChanges.getRegionsToRestore()); + + // Remove regions from the current table + monitor.rethrowException(); + removeHdfsRegions(metaChanges.getRegionsToRemove()); + } + + // Regions to Add: present in the snapshot but not in the current table + if (snapshotRegionNames.size() > 0) { + List regionsToAdd = new LinkedList(); + + monitor.rethrowException(); + for (String regionName: snapshotRegionNames) { + LOG.info("region to add: " + regionName); + Path regionDir = new Path(snapshotDir, regionName); + regionsToAdd.add(HRegion.loadDotRegionInfoFileContent(fs, regionDir)); + } + + // Create new regions cloning from the snapshot + monitor.rethrowException(); + HRegionInfo[] clonedRegions = cloneHdfsRegions(regionsToAdd); + metaChanges.setNewRegions(clonedRegions); + } + + // Restore WALs + monitor.rethrowException(); + restoreWALs(); + + return metaChanges; + } + + /** + * Describe the set of operations needed to update META after restore. 
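A minimal sketch of how a caller might drive the restore flow above (the RestoreUsageSketch class is hypothetical; the no-argument ForeignExceptionDispatcher constructor is an assumption, and updating META afterwards, e.g. via MetaEditor as the javadoc suggests, is left to the caller).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;

// Hypothetical caller: restore the on-disk state, then inspect which regions need META updates.
public class RestoreUsageSketch {
  public static void restore(Configuration conf, FileSystem fs, SnapshotDescription snapshot,
      Path snapshotDir, HTableDescriptor htd, Path tableDir) throws IOException {
    ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher();
    RestoreSnapshotHelper helper =
        new RestoreSnapshotHelper(conf, fs, snapshot, snapshotDir, htd, tableDir, monitor);
    RestoreSnapshotHelper.RestoreMetaChanges changes = helper.restoreHdfsRegions();
    if (changes == null) {
      return; // empty snapshot, nothing was touched
    }
    if (changes.hasRegionsToAdd()) {
      System.out.println("regions to add to META: " + changes.getRegionsToAdd().size());
    }
    if (changes.hasRegionsToRemove()) {
      System.out.println("regions to remove from META: " + changes.getRegionsToRemove().size());
    }
  }
}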
+ */ + public static class RestoreMetaChanges { + private List regionsToRestore = null; + private List regionsToRemove = null; + private List regionsToAdd = null; + + /** + * @return true if there're new regions + */ + public boolean hasRegionsToAdd() { + return this.regionsToAdd != null && this.regionsToAdd.size() > 0; + } + + /** + * Returns the list of new regions added during the on-disk restore. + * The caller is responsible to add the regions to META. + * e.g MetaEditor.addRegionsToMeta(...) + * @return the list of regions to add to META + */ + public List getRegionsToAdd() { + return this.regionsToAdd; + } + + /** + * @return true if there're regions to restore + */ + public boolean hasRegionsToRestore() { + return this.regionsToRestore != null && this.regionsToRestore.size() > 0; + } + + /** + * Returns the list of 'restored regions' during the on-disk restore. + * The caller is responsible to add the regions to META if not present. + * @return the list of regions restored + */ + public List getRegionsToRestore() { + return this.regionsToRestore; + } + + /** + * @return true if there're regions to remove + */ + public boolean hasRegionsToRemove() { + return this.regionsToRemove != null && this.regionsToRemove.size() > 0; + } + + /** + * Returns the list of regions removed during the on-disk restore. + * The caller is responsible to remove the regions from META. + * e.g. MetaEditor.deleteRegions(...) + * @return the list of regions to remove from META + */ + public List getRegionsToRemove() { + return this.regionsToRemove; + } + + void setNewRegions(final HRegionInfo[] hris) { + if (hris != null) { + regionsToAdd = Arrays.asList(hris); + } else { + regionsToAdd = null; + } + } + + void addRegionToRemove(final HRegionInfo hri) { + if (regionsToRemove == null) { + regionsToRemove = new LinkedList(); + } + regionsToRemove.add(hri); + } + + void addRegionToRestore(final HRegionInfo hri) { + if (regionsToRestore == null) { + regionsToRestore = new LinkedList(); + } + regionsToRestore.add(hri); + } + } + + /** + * Remove specified regions from the file-system, using the archiver. + */ + private void removeHdfsRegions(final List regions) throws IOException { + if (regions != null && regions.size() > 0) { + for (HRegionInfo hri: regions) { + HFileArchiver.archiveRegion(conf, fs, hri); + } + } + } + + /** + * Restore specified regions by restoring content to the snapshot state. + */ + private void restoreHdfsRegions(final List regions) throws IOException { + if (regions == null || regions.size() == 0) return; + for (HRegionInfo hri: regions) restoreRegion(hri); + } + + /** + * Restore region by removing files not in the snapshot + * and adding the missing ones from the snapshot. 
+ */ + private void restoreRegion(HRegionInfo regionInfo) throws IOException { + Path snapshotRegionDir = new Path(snapshotDir, regionInfo.getEncodedName()); + Map> snapshotFiles = + SnapshotReferenceUtil.getRegionHFileReferences(fs, snapshotRegionDir); + Path regionDir = new Path(tableDir, regionInfo.getEncodedName()); + String tableName = tableDesc.getNameAsString(); + + // Restore families present in the table + for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) { + byte[] family = Bytes.toBytes(familyDir.getName()); + Set familyFiles = getTableRegionFamilyFiles(familyDir); + List snapshotFamilyFiles = snapshotFiles.remove(familyDir.getName()); + if (snapshotFamilyFiles != null) { + List hfilesToAdd = new LinkedList(); + for (String hfileName: snapshotFamilyFiles) { + if (familyFiles.contains(hfileName)) { + // HFile already present + familyFiles.remove(hfileName); + } else { + // HFile missing + hfilesToAdd.add(hfileName); + } + } + + // Restore Missing files + for (String hfileName: hfilesToAdd) { + LOG.trace("Adding HFileLink " + hfileName + + " to region=" + regionInfo.getEncodedName() + " table=" + tableName); + restoreStoreFile(familyDir, regionInfo, hfileName); + } + + // Remove hfiles not present in the snapshot + for (String hfileName: familyFiles) { + Path hfile = new Path(familyDir, hfileName); + LOG.trace("Removing hfile=" + hfile + + " from region=" + regionInfo.getEncodedName() + " table=" + tableName); + HFileArchiver.archiveStoreFile(fs, regionInfo, conf, tableDir, family, hfile); + } + } else { + // Family doesn't exists in the snapshot + LOG.trace("Removing family=" + Bytes.toString(family) + + " from region=" + regionInfo.getEncodedName() + " table=" + tableName); + HFileArchiver.archiveFamily(fs, conf, regionInfo, tableDir, family); + fs.delete(familyDir, true); + } + } + + // Add families not present in the table + for (Map.Entry> familyEntry: snapshotFiles.entrySet()) { + Path familyDir = new Path(regionDir, familyEntry.getKey()); + if (!fs.mkdirs(familyDir)) { + throw new IOException("Unable to create familyDir=" + familyDir); + } + + for (String hfileName: familyEntry.getValue()) { + LOG.trace("Adding HFileLink " + hfileName + " to table=" + tableName); + restoreStoreFile(familyDir, regionInfo, hfileName); + } + } + } + + /** + * @return The set of files in the specified family directory. + */ + private Set getTableRegionFamilyFiles(final Path familyDir) throws IOException { + Set familyFiles = new HashSet(); + + FileStatus[] hfiles = FSUtils.listStatus(fs, familyDir); + if (hfiles == null) return familyFiles; + + for (FileStatus hfileRef: hfiles) { + String hfileName = hfileRef.getPath().getName(); + familyFiles.add(hfileName); + } + + return familyFiles; + } + + /** + * Clone specified regions. For each region create a new region + * and create a HFileLink for each hfile. 
+ */ + private HRegionInfo[] cloneHdfsRegions(final List regions) throws IOException { + if (regions == null || regions.size() == 0) return null; + + final Map snapshotRegions = + new HashMap(regions.size()); + + // clone region info (change embedded tableName with the new one) + HRegionInfo[] clonedRegionsInfo = new HRegionInfo[regions.size()]; + for (int i = 0; i < clonedRegionsInfo.length; ++i) { + // clone the region info from the snapshot region info + HRegionInfo snapshotRegionInfo = regions.get(i); + clonedRegionsInfo[i] = cloneRegionInfo(snapshotRegionInfo); + + // add the region name mapping between snapshot and cloned + String snapshotRegionName = snapshotRegionInfo.getEncodedName(); + String clonedRegionName = clonedRegionsInfo[i].getEncodedName(); + regionsMap.put(Bytes.toBytes(snapshotRegionName), Bytes.toBytes(clonedRegionName)); + LOG.info("clone region=" + snapshotRegionName + " as " + clonedRegionName); + + // Add mapping between cloned region name and snapshot region info + snapshotRegions.put(clonedRegionName, snapshotRegionInfo); + } + + // create the regions on disk + ModifyRegionUtils.createRegions(conf, tableDir.getParent(), + tableDesc, clonedRegionsInfo, new ModifyRegionUtils.RegionFillTask() { + public void fillRegion(final HRegion region) throws IOException { + cloneRegion(region, snapshotRegions.get(region.getRegionInfo().getEncodedName())); + } + }); + + return clonedRegionsInfo; + } + + /** + * Clone region directory content from the snapshot info. + * + * Each region is encoded with the table name, so the cloned region will have + * a different region name. + * + * Instead of copying the hfiles a HFileLink is created. + * + * @param region {@link HRegion} cloned + * @param snapshotRegionInfo + */ + private void cloneRegion(final HRegion region, final HRegionInfo snapshotRegionInfo) + throws IOException { + final Path snapshotRegionDir = new Path(snapshotDir, snapshotRegionInfo.getEncodedName()); + final Path regionDir = new Path(tableDir, region.getRegionInfo().getEncodedName()); + final String tableName = tableDesc.getNameAsString(); + SnapshotReferenceUtil.visitRegionStoreFiles(fs, snapshotRegionDir, + new FSVisitor.StoreFileVisitor() { + public void storeFile (final String region, final String family, final String hfile) + throws IOException { + LOG.info("Adding HFileLink " + hfile + " to table=" + tableName); + Path familyDir = new Path(regionDir, family); + restoreStoreFile(familyDir, snapshotRegionInfo, hfile); + } + }); + } + + /** + * Create a new {@link HFileLink} to reference the store file. + *
+ * The store file in the snapshot can be a simple hfile, an HFileLink or a reference.
+ *   - hfile: abc -> table=region-abc
+ *   - reference: abc.1234 -> table=region-abc.1234
+ *   - hfilelink: table=region-hfile -> table=region-hfile
+ * @param familyDir destination directory for the store file + * @param regionInfo destination region info for the table + * @param hfileName store file name (can be a Reference, HFileLink or simple HFile) + */ + private void restoreStoreFile(final Path familyDir, final HRegionInfo regionInfo, + final String hfileName) throws IOException { + if (HFileLink.isHFileLink(hfileName)) { + HFileLink.createFromHFileLink(conf, fs, familyDir, hfileName); + } else if (StoreFile.isReference(hfileName)) { + restoreReferenceFile(familyDir, regionInfo, hfileName); + } else { + HFileLink.create(conf, fs, familyDir, regionInfo, hfileName); + } + } + + /** + * Create a new {@link Reference} as copy of the source one. + *
+   * The source table looks like:
+   *    1234/abc      (original file)
+   *    5678/abc.1234 (reference file)
+   *
+   * After the clone operation, it looks like:
+   *   wxyz/table=1234-abc
+   *   stuv/table=1234-abc.wxyz
+   *
+   * NOTE that the region name in the clone changes (md5 of regioninfo)
+   * and the reference should reflect that change.
+ * @param familyDir destination directory for the store file + * @param regionInfo destination region info for the table + * @param hfileName reference file name + */ + private void restoreReferenceFile(final Path familyDir, final HRegionInfo regionInfo, + final String hfileName) throws IOException { + // Extract the referred information (hfile name and parent region) + String tableName = snapshotDesc.getTable(); + Path refPath = StoreFile.getReferredToFile(new Path(new Path(new Path(tableName, + regionInfo.getEncodedName()), familyDir.getName()), hfileName)); + String snapshotRegionName = refPath.getParent().getParent().getName(); + String fileName = refPath.getName(); + + // The new reference should have the cloned region name as parent, if it is a clone. + String clonedRegionName = Bytes.toString(regionsMap.get(Bytes.toBytes(snapshotRegionName))); + if (clonedRegionName == null) clonedRegionName = snapshotRegionName; + + // The output file should be a reference link table=snapshotRegion-fileName.clonedRegionName + String refLink = fileName; + if (!HFileLink.isHFileLink(fileName)) { + refLink = HFileLink.createHFileLinkName(tableName, snapshotRegionName, fileName); + } + Path outPath = new Path(familyDir, refLink + '.' + clonedRegionName); + + // Create the new reference + Path linkPath = new Path(familyDir, + HFileLink.createHFileLinkName(tableName, regionInfo.getEncodedName(), hfileName)); + InputStream in = new HFileLink(conf, linkPath).open(fs); + OutputStream out = fs.create(outPath); + IOUtils.copyBytes(in, out, conf); + } + + /** + * Create a new {@link HRegionInfo} from the snapshot region info. + * Keep the same startKey, endKey, regionId and split information but change + * the table name. + * + * @param snapshotRegionInfo Info for region to clone. + * @return the new HRegion instance + */ + public HRegionInfo cloneRegionInfo(final HRegionInfo snapshotRegionInfo) { + return new HRegionInfo(tableDesc.getName(), + snapshotRegionInfo.getStartKey(), snapshotRegionInfo.getEndKey(), + snapshotRegionInfo.isSplit(), snapshotRegionInfo.getRegionId()); + } + + /** + * Restore snapshot WALs. + * + * Global Snapshot keep a reference to region servers logs present during the snapshot. + * (/hbase/.snapshot/snapshotName/.logs/hostName/logName) + * + * Since each log contains different tables data, logs must be split to + * extract the table that we are interested in. 
+ */ + private void restoreWALs() throws IOException { + final SnapshotLogSplitter logSplitter = new SnapshotLogSplitter(conf, fs, tableDir, + Bytes.toBytes(snapshotDesc.getTable()), regionsMap); + try { + // Recover.Edits + SnapshotReferenceUtil.visitRecoveredEdits(fs, snapshotDir, + new FSVisitor.RecoveredEditsVisitor() { + public void recoveredEdits (final String region, final String logfile) throws IOException { + Path path = SnapshotReferenceUtil.getRecoveredEdits(snapshotDir, region, logfile); + logSplitter.splitRecoveredEdit(path); + } + }); + + // Region Server Logs + SnapshotReferenceUtil.visitLogFiles(fs, snapshotDir, new FSVisitor.LogFileVisitor() { + public void logFile (final String server, final String logfile) throws IOException { + logSplitter.splitLog(server, logfile); + } + }); + } finally { + logSplitter.close(); + } + } + + /** + * @return the set of the regions contained in the table + */ + private List getTableRegions() throws IOException { + LOG.debug("get table regions: " + tableDir); + FileStatus[] regionDirs = FSUtils.listStatus(fs, tableDir, new FSUtils.RegionDirFilter(fs)); + if (regionDirs == null) return null; + + List regions = new LinkedList(); + for (FileStatus regionDir: regionDirs) { + HRegionInfo hri = HRegion.loadDotRegionInfoFileContent(fs, regionDir.getPath()); + regions.add(hri); + } + LOG.debug("found " + regions.size() + " regions for table=" + tableDesc.getNameAsString()); + return regions; + } + + /** + * Create a new table descriptor cloning the snapshot table schema. + * + * @param snapshotTableDescriptor + * @param tableName + * @return cloned table descriptor + * @throws IOException + */ + public static HTableDescriptor cloneTableSchema(final HTableDescriptor snapshotTableDescriptor, + final byte[] tableName) throws IOException { + HTableDescriptor htd = new HTableDescriptor(tableName); + for (HColumnDescriptor hcd: snapshotTableDescriptor.getColumnFamilies()) { + htd.addFamily(hcd); + } + return htd; + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotCreationException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotCreationException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotCreationException.java (revision 0) @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; + +/** + * Thrown when a snapshot could not be created due to a server-side error when taking the snapshot. 
+ */ +@SuppressWarnings("serial") +public class SnapshotCreationException extends HBaseSnapshotException { + + /** + * Used internally by the RPC engine to pass the exception back to the client. + * @param msg error message to pass back + */ + public SnapshotCreationException(String msg) { + super(msg); + } + + /** + * Failure to create the specified snapshot + * @param msg reason why the snapshot couldn't be completed + * @param desc description of the snapshot attempted + */ + public SnapshotCreationException(String msg, SnapshotDescription desc) { + super(msg, desc); + } + + /** + * Failure to create the specified snapshot due to an external cause + * @param msg reason why the snapshot couldn't be completed + * @param cause root cause of the failure + * @param desc description of the snapshot attempted + */ + public SnapshotCreationException(String msg, Throwable cause, SnapshotDescription desc) { + super(msg, cause, desc); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/CorruptedSnapshotException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/CorruptedSnapshotException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/CorruptedSnapshotException.java (revision 0) @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; + + +/** + * Exception thrown when the found snapshot info from the filesystem is not valid + */ +@SuppressWarnings("serial") +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class CorruptedSnapshotException extends HBaseSnapshotException { + + /** + * @param message message describing the exception + * @param e cause + */ + public CorruptedSnapshotException(String message, Exception e) { + super(message, e); + } + + /** + * Snapshot was corrupt for some reason + * @param message full description of the failure + * @param snapshot snapshot that was expected + */ + public CorruptedSnapshotException(String message, SnapshotDescription snapshot) { + super(message, snapshot); + } + + /** + * @param message message describing the exception + */ + public CorruptedSnapshotException(String message) { + super(message, (SnapshotDescription)null); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/TablePartiallyOpenException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/TablePartiallyOpenException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/TablePartiallyOpenException.java (revision 0) @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Thrown if a table should be online/offline but is partially open + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class TablePartiallyOpenException extends IOException { + private static final long serialVersionUID = 3571982660065058361L; + + public TablePartiallyOpenException() { + super(); + } + + /** + * @param s message + */ + public TablePartiallyOpenException(String s) { + super(s); + } + + /** + * @param tableName Name of table that is partial open + */ + public TablePartiallyOpenException(byte[] tableName) { + this(Bytes.toString(tableName)); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/HSnapshotDescription.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/HSnapshotDescription.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/HSnapshotDescription.java (revision 0) @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; +import java.io.DataInput; +import java.io.DataOutput; + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Writable version of the SnapshotDescription used by the rpc + */ +public class HSnapshotDescription implements Writable { + private SnapshotDescription proto; + + public HSnapshotDescription() { + } + + public HSnapshotDescription(final SnapshotDescription proto) { + assert proto != null : "proto must be non-null"; + this.proto = proto; + } + + public String getName() { + return this.proto.getName(); + } + + public SnapshotDescription getProto() { + return this.proto; + } + + public SnapshotDescription.Type getType() { + return this.proto.getType(); + } + + public String getTable() { + return this.proto.getTable(); + } + + public boolean hasTable() { + return this.proto.hasTable(); + } + + public long getCreationTime() { + return this.proto.getCreationTime(); + } + + public int getVersion() { + return this.proto.getVersion(); + } + + public String toString() { + if (this.proto != null) { + return this.proto.toString(); + } + return "(no snapshot)"; + } + + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (!(obj instanceof HSnapshotDescription)) { + return false; + } + SnapshotDescription oproto = ((HSnapshotDescription)obj).getProto(); + if (this.proto == oproto) { + return true; + } + if (this.proto == null && oproto != null) { + return false; + } + return this.proto.equals(oproto); + } + + // Writable + /** + * INTERNAL This method is a part of {@link Writable} interface + * and is used for de-serialization of the HTableDescriptor over RPC + */ + @Override + public void readFields(DataInput in) throws IOException { + byte[] data = Bytes.readByteArray(in); + if (data.length > 0) { + this.proto = SnapshotDescription.parseFrom(data); + } else { + this.proto = null; + } + } + + /** + * INTERNAL This method is a part of {@link Writable} interface + * and is used for serialization of the HTableDescriptor over RPC + */ + @Override + public void write(DataOutput out) throws IOException { + if (this.proto != null) { + Bytes.writeByteArray(out, this.proto.toByteArray()); + } else { + Bytes.writeByteArray(out, new byte[0]); + } + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotInfo.java (revision 0) @@ -0,0 +1,303 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
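A quick round-trip of the Writable wrapper above, as a sketch (the HSnapshotDescriptionRoundTrip class is hypothetical; it uses Hadoop's DataOutputBuffer/DataInputBuffer helpers to exercise write() and readFields() in memory).

import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
import org.apache.hadoop.hbase.snapshot.HSnapshotDescription;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

// Hypothetical round-trip: the wrapper serializes the protobuf bytes and parses them back.
public class HSnapshotDescriptionRoundTrip {
  public static void main(String[] args) throws Exception {
    SnapshotDescription proto = SnapshotDescription.newBuilder()
        .setName("MySnapshot").setTable("MyTable").build();

    DataOutputBuffer out = new DataOutputBuffer();
    new HSnapshotDescription(proto).write(out);

    HSnapshotDescription copy = new HSnapshotDescription();
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    copy.readFields(in);

    System.out.println(copy.getName() + " / " + copy.getTable()); // MySnapshot / MyTable
  }
}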
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; +import java.io.FileNotFoundException; +import java.text.SimpleDateFormat; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.Date; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.io.HLogLink; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.FSTableDescriptors; + +/** + * Tool for dumping snapshot information. + *
+ *   1. Table Descriptor
+ *   2. Snapshot creation time, type, format version, ...
+ *   3. List of hfiles and hlogs
+ *   4. Stats about hfiles and logs sizes, percentage shared with the source table, ...
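+ * <p>
+ * A minimal sketch of driving the tool programmatically, mirroring {@link #innerMain} (the
+ * snapshot name is hypothetical):
+ * <pre>
+ *   int rc = ToolRunner.run(HBaseConfiguration.create(), new SnapshotInfo(),
+ *       new String[] { "-snapshot", "MySnapshot", "-files", "-stats" });
+ * </pre>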
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class SnapshotInfo extends Configured implements Tool { + private static final Log LOG = LogFactory.getLog(SnapshotInfo.class); + + private FileSystem fs; + private Path rootDir; + + private HTableDescriptor snapshotTableDesc; + private SnapshotDescription snapshotDesc; + private Path snapshotDir; + + @Override + public int run(String[] args) throws IOException, InterruptedException { + String snapshotName = null; + boolean showSchema = false; + boolean showFiles = false; + boolean showStats = false; + + // Process command line args + for (int i = 0; i < args.length; i++) { + String cmd = args[i]; + try { + if (cmd.equals("-snapshot")) { + snapshotName = args[++i]; + } else if (cmd.equals("-files")) { + showFiles = true; + } else if (cmd.equals("-stats")) { + showStats = true; + } else if (cmd.equals("-schema")) { + showSchema = true; + } else if (cmd.equals("-h") || cmd.equals("--help")) { + printUsageAndExit(); + } else { + System.err.println("UNEXPECTED: " + cmd); + printUsageAndExit(); + } + } catch (Exception e) { + printUsageAndExit(); + } + } + + if (snapshotName == null) { + System.err.println("Missing snapshot name!"); + printUsageAndExit(); + return 1; + } + + Configuration conf = getConf(); + fs = FileSystem.get(conf); + rootDir = FSUtils.getRootDir(conf); + + // Load snapshot information + if (!loadSnapshotInfo(snapshotName)) { + System.err.println("Snapshot '" + snapshotName + "' not found!"); + return 1; + } + + printInfo(); + if (showSchema) printSchema(); + if (showFiles || showStats) printFiles(showFiles); + + return 0; + } + + /** + * Load snapshot info and table descriptor for the specified snapshot + * @param snapshotName name of the snapshot to load + * @return false if snapshot is not found + */ + private boolean loadSnapshotInfo(final String snapshotName) throws IOException { + snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + if (!fs.exists(snapshotDir)) { + LOG.warn("Snapshot '" + snapshotName + "' not found in: " + snapshotDir); + return false; + } + + snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); + snapshotTableDesc = FSTableDescriptors.getTableDescriptor(fs, snapshotDir); + return true; + } + + /** + * Dump the {@link SnapshotDescription} + */ + private void printInfo() { + SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); + System.out.println("Snapshot Info"); + System.out.println("----------------------------------------"); + System.out.println(" Name: " + snapshotDesc.getName()); + System.out.println(" Type: " + snapshotDesc.getType()); + System.out.println(" Table: " + snapshotDesc.getTable()); + System.out.println(" Format: " + snapshotDesc.getVersion()); + System.out.println("Created: " + df.format(new Date(snapshotDesc.getCreationTime()))); + System.out.println(); + } + + /** + * Dump the {@link HTableDescriptor} + */ + private void printSchema() { + System.out.println("Table Descriptor"); + System.out.println("----------------------------------------"); + System.out.println(snapshotTableDesc.toString()); + System.out.println(); + } + + /** + * Collect the hfiles and logs statistics of the snapshot and + * dump the file list if requested and the collected information. 
+ */ + private void printFiles(final boolean showFiles) throws IOException { + final String table = snapshotDesc.getTable(); + final Configuration conf = getConf(); + + if (showFiles) { + System.out.println("Snapshot Files"); + System.out.println("----------------------------------------"); + } + + // Collect information about hfiles and logs in the snapshot + final AtomicInteger hfileArchiveCount = new AtomicInteger(); + final AtomicInteger hfilesMissing = new AtomicInteger(); + final AtomicInteger hfilesCount = new AtomicInteger(); + final AtomicInteger logsMissing = new AtomicInteger(); + final AtomicInteger logsCount = new AtomicInteger(); + final AtomicLong hfileArchiveSize = new AtomicLong(); + final AtomicLong hfileSize = new AtomicLong(); + final AtomicLong logSize = new AtomicLong(); + SnapshotReferenceUtil.visitReferencedFiles(fs, snapshotDir, + new SnapshotReferenceUtil.FileVisitor() { + public void storeFile (final String region, final String family, final String hfile) + throws IOException { + Path path = new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); + HFileLink link = new HFileLink(conf, path); + boolean inArchive = false; + long size = -1; + try { + if ((inArchive = fs.exists(link.getArchivePath()))) { + size = fs.getFileStatus(link.getArchivePath()).getLen(); + hfileArchiveSize.addAndGet(size); + hfileArchiveCount.addAndGet(1); + } else { + size = link.getFileStatus(fs).getLen(); + hfileSize.addAndGet(size); + hfilesCount.addAndGet(1); + } + } catch (FileNotFoundException e) { + hfilesMissing.addAndGet(1); + } + + if (showFiles) { + System.out.printf("%8s %s/%s/%s/%s %s%n", + (size < 0 ? "-" : StringUtils.humanReadableInt(size)), + table, region, family, hfile, + (inArchive ? "(archive)" : (size < 0) ? "(NOT FOUND)" : "")); + } + } + + public void recoveredEdits (final String region, final String logfile) + throws IOException { + Path path = SnapshotReferenceUtil.getRecoveredEdits(snapshotDir, region, logfile); + long size = fs.getFileStatus(path).getLen(); + logSize.addAndGet(size); + logsCount.addAndGet(1); + + if (showFiles) { + System.out.printf("%8s recovered.edits %s on region %s%n", + StringUtils.humanReadableInt(size), logfile, region); + } + } + + public void logFile (final String server, final String logfile) + throws IOException { + HLogLink logLink = new HLogLink(conf, server, logfile); + long size = -1; + try { + size = logLink.getFileStatus(fs).getLen(); + logSize.addAndGet(size); + logsCount.addAndGet(1); + } catch (FileNotFoundException e) { + logsMissing.addAndGet(1); + } + + if (showFiles) { + System.out.printf("%8s log %s on server %s %s%n", + (size < 0 ? "-" : StringUtils.humanReadableInt(size)), + logfile, server, + (size < 0 ? 
"(NOT FOUND)" : "")); + } + } + }); + + // Dump the stats + System.out.println(); + if (hfilesMissing.get() > 0 || logsMissing.get() > 0) { + System.out.println("**************************************************************"); + System.out.printf("BAD SNAPSHOT: %d hfile(s) and %d log(s) missing.%n", + hfilesMissing.get(), logsMissing.get()); + System.out.println("**************************************************************"); + } + + System.out.printf("%d HFiles (%d in archive), total size %s (%.2f%% %s shared with the source table)%n", + hfilesCount.get() + hfileArchiveCount.get(), hfileArchiveCount.get(), + StringUtils.humanReadableInt(hfileSize.get() + hfileArchiveSize.get()), + ((float)hfileSize.get() / (hfileSize.get() + hfileArchiveSize.get())) * 100, + StringUtils.humanReadableInt(hfileSize.get()) + ); + System.out.printf("%d Logs, total size %s%n", + logsCount.get(), StringUtils.humanReadableInt(logSize.get())); + System.out.println(); + } + + private void printUsageAndExit() { + System.err.printf("Usage: bin/hbase %s [options]%n", getClass().getName()); + System.err.println(" where [options] are:"); + System.err.println(" -h|-help Show this help and exit."); + System.err.println(" -snapshot NAME Snapshot to examine."); + System.err.println(" -files Files and logs list."); + System.err.println(" -stats Files and logs stats."); + System.err.println(" -schema Describe the snapshotted table."); + System.err.println(); + System.err.println("Examples:"); + System.err.println(" hbase " + getClass() + " \\"); + System.err.println(" -snapshot MySnapshot -files"); + System.exit(1); + } + + /** + * The guts of the {@link #main} method. + * Call this method to avoid the {@link #main(String[])} System.exit. + * @param args + * @return errCode + * @throws Exception + */ + static int innerMain(final String [] args) throws Exception { + return ToolRunner.run(HBaseConfiguration.create(), new SnapshotInfo(), args); + } + + public static void main(String[] args) throws Exception { + System.exit(innerMain(args)); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotReferenceUtil.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotReferenceUtil.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotReferenceUtil.java (revision 0) @@ -0,0 +1,251 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; +import java.util.HashSet; +import java.util.TreeMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileStatus; + +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.io.Reference; +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.FSVisitor; + +/** + * Utility methods for interacting with the snapshot referenced files. + */ +@InterfaceAudience.Private +public final class SnapshotReferenceUtil { + public interface FileVisitor extends FSVisitor.StoreFileVisitor, + FSVisitor.RecoveredEditsVisitor, FSVisitor.LogFileVisitor { + } + + private SnapshotReferenceUtil() { + // private constructor for utility class + } + + /** + * Get log directory for a server in a snapshot. + * + * @param snapshotDir directory where the specific snapshot is stored + * @param serverName name of the parent regionserver for the log files + * @return path to the log home directory for the archive files. + */ + public static Path getLogsDir(Path snapshotDir, String serverName) { + return new Path(snapshotDir, HLog.getHLogDirectoryName(serverName)); + } + + /** + * Get the snapshotted recovered.edits dir for the specified region. + * + * @param snapshotDir directory where the specific snapshot is stored + * @param regionName name of the region + * @return path to the recovered.edits directory for the specified region files. + */ + public static Path getRecoveredEditsDir(Path snapshotDir, String regionName) { + return HLog.getRegionDirRecoveredEditsDir(new Path(snapshotDir, regionName)); + } + + /** + * Get the snapshot recovered.edits file + * + * @param snapshotDir directory where the specific snapshot is stored + * @param regionName name of the region + * @param logfile name of the edit file + * @return full path of the log file for the specified region files. 
+ */ + public static Path getRecoveredEdits(Path snapshotDir, String regionName, String logfile) { + return new Path(getRecoveredEditsDir(snapshotDir, regionName), logfile); + } + + /** + * Iterate over the snapshot store files, restored.edits and logs + * + * @param fs {@link FileSystem} + * @param snapshotDir {@link Path} to the Snapshot directory + * @param visitor callback object to get the referenced files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitReferencedFiles(final FileSystem fs, final Path snapshotDir, + final FileVisitor visitor) throws IOException { + visitTableStoreFiles(fs, snapshotDir, visitor); + visitRecoveredEdits(fs, snapshotDir, visitor); + visitLogFiles(fs, snapshotDir, visitor); + } + + /** + * Iterate over the snapshot store files + * + * @param fs {@link FileSystem} + * @param snapshotDir {@link Path} to the Snapshot directory + * @param visitor callback object to get the store files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitTableStoreFiles(final FileSystem fs, final Path snapshotDir, + final FSVisitor.StoreFileVisitor visitor) throws IOException { + FSVisitor.visitTableStoreFiles(fs, snapshotDir, visitor); + } + + /** + * Iterate over the snapshot store files in the specified region + * + * @param fs {@link FileSystem} + * @param regionDir {@link Path} to the Snapshot region directory + * @param visitor callback object to get the store files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitRegionStoreFiles(final FileSystem fs, final Path regionDir, + final FSVisitor.StoreFileVisitor visitor) throws IOException { + FSVisitor.visitRegionStoreFiles(fs, regionDir, visitor); + } + + /** + * Iterate over the snapshot recovered.edits + * + * @param fs {@link FileSystem} + * @param snapshotDir {@link Path} to the Snapshot directory + * @param visitor callback object to get the recovered.edits files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitRecoveredEdits(final FileSystem fs, final Path snapshotDir, + final FSVisitor.RecoveredEditsVisitor visitor) throws IOException { + FSVisitor.visitTableRecoveredEdits(fs, snapshotDir, visitor); + } + + /** + * Iterate over the snapshot log files + * + * @param fs {@link FileSystem} + * @param snapshotDir {@link Path} to the Snapshot directory + * @param visitor callback object to get the log files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitLogFiles(final FileSystem fs, final Path snapshotDir, + final FSVisitor.LogFileVisitor visitor) throws IOException { + FSVisitor.visitLogFiles(fs, snapshotDir, visitor); + } + + /** + * Returns the set of region names available in the snapshot. 
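+ * <p>
+ * A short sketch of listing the regions of a completed snapshot (variable names are assumptions):
+ * <pre>
+ *   Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir("my_snapshot", rootDir);
+ *   Set&lt;String&gt; regions = getSnapshotRegionNames(fs, snapshotDir);
+ * </pre>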
+ * + * @param fs {@link FileSystem} + * @param snapshotDir {@link Path} to the Snapshot directory + * @throws IOException if an error occurred while scanning the directory + * @return the set of the regions contained in the snapshot + */ + public static Set getSnapshotRegionNames(final FileSystem fs, final Path snapshotDir) + throws IOException { + FileStatus[] regionDirs = FSUtils.listStatus(fs, snapshotDir, new FSUtils.RegionDirFilter(fs)); + if (regionDirs == null) return null; + + Set regions = new HashSet(); + for (FileStatus regionDir: regionDirs) { + regions.add(regionDir.getPath().getName()); + } + return regions; + } + + /** + * Get the list of hfiles for the specified snapshot region. + * NOTE: The current implementation keeps one empty file per HFile in the region. + * The file name matches the one in the original table, and by reconstructing + * the path you can quickly jump to the referenced file. + * + * @param fs {@link FileSystem} + * @param snapshotRegionDir {@link Path} to the Snapshot region directory + * @return Map of hfiles per family, the key is the family name and values are hfile names + * @throws IOException if an error occurred while scanning the directory + */ + public static Map> getRegionHFileReferences(final FileSystem fs, + final Path snapshotRegionDir) throws IOException { + final Map> familyFiles = new TreeMap>(); + + visitRegionStoreFiles(fs, snapshotRegionDir, + new FSVisitor.StoreFileVisitor() { + public void storeFile (final String region, final String family, final String hfile) + throws IOException { + List hfiles = familyFiles.get(family); + if (hfiles == null) { + hfiles = new LinkedList(); + familyFiles.put(family, hfiles); + } + hfiles.add(hfile); + } + }); + + return familyFiles; + } + + /** + * Returns the store file names in the snapshot. + * + * @param fs {@link FileSystem} + * @param snapshotDir {@link Path} to the Snapshot directory + * @throws IOException if an error occurred while scanning the directory + * @return the names of hfiles in the specified snaphot + */ + public static Set getHFileNames(final FileSystem fs, final Path snapshotDir) + throws IOException { + final Set names = new HashSet(); + visitTableStoreFiles(fs, snapshotDir, new FSVisitor.StoreFileVisitor() { + public void storeFile (final String region, final String family, final String hfile) + throws IOException { + if (HFileLink.isHFileLink(hfile)) { + names.add(HFileLink.getReferencedHFileName(hfile)); + } else { + names.add(hfile); + } + } + }); + return names; + } + + /** + * Returns the log file names available in the snapshot. 
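+ * <p>
+ * For illustration, the hfile and hlog sets can be collected together to see everything a
+ * snapshot still references (a sketch; variable names are assumptions):
+ * <pre>
+ *   Set&lt;String&gt; hfiles = getHFileNames(fs, snapshotDir);
+ *   Set&lt;String&gt; hlogs = getHLogNames(fs, snapshotDir);
+ * </pre>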
+ * + * @param fs {@link FileSystem} + * @param snapshotDir {@link Path} to the Snapshot directory + * @throws IOException if an error occurred while scanning the directory + * @return the names of hlogs in the specified snaphot + */ + public static Set getHLogNames(final FileSystem fs, final Path snapshotDir) + throws IOException { + final Set names = new HashSet(); + visitLogFiles(fs, snapshotDir, new FSVisitor.LogFileVisitor() { + public void logFile (final String server, final String logfile) throws IOException { + names.add(logfile); + } + }); + return names; + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotExistsException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotExistsException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotExistsException.java (revision 0) @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; + +/** + * Thrown when a snapshot exists but should not + */ +@SuppressWarnings("serial") +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class SnapshotExistsException extends HBaseSnapshotException { + + /** + * Failure due to the snapshot already existing + * @param msg full description of the failure + * @param desc snapshot that was attempted + */ + public SnapshotExistsException(String msg, SnapshotDescription desc) { + super(msg, desc); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshotException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshotException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshotException.java (revision 0) @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Thrown when a snapshot could not be exported due to an error during the operation. + */ +@InterfaceAudience.Public +@SuppressWarnings("serial") +public class ExportSnapshotException extends HBaseSnapshotException { + + /** + * @param msg message describing the exception + */ + public ExportSnapshotException(String msg) { + super(msg); + } + + /** + * @param message message describing the exception + * @param e cause + */ + public ExportSnapshotException(String message, Exception e) { + super(message, e); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/ReferenceRegionHFilesTask.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/ReferenceRegionHFilesTask.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/ReferenceRegionHFilesTask.java (revision 0) @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * Reference all the hfiles in a region for a snapshot. + *
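+ * <p>
+ * Illustrative use, assuming a snapshot description, error dispatcher and paths are already in
+ * hand (a sketch, not a prescribed call sequence):
+ * <pre>
+ *   new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, fs, regionSnapshotDir).call();
+ * </pre>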
+ * <p>
+ * Doesn't take into acccount if the hfiles are valid or not, just keeps track of what's in the + * region's directory. + */ +public class ReferenceRegionHFilesTask extends SnapshotTask { + + public static final Log LOG = LogFactory.getLog(ReferenceRegionHFilesTask.class); + private final Path regiondir; + private final FileSystem fs; + private final PathFilter fileFilter; + private final Path snapshotDir; + + /** + * Reference all the files in the given region directory + * @param snapshot snapshot for which to add references + * @param monitor to check/send error + * @param regionDir region directory to look for errors + * @param fs {@link FileSystem} where the snapshot/region live + * @param regionSnapshotDir directory in the snapshot to store region files + */ + public ReferenceRegionHFilesTask(final SnapshotDescription snapshot, + ForeignExceptionDispatcher monitor, Path regionDir, final FileSystem fs, Path regionSnapshotDir) { + super(snapshot, monitor); + this.regiondir = regionDir; + this.fs = fs; + + this.fileFilter = new PathFilter() { + @Override + public boolean accept(Path path) { + try { + return fs.isFile(path); + } catch (IOException e) { + LOG.error("Failed to reach fs to check file:" + path + ", marking as not file"); + ReferenceRegionHFilesTask.this.snapshotFailure("Failed to reach fs to check file status", + e); + return false; + } + } + }; + this.snapshotDir = regionSnapshotDir; + } + + @Override + public Void call() throws IOException { + FileStatus[] families = FSUtils.listStatus(fs, regiondir, new FSUtils.FamilyDirFilter(fs)); + + // if no families, then we are done again + if (families == null || families.length == 0) { + LOG.info("No families under region directory:" + regiondir + + ", not attempting to add references."); + return null; + } + + // snapshot directories to store the hfile reference + List snapshotFamilyDirs = TakeSnapshotUtils.getFamilySnapshotDirectories(snapshot, + snapshotDir, families); + + LOG.debug("Add hfile references to snapshot directories:" + snapshotFamilyDirs); + for (int i = 0; i < families.length; i++) { + FileStatus family = families[i]; + Path familyDir = family.getPath(); + // get all the hfiles in the family + FileStatus[] hfiles = FSUtils.listStatus(fs, familyDir, fileFilter); + + // if no hfiles, then we are done with this family + if (hfiles == null || hfiles.length == 0) { + LOG.debug("Not hfiles found for family: " + familyDir + ", skipping."); + continue; + } + + // make the snapshot's family directory + Path snapshotFamilyDir = snapshotFamilyDirs.get(i); + fs.mkdirs(snapshotFamilyDir); + + // create a reference for each hfile + for (FileStatus hfile : hfiles) { + // references are 0-length files, relying on file name. 
+ Path referenceFile = new Path(snapshotFamilyDir, hfile.getPath().getName()); + LOG.debug("Creating reference for:" + hfile.getPath() + " at " + referenceFile); + if (!fs.createNewFile(referenceFile)) { + throw new IOException("Failed to create reference file:" + referenceFile); + } + } + } + if (LOG.isDebugEnabled()) { + LOG.debug("Finished referencing hfiles, current region state:"); + FSUtils.logFileSystemState(fs, regiondir, LOG); + LOG.debug("and the snapshot directory:"); + FSUtils.logFileSystemState(fs, snapshotDir, LOG); + } + return null; + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/snapshot/TableInfoCopyTask.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/TableInfoCopyTask.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/TableInfoCopyTask.java (revision 0) @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSTableDescriptors; + +/** + * Copy the table info into the snapshot directory + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class TableInfoCopyTask extends SnapshotTask { + + public static final Log LOG = LogFactory.getLog(TableInfoCopyTask.class); + private final FileSystem fs; + private final Path rootDir; + + /** + * Copy the table info for the given table into the snapshot + * @param monitor listen for errors while running the snapshot + * @param snapshot snapshot for which we are copying the table info + * @param fs {@link FileSystem} where the tableinfo is stored (and where the copy will be written) + * @param rootDir root of the {@link FileSystem} where the tableinfo is stored + */ + public TableInfoCopyTask(ForeignExceptionDispatcher monitor, + SnapshotDescription snapshot, FileSystem fs, Path rootDir) { + super(snapshot, monitor); + this.rootDir = rootDir; + this.fs = fs; + } + + @Override + public Void call() throws Exception { + LOG.debug("Running table info copy."); + this.rethrowException(); + LOG.debug("Attempting to copy table info for snapshot:" + + SnapshotDescriptionUtils.toString(this.snapshot)); + // get the HTable descriptor + 
HTableDescriptor orig = FSTableDescriptors.getTableDescriptor(fs, rootDir, + Bytes.toBytes(this.snapshot.getTable())); + this.rethrowException(); + // write a copy of descriptor to the snapshot directory + Path snapshotDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); + FSTableDescriptors.createTableDescriptorForTableDirectory(fs, snapshotDir, orig, false); + LOG.debug("Finished copying tableinfo."); + return null; + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/snapshot/HBaseSnapshotException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/HBaseSnapshotException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/HBaseSnapshotException.java (revision 0) @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; + +/** + * General exception base class for when a snapshot fails + */ +@SuppressWarnings("serial") +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class HBaseSnapshotException extends HBaseIOException { + + private SnapshotDescription description; + + /** + * Some exception happened for a snapshot and don't even know the snapshot that it was about + * @param msg Full description of the failure + */ + public HBaseSnapshotException(String msg) { + super(msg); + } + + /** + * Exception for the given snapshot that has no previous root cause + * @param msg reason why the snapshot failed + * @param desc description of the snapshot that is being failed + */ + public HBaseSnapshotException(String msg, SnapshotDescription desc) { + super(msg); + this.description = desc; + } + + /** + * Exception for the given snapshot due to another exception + * @param msg reason why the snapshot failed + * @param cause root cause of the failure + * @param desc description of the snapshot that is being failed + */ + public HBaseSnapshotException(String msg, Throwable cause, SnapshotDescription desc) { + super(msg, cause); + this.description = desc; + } + + /** + * Exception when the description of the snapshot cannot be determined, due to some root other + * root cause + * @param message description of what caused the failure + * @param e root cause + */ + public HBaseSnapshotException(String message, Exception e) { + super(message, e); + } + + public SnapshotDescription getSnapshotDescription() { + return this.description; + } +} Index: 
src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotException.java (revision 0) @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.snapshot; + +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; + +/** + * Thrown when a snapshot could not be restored due to a server-side error when restoring it. + */ +@SuppressWarnings("serial") +public class RestoreSnapshotException extends HBaseSnapshotException { + public RestoreSnapshotException(String msg, SnapshotDescription desc) { + super(msg, desc); + } + + public RestoreSnapshotException(String msg, Throwable cause, SnapshotDescription desc) { + super(msg, cause, desc); + } + + public RestoreSnapshotException(String msg) { + super(msg); + } + + public RestoreSnapshotException(String message, Exception e) { + super(message, e); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotTask.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotTask.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotTask.java (revision 0) @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import java.util.concurrent.Callable; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; + +/** + * General snapshot operation taken on a regionserver + */ +@InterfaceAudience.Private +public abstract class SnapshotTask implements ForeignExceptionSnare, Callable{ + + protected final SnapshotDescription snapshot; + protected final ForeignExceptionDispatcher errorMonitor; + + /** + * @param snapshot Description of the snapshot we are going to operate on + * @param monitor listener interested in failures to the snapshot caused by this operation + */ + public SnapshotTask(SnapshotDescription snapshot, ForeignExceptionDispatcher monitor) { + assert monitor != null : "ForeignExceptionDispatcher must not be null!"; + assert snapshot != null : "SnapshotDescription must not be null!"; + this.snapshot = snapshot; + this.errorMonitor = monitor; + } + + public void snapshotFailure(String message, Exception e) { + ForeignException ee = new ForeignException(message, e); + errorMonitor.receive(ee); + } + + @Override + public void rethrowException() throws ForeignException { + this.errorMonitor.rethrowException(); + } + + @Override + public boolean hasException() { + return this.errorMonitor.hasException(); + } + + @Override + public ForeignException getException() { + return this.errorMonitor.getException(); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java (revision 0) @@ -0,0 +1,360 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * Utility class to help manage {@link SnapshotDescription SnapshotDesriptions}. + *
+ * <p>
+ * Snapshots are laid out on disk like this:
+ *
+ * <pre>
+ * /hbase/.snapshots
+ *          /.tmp                <---- working directory
+ *          /[snapshot name]     <----- completed snapshot
+ * </pre>
+ *
+ * A completed snapshot named 'completed' then looks like this (multiple regions, servers, files,
+ * etc. are signified by '...' at the same directory depth):
+ *
+ * <pre>
+ * /hbase/.snapshots/completed
+ *                   .snapshotinfo          <--- Description of the snapshot
+ *                   .tableinfo             <--- Copy of the tableinfo
+ *                    /.logs
+ *                        /[server_name]
+ *                            /... [log files]
+ *                         ...
+ *                   /[region name]           <---- All the region's information
+ *                   .regioninfo              <---- Copy of the HRegionInfo
+ *                      /[column family name]
+ *                          /[hfile name]     <--- name of the hfile in the real region
+ *                          ...
+ *                      ...
+ *                    ...
+ * </pre>
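+ *
+ * <p>
+ * For example, the completed and working directories sketched above can be resolved with the
+ * helpers below (the snapshot name and variables are hypothetical):
+ * <pre>
+ *   Path rootDir = FSUtils.getRootDir(conf);
+ *   Path completedDir = getCompletedSnapshotDir("completed", rootDir);
+ *   Path workingDir = getWorkingSnapshotDir("completed", rootDir);
+ * </pre>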
+ * + * Utility methods in this class are useful for getting the correct locations for different parts of + * the snapshot, as well as moving completed snapshots into place (see + * {@link #completeSnapshot}, and writing the + * {@link SnapshotDescription} to the working snapshot directory. + */ +public class SnapshotDescriptionUtils { + + /** + * Filter that only accepts completed snapshot directories + */ + public static class CompletedSnaphotDirectoriesFilter extends FSUtils.DirFilter { + + /** + * @param fs + */ + public CompletedSnaphotDirectoriesFilter(FileSystem fs) { + super(fs); + } + + @Override + public boolean accept(Path path) { + // only accept directories that aren't the tmp directory + if (super.accept(path)) { + return !path.getName().equals(SNAPSHOT_TMP_DIR_NAME); + } + return false; + } + + } + + private static final Log LOG = LogFactory.getLog(SnapshotDescriptionUtils.class); + /** + * Version of the fs layout for a snapshot. Future snapshots may have different file layouts, + * which we may need to read in differently. + */ + public static final int SNAPSHOT_LAYOUT_VERSION = 0; + + // snapshot directory constants + /** + * The file contains the snapshot basic information and it is under the directory of a snapshot. + */ + public static final String SNAPSHOTINFO_FILE = ".snapshotinfo"; + + /** Temporary directory under the snapshot directory to store in-progress snapshots */ + public static final String SNAPSHOT_TMP_DIR_NAME = ".tmp"; + // snapshot operation values + /** Default value if no start time is specified */ + public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0; + + public static final String MASTER_SNAPSHOT_TIMEOUT_MILLIS = "hbase.snapshot.master.timeout.millis"; + + /** By default, wait 60 seconds for a snapshot to complete */ + public static final long DEFAULT_MAX_WAIT_TIME = 60000; + + private SnapshotDescriptionUtils() { + // private constructor for utility class + } + + /** + * Check to make sure that the description of the snapshot requested is valid + * @param snapshot description of the snapshot + * @throws IllegalArgumentException if the name of the snapshot or the name of the table to + * snapshot are not valid names. + */ + public static void assertSnapshotRequestIsValid(SnapshotDescription snapshot) + throws IllegalArgumentException { + // FIXME these method names is really bad - trunk will probably change + // .META. and -ROOT- snapshots are not allowed + if (HTableDescriptor.isMetaTable(Bytes.toBytes(snapshot.getTable()))) { + throw new IllegalArgumentException(".META. and -ROOT- snapshots are not allowed"); + } + // make sure the snapshot name is valid + HTableDescriptor.isLegalTableName(Bytes.toBytes(snapshot.getName())); + // make sure the table name is valid + HTableDescriptor.isLegalTableName(Bytes.toBytes(snapshot.getTable())); + } + + /** + * @param conf {@link Configuration} from which to check for the timeout + * @param type type of snapshot being taken + * @param defaultMaxWaitTime Default amount of time to wait, if none is in the configuration + * @return the max amount of time the master should wait for a snapshot to complete + */ + public static long getMaxMasterTimeout(Configuration conf, SnapshotDescription.Type type, + long defaultMaxWaitTime) { + String confKey; + switch (type) { + case DISABLED: + default: + confKey = MASTER_SNAPSHOT_TIMEOUT_MILLIS; + } + return conf.getLong(confKey, defaultMaxWaitTime); + } + + /** + * Get the snapshot root directory. All the snapshots are kept under this directory, i.e. 
+ * ${hbase.rootdir}/.snapshot + * @param rootDir hbase root directory + * @return the base directory in which all snapshots are kept + */ + public static Path getSnapshotRootDir(final Path rootDir) { + return new Path(rootDir, HConstants.SNAPSHOT_DIR_NAME); + } + + /** + * Get the directory for a specified snapshot. This directory is a sub-directory of snapshot root + * directory and all the data files for a snapshot are kept under this directory. + * @param snapshot snapshot being taken + * @param rootDir hbase root directory + * @return the final directory for the completed snapshot + */ + public static Path getCompletedSnapshotDir(final SnapshotDescription snapshot, final Path rootDir) { + return getCompletedSnapshotDir(snapshot.getName(), rootDir); + } + + /** + * Get the directory for a completed snapshot. This directory is a sub-directory of snapshot root + * directory and all the data files for a snapshot are kept under this directory. + * @param snapshotName name of the snapshot being taken + * @param rootDir hbase root directory + * @return the final directory for the completed snapshot + */ + public static Path getCompletedSnapshotDir(final String snapshotName, final Path rootDir) { + return getCompletedSnapshotDir(getSnapshotsDir(rootDir), snapshotName); + } + + /** + * Get the general working directory for snapshots - where they are built, where they are + * temporarily copied on export, etc. + * @param rootDir root directory of the HBase installation + * @return Path to the snapshot tmp directory, relative to the passed root directory + */ + public static Path getWorkingSnapshotDir(final Path rootDir) { + return new Path(getSnapshotsDir(rootDir), SNAPSHOT_TMP_DIR_NAME); + } + + /** + * Get the directory to build a snapshot, before it is finalized + * @param snapshot snapshot that will be built + * @param rootDir root directory of the hbase installation + * @return {@link Path} where one can build a snapshot + */ + public static Path getWorkingSnapshotDir(SnapshotDescription snapshot, final Path rootDir) { + return getCompletedSnapshotDir(getWorkingSnapshotDir(rootDir), snapshot.getName()); + } + + /** + * Get the directory to build a snapshot, before it is finalized + * @param snapshotName name of the snapshot + * @param rootDir root directory of the hbase installation + * @return {@link Path} where one can build a snapshot + */ + public static Path getWorkingSnapshotDir(String snapshotName, final Path rootDir) { + return getCompletedSnapshotDir(getWorkingSnapshotDir(rootDir), snapshotName); + } + + /** + * Get the directory to store the snapshot instance + * @param snapshotsDir hbase-global directory for storing all snapshots + * @param snapshotName name of the snapshot to take + * @return + */ + private static final Path getCompletedSnapshotDir(final Path snapshotsDir, String snapshotName) { + return new Path(snapshotsDir, snapshotName); + } + + /** + * @param rootDir hbase root directory + * @return the directory for all completed snapshots; + */ + public static final Path getSnapshotsDir(Path rootDir) { + return new Path(rootDir, HConstants.SNAPSHOT_DIR_NAME); + } + + /** + * Convert the passed snapshot description into a 'full' snapshot description based on default + * parameters, if none have been supplied. This resolves any 'optional' parameters that aren't + * supplied to their default values. 
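+ * <p>
+ * A sketch of the intended use (the field values are hypothetical):
+ * <pre>
+ *   SnapshotDescription desc = SnapshotDescription.newBuilder()
+ *       .setName("my_snapshot").setTable("my_table").build();
+ *   desc = validate(desc, conf);   // fills in the creation time if it was not specified
+ * </pre>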
+ * @param snapshot general snapshot descriptor + * @param conf Configuration to read configured snapshot defaults if snapshot is not complete + * @return a valid snapshot description + * @throws IllegalArgumentException if the {@link SnapshotDescription} is not a complete + * {@link SnapshotDescription}. + */ + public static SnapshotDescription validate(SnapshotDescription snapshot, Configuration conf) + throws IllegalArgumentException { + if (!snapshot.hasTable()) { + throw new IllegalArgumentException( + "Descriptor doesn't apply to a table, so we can't build it."); + } + + // set the creation time, if one hasn't been set + long time = snapshot.getCreationTime(); + if (time == SnapshotDescriptionUtils.NO_SNAPSHOT_START_TIME_SPECIFIED) { + time = EnvironmentEdgeManager.currentTimeMillis(); + LOG.debug("Creation time not specified, setting to:" + time + " (current time:" + + EnvironmentEdgeManager.currentTimeMillis() + ")."); + SnapshotDescription.Builder builder = snapshot.toBuilder(); + builder.setCreationTime(time); + snapshot = builder.build(); + } + return snapshot; + } + + /** + * Write the snapshot description into the working directory of a snapshot + * @param snapshot description of the snapshot being taken + * @param workingDir working directory of the snapshot + * @param fs {@link FileSystem} on which the snapshot should be taken + * @throws IOException if we can't reach the filesystem and the file cannot be cleaned up on + * failure + */ + public static void writeSnapshotInfo(SnapshotDescription snapshot, Path workingDir, FileSystem fs) + throws IOException { + FsPermission perms = FSUtils.getFilePermissions(fs, fs.getConf(), + HConstants.DATA_FILE_UMASK_KEY); + Path snapshotInfo = new Path(workingDir, SnapshotDescriptionUtils.SNAPSHOTINFO_FILE); + try { + FSDataOutputStream out = FSUtils.create(fs, snapshotInfo, perms, true); + try { + snapshot.writeTo(out); + } finally { + out.close(); + } + } catch (IOException e) { + // if we get an exception, try to remove the snapshot info + if (!fs.delete(snapshotInfo, false)) { + String msg = "Couldn't delete snapshot info file: " + snapshotInfo; + LOG.error(msg); + throw new IOException(msg); + } + } + } + + /** + * Read in the {@link SnapshotDescription} stored for the snapshot in the passed directory + * @param fs filesystem where the snapshot was taken + * @param snapshotDir directory where the snapshot was stored + * @return the stored snapshot description + * @throws CorruptedSnapshotException if the snapshot cannot be read + */ + public static SnapshotDescription readSnapshotInfo(FileSystem fs, Path snapshotDir) + throws CorruptedSnapshotException { + Path snapshotInfo = new Path(snapshotDir, SNAPSHOTINFO_FILE); + try { + FSDataInputStream in = null; + try { + in = fs.open(snapshotInfo); + return SnapshotDescription.parseFrom(in); + } finally { + if (in != null) in.close(); + } + } catch (IOException e) { + throw new CorruptedSnapshotException("Couldn't read snapshot info from:" + snapshotInfo, e); + } + } + + /** + * Move the finished snapshot to its final, publicly visible directory - this marks the snapshot + * as 'complete'. 
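+ * <p>
+ * A rough sketch of the flow these helpers support, with error handling omitted and variables
+ * assumed to be in hand:
+ * <pre>
+ *   Path workingDir = getWorkingSnapshotDir(snapshot, rootDir);
+ *   writeSnapshotInfo(snapshot, workingDir, fs);
+ *   // ... copy the table info and reference the region files into workingDir ...
+ *   completeSnapshot(snapshot, rootDir, workingDir, fs);
+ * </pre>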
+ * @param snapshot description of the snapshot being tabken + * @param rootdir root directory of the hbase installation + * @param workingDir directory where the in progress snapshot was built + * @param fs {@link FileSystem} where the snapshot was built + * @throws SnapshotCreationException if the snapshot could not be moved + * @throws IOException the filesystem could not be reached + */ + public static void completeSnapshot(SnapshotDescription snapshot, Path rootdir, Path workingDir, + FileSystem fs) throws SnapshotCreationException, IOException { + Path finishedDir = getCompletedSnapshotDir(snapshot, rootdir); + LOG.debug("Snapshot is done, just moving the snapshot from " + workingDir + " to " + + finishedDir); + if (!fs.rename(workingDir, finishedDir)) { + throw new SnapshotCreationException("Failed to move working directory(" + workingDir + + ") to completed directory(" + finishedDir + ").", snapshot); + } + } + + /** + * Returns a single line (no \n) representation of snapshot metadata. Use this instead of + * {@link SnapshotDescription#toString()}. We don't replace SnapshotDescrpition's toString + * because it is auto-generated by protoc. + * @param ssd + * @return Single line string with a summary of the snapshot parameters + */ + public static String toString(SnapshotDescription ssd) { + if (ssd == null) { + return null; + } + return "{ ss=" + ssd.getName() + " table=" + ssd.getTable() + + " type=" + ssd.getType() + " }"; + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/CopyRecoveredEditsTask.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/CopyRecoveredEditsTask.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/CopyRecoveredEditsTask.java (revision 0) @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; +import java.util.NavigableSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.wal.HLog; + +/** + * Copy over each of the files in a region's recovered.edits directory to the region's snapshot + * directory. + *
+ * <p>
+ * This is a serial operation over each of the files in the recovered.edits directory and also + * streams all the bytes to the client and then back to the filesystem, so the files being copied + * should be small or it will (a) suck up a lot of bandwidth, and (b) take a long time. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class CopyRecoveredEditsTask extends SnapshotTask { + + private static final Log LOG = LogFactory.getLog(CopyRecoveredEditsTask.class); + private final FileSystem fs; + private final Path regiondir; + private final Path outputDir; + + /** + * @param snapshot Snapshot being taken + * @param monitor error monitor for the snapshot + * @param fs {@link FileSystem} where the snapshot is being taken + * @param regionDir directory for the region to examine for edits + * @param snapshotRegionDir directory for the region in the snapshot + */ + public CopyRecoveredEditsTask(SnapshotDescription snapshot, ForeignExceptionDispatcher monitor, + FileSystem fs, Path regionDir, Path snapshotRegionDir) { + super(snapshot, monitor); + this.fs = fs; + this.regiondir = regionDir; + this.outputDir = HLog.getRegionDirRecoveredEditsDir(snapshotRegionDir); + } + + @Override + public Void call() throws IOException { + NavigableSet files = HLog.getSplitEditFilesSorted(this.fs, regiondir); + if (files == null || files.size() == 0) return null; + + // copy over each file. + // this is really inefficient (could be trivially parallelized), but is + // really simple to reason about. + for (Path source : files) { + // check to see if the file is zero length, in which case we can skip it + FileStatus stat = fs.getFileStatus(source); + if (stat.getLen() <= 0) continue; + + // its not zero length, so copy over the file + Path out = new Path(outputDir, source.getName()); + LOG.debug("Copying " + source + " to " + out); + FileUtil.copy(fs, source, fs, out, true, fs.getConf()); + + // check for errors to the running operation after each file + this.rethrowException(); + } + return null; + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/TakeSnapshotUtils.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/TakeSnapshotUtils.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/TakeSnapshotUtils.java (revision 0) @@ -0,0 +1,323 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.snapshot; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionListener; +import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.Store; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; + +/** + * Utilities useful when taking a snapshot + */ +public class TakeSnapshotUtils { + + private static final Log LOG = LogFactory.getLog(TakeSnapshotUtils.class); + + private TakeSnapshotUtils() { + // private constructor for util class + } + + /** + * Get the per-region snapshot description location. + *

+ * Under the per-snapshot directory, specific files per-region are kept in a layout similar to + * the current directory layout. + * @param desc description of the snapshot + * @param rootDir root directory for the hbase installation + * @param regionName encoded name of the region (see {@link HRegionInfo#encodeRegionName(byte[])}) + * @return path to the per-region directory for the snapshot + */ + public static Path getRegionSnapshotDirectory(SnapshotDescription desc, Path rootDir, + String regionName) { + Path snapshotDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(desc, rootDir); + return HRegion.getRegionDir(snapshotDir, regionName); + } + + /** + * Get the home directory for store-level snapshot files. + *

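For illustration, an editorial sketch (not part of the patch) of how the region-level and store-level helpers here compose the snapshot layout; the family name "f1" is hypothetical:

  static Path familySnapshotDir(SnapshotDescription snapshot, Path rootDir, HRegionInfo hri) {
    // per-region directory under the working snapshot directory
    Path regionSnapshotDir = TakeSnapshotUtils.getRegionSnapshotDirectory(snapshot, rootDir,
        hri.getEncodedName());
    // per-family directory under that region, mirroring the live table layout
    return TakeSnapshotUtils.getStoreSnapshotDirectory(regionSnapshotDir, "f1");
  }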
+ * Specific files per store are kept in a similar layout as per the current directory layout. + * @param regionDir snapshot directory for the parent region, not the standard region + * directory. See {@link #getRegionSnapshotDirectory} + * @param family name of the store to snapshot + * @return path to the snapshot home directory for the store/family + */ + public static Path getStoreSnapshotDirectory(Path regionDir, String family) { + return Store.getStoreHomedir(regionDir, Bytes.toBytes(family)); + } + + /** + * Get the snapshot directory for each family to be added to the the snapshot + * @param snapshot description of the snapshot being take + * @param snapshotRegionDir directory in the snapshot where the region directory information + * should be stored + * @param families families to be added (can be null) + * @return paths to the snapshot directory for each family, in the same order as the families + * passed in + */ + public static List getFamilySnapshotDirectories(SnapshotDescription snapshot, + Path snapshotRegionDir, FileStatus[] families) { + if (families == null || families.length == 0) return Collections.emptyList(); + + List familyDirs = new ArrayList(families.length); + for (FileStatus family : families) { + // build the reference directory name + familyDirs.add(getStoreSnapshotDirectory(snapshotRegionDir, family.getPath().getName())); + } + return familyDirs; + } + + /** + * Create a snapshot timer for the master which notifies the monitor when an error occurs + * @param snapshot snapshot to monitor + * @param conf configuration to use when getting the max snapshot life + * @param monitor monitor to notify when the snapshot life expires + * @return the timer to use update to signal the start and end of the snapshot + */ + public static TimeoutExceptionInjector getMasterTimerAndBindToMonitor(SnapshotDescription snapshot, + Configuration conf, ForeignExceptionListener monitor) { + long maxTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(), + SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME); + return new TimeoutExceptionInjector(monitor, maxTime); + } + + /** + * Verify that all the expected logs got referenced + * @param fs filesystem where the logs live + * @param logsDir original logs directory + * @param serverNames names of the servers that involved in the snapshot + * @param snapshot description of the snapshot being taken + * @param snapshotLogDir directory for logs in the snapshot + * @throws IOException + */ + public static void verifyAllLogsGotReferenced(FileSystem fs, Path logsDir, + Set serverNames, SnapshotDescription snapshot, Path snapshotLogDir) + throws IOException { + assertTrue(snapshot, "Logs directory doesn't exist in snapshot", fs.exists(logsDir)); + // for each of the server log dirs, make sure it matches the main directory + Multimap snapshotLogs = getMapOfServersAndLogs(fs, snapshotLogDir, serverNames); + Multimap realLogs = getMapOfServersAndLogs(fs, logsDir, serverNames); + if (realLogs != null) { + assertNotNull(snapshot, "No server logs added to snapshot", snapshotLogs); + } else { + assertNull(snapshot, "Snapshotted server logs that don't exist", snapshotLogs); + } + + // check the number of servers + Set>> serverEntries = realLogs.asMap().entrySet(); + Set>> snapshotEntries = snapshotLogs.asMap().entrySet(); + assertEquals(snapshot, "Not the same number of snapshot and original server logs directories", + serverEntries.size(), snapshotEntries.size()); + + // verify we snapshotted each of the log files + for (Entry> 
serverLogs : serverEntries) { + // if the server is not the snapshot, skip checking its logs + if (!serverNames.contains(serverLogs.getKey())) continue; + Collection snapshotServerLogs = snapshotLogs.get(serverLogs.getKey()); + assertNotNull(snapshot, "Snapshots missing logs for server:" + serverLogs.getKey(), + snapshotServerLogs); + + // check each of the log files + assertEquals(snapshot, + "Didn't reference all the log files for server:" + serverLogs.getKey(), serverLogs + .getValue().size(), snapshotServerLogs.size()); + for (String log : serverLogs.getValue()) { + assertTrue(snapshot, "Snapshot logs didn't include " + log, + snapshotServerLogs.contains(log)); + } + } + } + + /** + * Verify one of a snapshot's region's recovered.edits, has been at the surface (file names, + * length), match the original directory. + * @param fs filesystem on which the snapshot had been taken + * @param rootDir full path to the root hbase directory + * @param regionInfo info for the region + * @param snapshot description of the snapshot that was taken + * @throws IOException if there is an unexpected error talking to the filesystem + */ + public static void verifyRecoveredEdits(FileSystem fs, Path rootDir, HRegionInfo regionInfo, + SnapshotDescription snapshot) throws IOException { + Path regionDir = HRegion.getRegionDir(rootDir, regionInfo); + Path editsDir = HLog.getRegionDirRecoveredEditsDir(regionDir); + Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(snapshot, rootDir, + regionInfo.getEncodedName()); + Path snapshotEditsDir = HLog.getRegionDirRecoveredEditsDir(snapshotRegionDir); + + FileStatus[] edits = FSUtils.listStatus(fs, editsDir); + FileStatus[] snapshotEdits = FSUtils.listStatus(fs, snapshotEditsDir); + if (edits == null) { + assertNull(snapshot, "Snapshot has edits but table doesn't", snapshotEdits); + return; + } + + assertNotNull(snapshot, "Table has edits, but snapshot doesn't", snapshotEdits); + + // check each of the files + assertEquals(snapshot, "Not same number of edits in snapshot as table", edits.length, + snapshotEdits.length); + + // make sure we have a file with the same name as the original + // it would be really expensive to verify the content matches the original + for (FileStatus edit : edits) { + for (FileStatus sEdit : snapshotEdits) { + if (sEdit.getPath().equals(edit.getPath())) { + assertEquals(snapshot, "Snapshot file" + sEdit.getPath() + + " length not equal to the original: " + edit.getPath(), edit.getLen(), + sEdit.getLen()); + break; + } + } + assertTrue(snapshot, "No edit in snapshot with name:" + edit.getPath(), false); + } + } + + private static void assertNull(SnapshotDescription snapshot, String msg, Object isNull) + throws CorruptedSnapshotException { + if (isNull != null) { + throw new CorruptedSnapshotException(msg + ", Expected " + isNull + " to be null.", snapshot); + } + } + + private static void assertNotNull(SnapshotDescription snapshot, String msg, Object notNull) + throws CorruptedSnapshotException { + if (notNull == null) { + throw new CorruptedSnapshotException(msg + ", Expected object to not be null, but was null.", + snapshot); + } + } + + private static void assertTrue(SnapshotDescription snapshot, String msg, boolean isTrue) + throws CorruptedSnapshotException { + if (!isTrue) { + throw new CorruptedSnapshotException(msg + ", Expected true, but was false", snapshot); + } + } + + /** + * Assert that the expect matches the gotten amount + * @param msg message to add the to exception + * @param expected + * @param gotten 
+ * @throws CorruptedSnapshotException thrown if the two elements don't match + */ + private static void assertEquals(SnapshotDescription snapshot, String msg, int expected, + int gotten) throws CorruptedSnapshotException { + if (expected != gotten) { + throw new CorruptedSnapshotException(msg + ". Expected:" + expected + ", got:" + gotten, + snapshot); + } + } + + /** + * Assert that the expect matches the gotten amount + * @param msg message to add the to exception + * @param expected + * @param gotten + * @throws CorruptedSnapshotException thrown if the two elements don't match + */ + private static void assertEquals(SnapshotDescription snapshot, String msg, long expected, + long gotten) throws CorruptedSnapshotException { + if (expected != gotten) { + throw new CorruptedSnapshotException(msg + ". Expected:" + expected + ", got:" + gotten, + snapshot); + } + } + + /** + * @param logdir + * @param toInclude list of servers to include. If empty or null, returns all servers + * @return maps of servers to all their log files. If there is no log directory, returns + * null + */ + private static Multimap getMapOfServersAndLogs(FileSystem fs, Path logdir, + Collection toInclude) throws IOException { + // create a path filter based on the passed directories to include + PathFilter filter = toInclude == null || toInclude.size() == 0 ? null + : new MatchesDirectoryNames(toInclude); + + // get all the expected directories + FileStatus[] serverLogDirs = FSUtils.listStatus(fs, logdir, filter); + if (serverLogDirs == null) return null; + + // map those into a multimap of servername -> [log files] + Multimap map = HashMultimap.create(); + for (FileStatus server : serverLogDirs) { + FileStatus[] serverLogs = FSUtils.listStatus(fs, server.getPath(), null); + if (serverLogs == null) continue; + for (FileStatus log : serverLogs) { + map.put(server.getPath().getName(), log.getPath().getName()); + } + } + return map; + } + + /** + * Path filter that only accepts paths where that have a {@link Path#getName()} that is contained + * in the specified collection. + */ + private static class MatchesDirectoryNames implements PathFilter { + + Collection paths; + + public MatchesDirectoryNames(Collection dirNames) { + this.paths = dirNames; + } + + @Override + public boolean accept(Path path) { + return paths.contains(path.getName()); + } + } + + /** + * Get the log directory for a specific snapshot + * @param snapshotDir directory where the specific snapshot will be store + * @param serverName name of the parent regionserver for the log files + * @return path to the log home directory for the archive files. + */ + public static Path getSnapshotHLogsDir(Path snapshotDir, String serverName) { + return new Path(snapshotDir, HLog.getHLogDirectoryName(serverName)); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDoesNotExistException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDoesNotExistException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDoesNotExistException.java (revision 0) @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; + + +/** + * Thrown when the server is looking for a snapshot but can't find the snapshot on the filesystem + */ +@SuppressWarnings("serial") +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class SnapshotDoesNotExistException extends HBaseSnapshotException { + /** + * @param msg full description of the failure + */ + public SnapshotDoesNotExistException(String msg) { + super(msg); + } + + /** + * @param desc expected snapshot to find + */ + public SnapshotDoesNotExistException(SnapshotDescription desc) { + super("Snapshot '" + desc.getName() +"' doesn't exist on the filesystem", desc); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotLogSplitter.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotLogSplitter.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotLogSplitter.java (revision 0) @@ -0,0 +1,196 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.snapshot; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.HLogLink; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.regionserver.wal.HLogKey; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * If the snapshot has references to one or more log files, + * those must be split (each log contains multiple tables and regions) + * and must be placed in the region/recovered.edits folder. + * (recovered.edits files will be played on region startup) + * + * In case of Restore: the log can just be split in the recovered.edits folder. 
+ * In case of Clone: each entry in the log must be modified to use the new region name. + * (region names are encoded with: tableName, startKey, regionIdTimeStamp) + * + * We can't use the normal split code, because the HLogKey contains the + * table name and the region name, and in case of "clone from snapshot" + * region name and table name will be different and must be replaced in + * the recovered.edits. + */ +@InterfaceAudience.Private +class SnapshotLogSplitter implements Closeable { + static final Log LOG = LogFactory.getLog(SnapshotLogSplitter.class); + + private final class LogWriter implements Closeable { + private HLog.Writer writer; + private Path logFile; + private long seqId; + + public LogWriter(final Configuration conf, final FileSystem fs, + final Path logDir, long seqId) throws IOException { + logFile = new Path(logDir, logFileName(seqId, true)); + this.writer = HLog.createWriter(fs, logFile, conf); + this.seqId = seqId; + } + + public void close() throws IOException { + writer.close(); + + Path finalFile = new Path(logFile.getParent(), logFileName(seqId, false)); + LOG.debug("LogWriter tmpLogFile=" + logFile + " -> logFile=" + finalFile); + fs.rename(logFile, finalFile); + } + + public void append(final HLog.Entry entry) throws IOException { + writer.append(entry); + if (seqId < entry.getKey().getLogSeqNum()) { + seqId = entry.getKey().getLogSeqNum(); + } + } + + private String logFileName(long seqId, boolean temp) { + String fileName = String.format("%019d", seqId); + if (temp) fileName += HLog.RECOVERED_LOG_TMPFILE_SUFFIX; + return fileName; + } + } + + private final Map regionLogWriters = + new TreeMap(Bytes.BYTES_COMPARATOR); + + private final Map regionsMap; + private final Configuration conf; + private final byte[] snapshotTableName; + private final byte[] tableName; + private final Path tableDir; + private final FileSystem fs; + + /** + * @params tableName snapshot table name + * @params regionsMap maps original region names to the new ones. + */ + public SnapshotLogSplitter(final Configuration conf, final FileSystem fs, + final Path tableDir, final byte[] snapshotTableName, + final Map regionsMap) { + this.regionsMap = regionsMap; + this.snapshotTableName = snapshotTableName; + this.tableName = Bytes.toBytes(tableDir.getName()); + this.tableDir = tableDir; + this.conf = conf; + this.fs = fs; + } + + public void close() throws IOException { + for (LogWriter writer: regionLogWriters.values()) { + writer.close(); + } + } + + public void splitLog(final String serverName, final String logfile) throws IOException { + LOG.debug("Restore log=" + logfile + " server=" + serverName + + " for snapshotTable=" + Bytes.toString(snapshotTableName) + + " to table=" + Bytes.toString(tableName)); + splitLog(new HLogLink(conf, serverName, logfile).getAvailablePath(fs)); + } + + public void splitRecoveredEdit(final Path editPath) throws IOException { + LOG.debug("Restore recover.edits=" + editPath + + " for snapshotTable=" + Bytes.toString(snapshotTableName) + + " to table=" + Bytes.toString(tableName)); + splitLog(editPath); + } + + /** + * Split the snapshot HLog reference into regions recovered.edits. + * + * The HLogKey contains the table name and the region name, + * and they must be changed to the restored table names. 
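For illustration, an editorial sketch (not part of the patch) of driving the splitter during a clone; the table, server, and log file names are hypothetical, and the region map is keyed by encoded region names using a byte[]-aware comparator:

  static void splitSnapshotLog(Configuration conf, FileSystem fs, Path tableDir,
      byte[] oldEncodedRegion, byte[] newEncodedRegion) throws IOException {
    Map<byte[], byte[]> regionsMap = new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
    regionsMap.put(oldEncodedRegion, newEncodedRegion); // clone: rewrite to the new region name
    SnapshotLogSplitter splitter = new SnapshotLogSplitter(conf, fs, tableDir,
        Bytes.toBytes("sourceTable"), regionsMap);
    try {
      splitter.splitLog("host187.example.com,60020,1289493121758", "hlog.1234");
    } finally {
      splitter.close(); // renames the per-region tmp files to their final recovered.edits names
    }
  }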
+ * + * @param logPath Snapshot HLog reference path + */ + public void splitLog(final Path logPath) throws IOException { + HLog.Reader log = HLog.getReader(fs, logPath, conf); + try { + HLog.Entry entry; + LogWriter writer = null; + byte[] regionName = null; + byte[] newRegionName = null; + while ((entry = log.next()) != null) { + HLogKey key = entry.getKey(); + + // We're interested only in the snapshot table that we're restoring + if (!Bytes.equals(key.getTablename(), snapshotTableName)) continue; + + // Writer for region. + if (!Bytes.equals(regionName, key.getEncodedRegionName())) { + regionName = key.getEncodedRegionName().clone(); + + // Get the new region name in case of clone, or use the original one + newRegionName = regionsMap.get(regionName); + if (newRegionName == null) newRegionName = regionName; + + writer = getOrCreateWriter(newRegionName, key.getLogSeqNum()); + LOG.debug("+ regionName=" + Bytes.toString(regionName)); + } + + // Append Entry + key = new HLogKey(newRegionName, tableName, + key.getLogSeqNum(), key.getWriteTime(), key.getClusterId()); + writer.append(new HLog.Entry(key, entry.getEdit())); + } + } catch (IOException e) { + LOG.warn("Something wrong during the log split", e); + } finally { + log.close(); + } + } + + /** + * Create a LogWriter for specified region if not already created. + */ + private LogWriter getOrCreateWriter(final byte[] regionName, long seqId) throws IOException { + LogWriter writer = regionLogWriters.get(regionName); + if (writer == null) { + Path regionDir = HRegion.getRegionDir(tableDir, Bytes.toString(regionName)); + Path dir = HLog.getRegionDirRecoveredEditsDir(regionDir); + fs.mkdirs(dir); + + writer = new LogWriter(conf, fs, dir, seqId); + regionLogWriters.put(regionName, writer); + } + return(writer); + } +} Index: src/main/java/org/apache/hadoop/hbase/snapshot/UnknownSnapshotException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/snapshot/UnknownSnapshotException.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/snapshot/UnknownSnapshotException.java (revision 0) @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.snapshot; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Exception thrown when we get a request for a snapshot we don't recognize. 
+ */ +@SuppressWarnings("serial") +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UnknownSnapshotException extends HBaseSnapshotException { + + /** + * @param msg full information about the failure + */ + public UnknownSnapshotException(String msg) { + super(msg); + } + + public UnknownSnapshotException(String msg, Exception e) { + super(msg, e); + } + +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/backup/HFileArchiver.java (working copy) @@ -56,7 +56,7 @@ private static final String SEPARATOR = "."; /** Number of retries in case of fs operation failure */ - private static final int DEFAULT_RETRIES_NUMBER = 3; + private static final int DEFAULT_RETRIES_NUMBER = 6; private HFileArchiver() { // hidden ctor since this is just a util @@ -73,14 +73,12 @@ public static void archiveRegion(Configuration conf, FileSystem fs, HRegionInfo info) throws IOException { Path rootDir = FSUtils.getRootDir(conf); - archiveRegion(conf, fs, rootDir, HTableDescriptor.getTableDir(rootDir, info.getTableName()), + archiveRegion(fs, rootDir, HTableDescriptor.getTableDir(rootDir, info.getTableName()), HRegion.getRegionDir(rootDir, info)); } - /** * Remove an entire region from the table directory via archiving the region's hfiles. - * @param conf the configuration to use * @param fs {@link FileSystem} from which to remove the region * @param rootdir {@link Path} to the root directory where hbase files are stored (for building * the archive path) @@ -90,8 +88,7 @@ * operations could not complete. * @throws IOException if the request cannot be completed */ - public static boolean archiveRegion(Configuration conf, FileSystem fs, Path rootdir, - Path tableDir, Path regionDir) + public static boolean archiveRegion(FileSystem fs, Path rootdir, Path tableDir, Path regionDir) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("ARCHIVING region " + regionDir.toString()); @@ -110,7 +107,7 @@ // make sure the regiondir lives under the tabledir Preconditions.checkArgument(regionDir.toString().startsWith(tableDir.toString())); - Path regionArchiveDir = HFileArchiveUtil.getRegionArchiveDir(conf, tableDir, regionDir); + Path regionArchiveDir = HFileArchiveUtil.getRegionArchiveDir(rootdir, tableDir, regionDir); LOG.debug("Have an archive directory, preparing to move files"); FileStatusConverter getAsFile = new FileStatusConverter(fs); @@ -150,7 +147,7 @@ } throw new IOException("Received error when attempting to archive files (" + toArchive - + "), cannot delete region directory. "); + + "), cannot delete region directory."); } /** @@ -240,6 +237,35 @@ } /** + * Archive the store file + * @param fs the filesystem where the store files live + * @param regionInfo region hosting the store files + * @param conf {@link Configuration} to examine to determine the archive directory + * @param tableDir {@link Path} to where the table is being stored (for building the archive path) + * @param family the family hosting the store files + * @param storeFile file to be archived + * @throws IOException if the files could not be correctly disposed. 
+ */ + public static void archiveStoreFile(FileSystem fs, HRegionInfo regionInfo, + Configuration conf, Path tableDir, byte[] family, Path storeFile) throws IOException { + Path storeArchiveDir = HFileArchiveUtil.getStoreArchivePath(conf, regionInfo, tableDir, family); + // make sure we don't archive if we can't and that the archive dir exists + if (!fs.mkdirs(storeArchiveDir)) { + throw new IOException("Could not make archive directory (" + storeArchiveDir + ") for store:" + + Bytes.toString(family) + ", deleting compacted files instead."); + } + + // do the actual archive + long start = EnvironmentEdgeManager.currentTimeMillis(); + File file = new FileablePath(fs, storeFile); + if (!resolveAndArchiveFile(storeArchiveDir, file, Long.toString(start))) { + throw new IOException("Failed to archive/delete the file for region:" + + regionInfo.getRegionNameAsString() + ", family:" + Bytes.toString(family) + + " into " + storeArchiveDir + ". Something is probably awry on the filesystem."); + } + } + + /** * Archive the given files and resolve any conflicts with existing files via appending the time * archiving started (so all conflicts in the same group have the same timestamp appended). *

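For illustration, an editorial sketch (not part of the patch) of using the archiveStoreFile() helper added above to move a single compacted file into the archive; the family name is hypothetical:

  static void archiveOneStoreFile(Configuration conf, FileSystem fs, HRegionInfo regionInfo,
      Path compactedFile) throws IOException {
    Path rootDir = FSUtils.getRootDir(conf);
    Path tableDir = HTableDescriptor.getTableDir(rootDir, regionInfo.getTableName());
    byte[] family = Bytes.toBytes("f1"); // hypothetical family name
    HFileArchiver.archiveStoreFile(fs, regionInfo, conf, tableDir, family, compactedFile);
  }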
@@ -414,6 +440,34 @@ } /** + * Simple delete of regular files from the {@link FileSystem}. + *

+ * This method is a more generic implementation that the other deleteXXX + * methods in this class, allowing more code reuse at the cost of a couple + * more, short-lived objects (which should have minimum impact on the jvm). + * @param fs {@link FileSystem} where the files live + * @param files {@link Collection} of files to be deleted + * @throws IOException if a file cannot be deleted. All files will be + * attempted to deleted before throwing the exception, rather than + * failing at the first file. + */ + private static void deleteFilesWithoutArchiving(Collection files) throws IOException { + List errors = new ArrayList(0); + for (File file : files) { + try { + LOG.debug("Deleting region file:" + file); + file.delete(); + } catch (IOException e) { + LOG.error("Failed to delete file:" + file); + errors.add(e); + } + } + if (errors.size() > 0) { + throw MultipleIOException.createIOException(errors); + } + } + + /** * Without regard for backup, delete a region. Should be used with caution. * @param regionDir {@link Path} to the region to be deleted. * @param fs FileSystem from which to delete the region Index: src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java (working copy) @@ -597,8 +597,25 @@ public static boolean createTableDescriptor(FileSystem fs, Path rootdir, HTableDescriptor htableDescriptor, boolean forceCreation) throws IOException { - FileStatus status = - getTableInfoPath(fs, rootdir, htableDescriptor.getNameAsString()); + Path tabledir = FSUtils.getTablePath(rootdir, htableDescriptor.getNameAsString()); + return createTableDescriptorForTableDirectory(fs, tabledir, htableDescriptor, forceCreation); + } + + /** + * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create + * a new table or snapshot a table. + * @param fs filesystem where the descriptor should be written + * @param tabledir directory under which we should write the file + * @param htableDescriptor description of the table to write + * @param forceCreation if true,then even if previous table descriptor is present it will + * be overwritten + * @return true if the we successfully created the file, false if the file + * already exists and we weren't forcing the descriptor creation. 
+ * @throws IOException if a filesystem error occurs + */ + public static boolean createTableDescriptorForTableDirectory(FileSystem fs, Path tabledir, + HTableDescriptor htableDescriptor, boolean forceCreation) throws IOException { + FileStatus status = getTableInfoPath(fs, tabledir); if (status != null) { LOG.info("Current tableInfoPath = " + status.getPath()); if (!forceCreation) { @@ -608,8 +625,7 @@ } } } - Path p = writeTableDescriptor(fs, htableDescriptor, - FSUtils.getTablePath(rootdir, htableDescriptor.getNameAsString()), status); + Path p = writeTableDescriptor(fs, htableDescriptor, tabledir, status); return p != null; } } Index: src/main/java/org/apache/hadoop/hbase/util/FSUtils.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/util/FSUtils.java (working copy) @@ -75,7 +75,7 @@ public static FSUtils getInstance(FileSystem fs, Configuration conf) { String scheme = fs.getUri().getScheme(); if (scheme == null) { - LOG.warn("Could not find scheme for uri " + + LOG.warn("Could not find scheme for uri " + fs.getUri() + ", default to hdfs"); scheme = "hdfs"; } @@ -122,7 +122,7 @@ *

  • use the default block size
  • *
  • not track progress
  • * - * + * * @param fs {@link FileSystem} on which to write the file * @param path {@link Path} to the file to write * @return output stream to the created file @@ -143,7 +143,7 @@ *
  • use the default block size
  • *
  • not track progress
  • * - * + * * @param fs {@link FileSystem} on which to write the file * @param path {@link Path} to the file to write * @param perm @@ -163,7 +163,7 @@ /** * Get the file permissions specified in the configuration, if they are * enabled. - * + * * @param fs filesystem that the file will be created on. * @param conf configuration to read for determining if permissions are * enabled and which to use @@ -222,7 +222,7 @@ try { fs.close(); } catch (Exception e) { - LOG.error("file system close failed: ", e); + LOG.error("file system close failed: ", e); } IOException io = new IOException("File system is not available"); io.initCause(exception); @@ -255,11 +255,11 @@ } /** - * Check whether dfs is in safemode. + * Check whether dfs is in safemode. * @param conf * @throws IOException */ - public static void checkDfsSafeMode(final Configuration conf) + public static void checkDfsSafeMode(final Configuration conf) throws IOException { boolean isInSafeMode = false; FileSystem fs = FileSystem.get(conf); @@ -271,7 +271,7 @@ throw new IOException("File system is in safemode, it can't be written now"); } } - + /** * Verifies current version of file system * @@ -309,7 +309,7 @@ */ public static void checkVersion(FileSystem fs, Path rootdir, boolean message) throws IOException { - checkVersion(fs, rootdir, message, 0, + checkVersion(fs, rootdir, message, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); } @@ -359,7 +359,7 @@ */ public static void setVersion(FileSystem fs, Path rootdir) throws IOException { - setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0, + setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0, HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); } @@ -406,7 +406,7 @@ fs.delete(versionFile, false); try { if (wait > 0) { - Thread.sleep(wait); + Thread.sleep(wait); } } catch (InterruptedException ex) { // ignore @@ -612,9 +612,9 @@ * @param fs file system * @param status file status of the file * @param start start position of the portion - * @param length length of the portion + * @param length length of the portion * @return The HDFS blocks distribution - */ + */ static public HDFSBlocksDistribution computeHDFSBlocksDistribution( final FileSystem fs, FileStatus status, long start, long length) throws IOException { @@ -626,12 +626,12 @@ long len = bl.getLength(); blocksDistribution.addHostsAndBlockWeight(hosts, len); } - + return blocksDistribution; } - - + + /** * Runs through the hbase rootdir and checks all stores have only * one file in them -- that is, they've been major compacted. Looks @@ -851,6 +851,27 @@ } /** + * A {@link PathFilter} that returns only regular files. + */ + static class FileFilter implements PathFilter { + private final FileSystem fs; + + public FileFilter(final FileSystem fs) { + this.fs = fs; + } + + @Override + public boolean accept(Path p) { + try { + return fs.isFile(p); + } catch (IOException e) { + LOG.debug("unable to verify if path=" + p + " is a regular file", e); + return false; + } + } + } + + /** * A {@link PathFilter} that returns directories. */ public static class DirFilter implements PathFilter { @@ -860,13 +881,14 @@ this.fs = fs; } + @Override public boolean accept(Path p) { boolean isValid = false; try { if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(p)) { isValid = false; } else { - isValid = this.fs.getFileStatus(p).isDir(); + isValid = this.fs.getFileStatus(p).isDir(); } } catch (IOException e) { e.printStackTrace(); @@ -920,7 +942,7 @@ } /** - * Recover file lease. Used when a file might be suspect + * Recover file lease. 
Used when a file might be suspect * to be had been left open by another process. * @param fs FileSystem handle * @param p Path of file to recover lease @@ -929,7 +951,7 @@ */ public abstract void recoverFileLease(final FileSystem fs, final Path p, Configuration conf) throws IOException; - + /** * @param fs * @param rootdir @@ -1096,10 +1118,10 @@ throws IOException { return getRootDir(conf).getFileSystem(conf); } - + /** - * Runs through the HBase rootdir and creates a reverse lookup map for - * table StoreFile names to the full Path. + * Runs through the HBase rootdir and creates a reverse lookup map for + * table StoreFile names to the full Path. *
    * Example...
    * Key = 3944417774205889744
    @@ -1146,17 +1168,17 @@ Path sf = sfStatus.getPath(); map.put( sf.getName(), sf); } - + } } } return map; } - + /** * Calls fs.listStatus() and treats FileNotFoundException as non-fatal - * This would accommodate difference in various hadoop versions - * + * This accommodates differences between hadoop versions + * * @param fs file system * @param dir directory * @param filter path filter @@ -1169,15 +1191,27 @@ status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter); } catch (FileNotFoundException fnfe) { // if directory doesn't exist, return null - LOG.info(dir + " doesn't exist"); + LOG.debug(dir + " doesn't exist"); } if (status == null || status.length < 1) return null; return status; } - + /** + * Calls fs.listStatus() and treats FileNotFoundException as non-fatal + * This would accommodates differences between hadoop versions + * + * @param fs file system + * @param dir directory + * @return null if tabledir doesn't exist, otherwise FileStatus array + */ + public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException { + return listStatus(fs, dir, null); + } + + /** * Calls fs.delete() and returns the value returned by the fs.delete() - * + * * @param fs * @param path * @param recursive @@ -1229,7 +1263,7 @@ /** * Calls fs.exists(). Checks if the specified path exists - * + * * @param fs * @param path * @return Index: src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/util/HFileArchiveUtil.java (working copy) @@ -41,6 +41,21 @@ /** * Get the directory to archive a store directory * @param conf {@link Configuration} to read for the archive directory name + * @param tableName table name under which the store currently lives + * @param regionName region encoded name under which the store currently lives + * @param family name of the family in the store + * @return {@link Path} to the directory to archive the given store or + * null if it should not be archived + */ + public static Path getStoreArchivePath(final Configuration conf, final String tableName, + final String regionName, final String familyName) throws IOException { + Path tableArchiveDir = getTableArchivePath(conf, tableName); + return Store.getStoreHomedir(tableArchiveDir, regionName, familyName); + } + + /** + * Get the directory to archive a store directory + * @param conf {@link Configuration} to read for the archive directory name * @param region parent region information under which the store currently * lives * @param family name of the family in the store @@ -85,6 +100,24 @@ } /** + * Get the archive directory for a given region under the specified table + * @param rootdir {@link Path} to the root directory where hbase files are stored (for building + * the archive path) + * @param tabledir the original table directory. Cannot be null. + * @param regiondir the path to the region directory. Cannot be null. 
+ * @return {@link Path} to the directory to archive the given region, or null if it + * should not be archived + */ + public static Path getRegionArchiveDir(Path rootdir, Path tabledir, Path regiondir) { + // get the archive directory for a table + Path archiveDir = getTableArchivePath(rootdir, tabledir.getName()); + + // then add on the region path under the archive + String encodedRegionName = regiondir.getName(); + return HRegion.getRegionDir(archiveDir, encodedRegionName); + } + + /** * Get the path to the table archive directory based on the configured archive directory. *

    * Get the path to the table's archive directory. @@ -95,10 +128,38 @@ */ public static Path getTableArchivePath(Path tabledir) { Path root = tabledir.getParent(); - return new Path(new Path(root,HConstants.HFILE_ARCHIVE_DIRECTORY), tabledir.getName()); + return getTableArchivePath(root, tabledir.getName()); } /** + * Get the path to the table archive directory based on the configured archive directory. + *

    + * Get the path to the table's archive directory. + *

    + * Generally of the form: /hbase/.archive/[tablename] + * @param rootdir {@link Path} to the root directory where hbase files are stored (for building + * the archive path) + * @param tableName Name of the table to be archived. Cannot be null. + * @return {@link Path} to the archive directory for the table + */ + public static Path getTableArchivePath(final Path rootdir, final String tableName) { + return new Path(getArchivePath(rootdir), tableName); + } + + /** + * Get the path to the table archive directory based on the configured archive directory. + *

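For illustration, an editorial sketch (not part of the patch) of how the rootdir-based helper above resolves the archive location, following the "Generally of the form" layout noted above; the table name is hypothetical:

  static Path tableArchiveDir(Configuration conf) throws IOException {
    Path rootdir = FSUtils.getRootDir(conf);
    // e.g. rootdir = /hbase and table "mytable" resolve to /hbase/.archive/mytable
    return HFileArchiveUtil.getTableArchivePath(rootdir, "mytable");
  }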
    + * Assumed that the table should already be archived. + * @param conf {@link Configuration} to read the archive directory property. Can be null + * @param tableName Name of the table to be archived. Cannot be null. + * @return {@link Path} to the archive directory for the table + */ + public static Path getTableArchivePath(final Configuration conf, final String tableName) + throws IOException { + return new Path(getArchivePath(conf), tableName); + } + + /** * Get the full path to the archive directory on the configured {@link FileSystem} * @param conf to look for archive directory name and root directory. Cannot be null. Notes for * testing: requires a FileSystem root directory to be specified. @@ -106,6 +167,16 @@ * @throws IOException if an unexpected error occurs */ public static Path getArchivePath(Configuration conf) throws IOException { - return new Path(FSUtils.getRootDir(conf), HConstants.HFILE_ARCHIVE_DIRECTORY); + return getArchivePath(FSUtils.getRootDir(conf)); } + + /** + * Get the full path to the archive directory on the configured {@link FileSystem} + * @param rootdir {@link Path} to the root directory where hbase files are stored (for building + * the archive path) + * @return the full {@link Path} to the archive directory, as defined by the configuration + */ + private static Path getArchivePath(final Path rootdir) { + return new Path(rootdir, HConstants.HFILE_ARCHIVE_DIRECTORY); + } } Index: src/main/java/org/apache/hadoop/hbase/util/FSVisitor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/FSVisitor.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/util/FSVisitor.java (revision 0) @@ -0,0 +1,194 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.io.IOException; +import java.util.NavigableSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.io.Reference; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.wal.HLog; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * Utility methods for interacting with the hbase.root file system. 
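For illustration, an editorial sketch (not part of the patch) of walking a table's store files with the visitor callbacks defined below; the println is only a placeholder for real per-file work:

  static void listStoreFiles(final FileSystem fs, final Path tableDir) throws IOException {
    FSVisitor.visitTableStoreFiles(fs, tableDir, new FSVisitor.StoreFileVisitor() {
      @Override
      public void storeFile(String region, String family, String hfileName) throws IOException {
        // placeholder: a real caller would reference, copy, or verify the hfile here
        System.out.println(region + "/" + family + "/" + hfileName);
      }
    });
  }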
+ */ +@InterfaceAudience.Private +public final class FSVisitor { + private static final Log LOG = LogFactory.getLog(FSVisitor.class); + + public interface StoreFileVisitor { + void storeFile(final String region, final String family, final String hfileName) + throws IOException; + } + + public interface RecoveredEditsVisitor { + void recoveredEdits (final String region, final String logfile) + throws IOException; + } + + public interface LogFileVisitor { + void logFile (final String server, final String logfile) + throws IOException; + } + + private FSVisitor() { + // private constructor for utility class + } + + /** + * Iterate over the table store files + * + * @param fs {@link FileSystem} + * @param tableDir {@link Path} to the table directory + * @param visitor callback object to get the store files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitTableStoreFiles(final FileSystem fs, final Path tableDir, + final StoreFileVisitor visitor) throws IOException { + FileStatus[] regions = FSUtils.listStatus(fs, tableDir, new FSUtils.RegionDirFilter(fs)); + if (regions == null) { + LOG.info("No regions under directory:" + tableDir); + return; + } + + for (FileStatus region: regions) { + visitRegionStoreFiles(fs, region.getPath(), visitor); + } + } + + /** + * Iterate over the region store files + * + * @param fs {@link FileSystem} + * @param regionDir {@link Path} to the region directory + * @param visitor callback object to get the store files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitRegionStoreFiles(final FileSystem fs, final Path regionDir, + final StoreFileVisitor visitor) throws IOException { + FileStatus[] families = FSUtils.listStatus(fs, regionDir, new FSUtils.FamilyDirFilter(fs)); + if (families == null) { + LOG.info("No families under region directory:" + regionDir); + return; + } + + PathFilter fileFilter = new FSUtils.FileFilter(fs); + for (FileStatus family: families) { + Path familyDir = family.getPath(); + String familyName = familyDir.getName(); + + // get all the storeFiles in the family + FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir, fileFilter); + if (storeFiles == null) { + LOG.debug("No hfiles found for family: " + familyDir + ", skipping."); + continue; + } + + for (FileStatus hfile: storeFiles) { + Path hfilePath = hfile.getPath(); + visitor.storeFile(regionDir.getName(), familyName, hfilePath.getName()); + } + } + } + + /** + * Iterate over each region in the table and inform about recovered.edits + * + * @param fs {@link FileSystem} + * @param tableDir {@link Path} to the table directory + * @param visitor callback object to get the recovered.edits files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitTableRecoveredEdits(final FileSystem fs, final Path tableDir, + final FSVisitor.RecoveredEditsVisitor visitor) throws IOException { + FileStatus[] regions = FSUtils.listStatus(fs, tableDir, new FSUtils.RegionDirFilter(fs)); + if (regions == null) { + LOG.info("No regions under directory:" + tableDir); + return; + } + + for (FileStatus region: regions) { + visitRegionRecoveredEdits(fs, region.getPath(), visitor); + } + } + + /** + * Iterate over recovered.edits of the specified region + * + * @param fs {@link FileSystem} + * @param regionDir {@link Path} to the Region directory + * @param visitor callback object to get the recovered.edits files + * @throws IOException if an error 
occurred while scanning the directory + */ + public static void visitRegionRecoveredEdits(final FileSystem fs, final Path regionDir, + final FSVisitor.RecoveredEditsVisitor visitor) throws IOException { + NavigableSet files = HLog.getSplitEditFilesSorted(fs, regionDir); + if (files == null || files.size() == 0) return; + + for (Path source: files) { + // check to see if the file is zero length, in which case we can skip it + FileStatus stat = fs.getFileStatus(source); + if (stat.getLen() <= 0) continue; + + visitor.recoveredEdits(regionDir.getName(), source.getName()); + } + } + + /** + * Iterate over hbase log files + * + * @param fs {@link FileSystem} + * @param rootDir {@link Path} to the HBase root folder + * @param visitor callback object to get the log files + * @throws IOException if an error occurred while scanning the directory + */ + public static void visitLogFiles(final FileSystem fs, final Path rootDir, + final LogFileVisitor visitor) throws IOException { + Path logsDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME); + FileStatus[] logServerDirs = FSUtils.listStatus(fs, logsDir); + if (logServerDirs == null) { + LOG.info("No logs under directory:" + logsDir); + return; + } + + for (FileStatus serverLogs: logServerDirs) { + String serverName = serverLogs.getPath().getName(); + + FileStatus[] hlogs = FSUtils.listStatus(fs, serverLogs.getPath()); + if (hlogs == null) { + LOG.debug("No hfiles found for server: " + serverName + ", skipping."); + continue; + } + + for (FileStatus hlogRef: hlogs) { + visitor.logFile(serverName, hlogRef.getPath().getName()); + } + } + } +} Index: src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (revision 0) @@ -0,0 +1,176 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.util; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.backup.HFileArchiver; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.catalog.MetaEditor; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.regionserver.HRegion; + +/** + * Utility methods for interacting with the regions. + */ +@InterfaceAudience.Private +public abstract class ModifyRegionUtils { + private static final Log LOG = LogFactory.getLog(ModifyRegionUtils.class); + + private ModifyRegionUtils() { + } + + public interface RegionFillTask { + public void fillRegion(final HRegion region) throws IOException; + } + + /** + * Create new set of regions on the specified file-system. + * NOTE: that you should add the regions to .META. after this operation. + * + * @param conf {@link Configuration} + * @param rootDir Root directory for HBase instance + * @param hTableDescriptor description of the table + * @param newRegions {@link HRegionInfo} that describes the regions to create + * @throws IOException + */ + public static List createRegions(final Configuration conf, final Path rootDir, + final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions) throws IOException { + return createRegions(conf, rootDir, hTableDescriptor, newRegions, null); + } + + /** + * Create new set of regions on the specified file-system. + * NOTE: that you should add the regions to .META. after this operation. + * + * @param conf {@link Configuration} + * @param rootDir Root directory for HBase instance + * @param hTableDescriptor description of the table + * @param newRegions {@link HRegionInfo} that describes the regions to create + * @param task {@link RegionFillTask} custom code to populate region after creation + * @throws IOException + */ + public static List createRegions(final Configuration conf, final Path rootDir, + final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions, + final RegionFillTask task) throws IOException { + if (newRegions == null) return null; + int regionNumber = newRegions.length; + ThreadPoolExecutor regionOpenAndInitThreadPool = getRegionOpenAndInitThreadPool(conf, + "RegionOpenAndInitThread-" + hTableDescriptor.getNameAsString(), regionNumber); + CompletionService completionService = new ExecutorCompletionService( + regionOpenAndInitThreadPool); + List regionInfos = new ArrayList(); + for (final HRegionInfo newRegion : newRegions) { + completionService.submit(new Callable() { + public HRegionInfo call() throws IOException { + // 1. 
Create HRegion + HRegion region = HRegion.createHRegion(newRegion, + rootDir, conf, hTableDescriptor, null, + false, true); + try { + // 2. Custom user code to interact with the created region + if (task != null) { + task.fillRegion(region); + } + } finally { + // 3. Close the new region to flush to disk. Close log file too. + region.close(); + } + return region.getRegionInfo(); + } + }); + } + try { + // 4. wait for all regions to finish creation + for (int i = 0; i < regionNumber; i++) { + Future future = completionService.take(); + HRegionInfo regionInfo = future.get(); + regionInfos.add(regionInfo); + } + } catch (InterruptedException e) { + LOG.error("Caught " + e + " during region creation"); + throw new InterruptedIOException(e.getMessage()); + } catch (ExecutionException e) { + throw new IOException(e); + } finally { + regionOpenAndInitThreadPool.shutdownNow(); + } + return regionInfos; + } + + /* + * used by createRegions() to get the thread pool executor based on the + * "hbase.hregion.open.and.init.threads.max" property. + */ + static ThreadPoolExecutor getRegionOpenAndInitThreadPool(final Configuration conf, + final String threadNamePrefix, int regionNumber) { + int maxThreads = Math.min(regionNumber, conf.getInt( + "hbase.hregion.open.and.init.threads.max", 10)); + ThreadPoolExecutor regionOpenAndInitThreadPool = Threads + .getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS, + new ThreadFactory() { + private int count = 1; + + public Thread newThread(Runnable r) { + Thread t = new Thread(r, threadNamePrefix + "-" + count++); + return t; + } + }); + return regionOpenAndInitThreadPool; + } + + /** + * Trigger immediate assignment of the regions in round-robin fashion + * + * @param assignmentManager + * @param regions + */ + public static void assignRegions(final AssignmentManager assignmentManager, + final List regions) throws IOException { + try { + assignmentManager.assignUserRegionsToOnlineServers(regions); + } catch (InterruptedException ie) { + LOG.error("Caught " + ie + " during round-robin assignment"); + throw new InterruptedIOException(ie.getMessage()); + } + } +} Index: src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (working copy) @@ -61,14 +61,24 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.ipc.MasterExecRPCInvoker; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest.CompactionState; import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; +import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; +import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.StringUtils; +import 
com.google.protobuf.ServiceException; + /** * Provides an interface to manage HBase database table metadata + general * administrative functions. Use HBaseAdmin to create, drop, list, enable and @@ -194,7 +204,7 @@ this.aborted = true; throw new RuntimeException(why, e); } - + @Override public boolean isAborted(){ return this.aborted; @@ -601,7 +611,7 @@ // continue } } - + if (tableExists) { throw new IOException("Retries exhausted, it took too long to wait"+ " for the table " + Bytes.toString(tableName) + " to be deleted."); @@ -676,9 +686,23 @@ enableTableAsync(tableName); // Wait until all regions are enabled + waitUntilTableIsEnabled(tableName); + + LOG.info("Enabled table " + Bytes.toString(tableName)); + } + + /** + * Wait for the table to be enabled and available + * If enabling the table exceeds the retry period, an exception is thrown. + * @param tableName name of the table + * @throws IOException if a remote or network exception occurs or + * table is not enabled after the retries period. + */ + private void waitUntilTableIsEnabled(final byte[] tableName) throws IOException { boolean enabled = false; + long start = EnvironmentEdgeManager.currentTimeMillis(); for (int tries = 0; tries < (this.numRetries * this.retryLongerMultiplier); tries++) { - enabled = isTableEnabled(tableName); + enabled = isTableEnabled(tableName) && isTableAvailable(tableName); if (enabled) { break; } @@ -697,10 +721,10 @@ } } if (!enabled) { - throw new IOException("Unable to enable table " + - Bytes.toString(tableName)); + long msec = EnvironmentEdgeManager.currentTimeMillis() - start; + throw new IOException("Table '" + Bytes.toString(tableName) + + "' not yet enabled, after " + msec + "ms."); } - LOG.info("Enabled table " + Bytes.toString(tableName)); } public void enableTableAsync(final String tableName) @@ -1119,7 +1143,7 @@ * servername is provided then based on the online regions in the specified * regionserver the specified region will be closed. The master will not be * informed of the close. Note that the regionname is the encoded regionname. - * + * * @param encodedRegionName * The encoded region name; i.e. the hash that makes up the region * name suffix: e.g. if regionname is @@ -1255,7 +1279,7 @@ throws IOException, InterruptedException { compact(tableNameOrRegionName, null, false); } - + /** * Compact a column family within a table or region. * Asynchronous operation. @@ -1309,7 +1333,7 @@ throws IOException, InterruptedException { compact(tableNameOrRegionName, null, true); } - + /** * Major compact a column family within a table or region. * Asynchronous operation. @@ -1761,7 +1785,7 @@ * @param tableName the name of the table * @return Ordered list of {@link HRegionInfo}. * @throws IOException - */ + */ public List getTableRegions(final byte[] tableName) throws IOException { CatalogTracker ct = getCatalogTracker(); @@ -1773,7 +1797,7 @@ } return Regions; } - + public void close() throws IOException { if (this.connection != null) { this.connection.close(); @@ -1793,14 +1817,14 @@ /** * Roll the log writer. That is, start writing log messages to a new file. - * + * * @param serverName * The servername of the regionserver. A server name is made of host, * port and startcode. This is mandatory. Here is an example: * host187.example.com,60020,1289493121758 * @return If lots of logs, flush the returned regions so next time through * we can clean logs. Returns null if nothing to flush. 
Names are actual - * region names as returned by {@link HRegionInfo#getEncodedName()} + * region names as returned by {@link HRegionInfo#getEncodedName()} * @throws IOException if a remote or network exception occurs * @throws FailedLogCloseException */ @@ -1921,4 +1945,367 @@ connection, protocol)); } + + + /** + * Create a timestamp consistent snapshot for the given table. + *
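For a concrete picture of the client-facing API added below, a minimal sketch of taking a blocking snapshot. The running cluster, the enabled table "testtable", the snapshot name, and the wrapper class are assumptions for illustration, not part of this patch:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HBaseAdmin;

    public class SnapshotSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        // flush-based snapshot of an enabled table; blocks until the master reports completion
        admin.snapshot("testtable-snap1", "testtable");
        admin.close();
      }
    }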

+ * Snapshots are considered unique based on the name of the snapshot. Attempts to take a
+ * snapshot with the same name (even with a different type or different parameters) will fail
+ * with a {@link SnapshotCreationException} indicating the duplicate naming.
+ *
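The duplicate-name rule can be exercised directly; a fragment reusing the admin instance from the sketch above (illustrative only, names are assumptions):

    admin.snapshot("snap-a", "testtable");
    try {
      admin.snapshot("snap-a", "testtable");   // same snapshot name a second time
    } catch (SnapshotCreationException e) {
      // rejected as a duplicate, per the naming rule described here
    }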

    + * Snapshot names follow the same naming constraints as tables in HBase. See + * {@link HTableDescriptor#isLegalTableName(byte[])}. + * @param snapshotName name of the snapshot to be created + * @param tableName name of the table for which snapshot is created + * @throws IOException if a remote or network exception occurs + * @throws SnapshotCreationException if snapshot creation failed + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + public void snapshot(final String snapshotName, final String tableName) throws IOException, + SnapshotCreationException, IllegalArgumentException { + snapshot(snapshotName, tableName, SnapshotDescription.Type.FLUSH); + } + + /** + * Take a snapshot for the given table. If the table is enabled, a FLUSH-type snapshot will be + * taken. If the table is disabled, an offline snapshot is taken. + *

+ * Snapshots are considered unique based on the name of the snapshot. Attempts to take a
+ * snapshot with the same name (even with a different type or different parameters) will fail
+ * with a {@link SnapshotCreationException} indicating the duplicate naming.
+ *

    + * Snapshot names follow the same naming constraints as tables in HBase. See + * {@link HTableDescriptor#isLegalTableName(byte[])}. + * @param snapshotName name of the snapshot to be created + * @param tableName name of the table for which snapshot is created + * @throws IOException if a remote or network exception occurs + * @throws SnapshotCreationException if snapshot creation failed + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + public void snapshot(final byte[] snapshotName, final byte[] tableName) throws IOException, + SnapshotCreationException, IllegalArgumentException { + snapshot(Bytes.toString(snapshotName), Bytes.toString(tableName)); + } + + /** + * Create typed snapshot of the table. + *

+ * Snapshots are considered unique based on the name of the snapshot. Attempts to take a
+ * snapshot with the same name (even with a different type or different parameters) will fail
+ * with a {@link SnapshotCreationException} indicating the duplicate naming.
+ *

    + * Snapshot names follow the same naming constraints as tables in HBase. See + * {@link HTableDescriptor#isLegalTableName(byte[])}. + *

+ * @param snapshotName name to give the snapshot on the filesystem. Must be unique from all other
+ *   snapshots stored on the cluster
+ * @param tableName name of the table to snapshot
+ * @param type type of snapshot to take
+ * @throws IOException if we fail to reach the master
+ * @throws SnapshotCreationException if snapshot creation failed
+ * @throws IllegalArgumentException if the snapshot request is formatted incorrectly
+ */
+ public void snapshot(final String snapshotName, final String tableName,
+     SnapshotDescription.Type type) throws IOException, SnapshotCreationException,
+     IllegalArgumentException {
+   SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
+   builder.setTable(tableName);
+   builder.setName(snapshotName);
+   builder.setType(type);
+   snapshot(builder.build());
+ }
+
+ /**
+ * Take a snapshot and wait for the server to complete that snapshot (blocking).
+ *

    + * Only a single snapshot should be taken at a time for an instance of HBase, or results may be + * undefined (you can tell multiple HBase clusters to snapshot at the same time, but only one at a + * time for a single cluster). + *

+ * Snapshots are considered unique based on the name of the snapshot. Attempts to take a
+ * snapshot with the same name (even with a different type or different parameters) will fail
+ * with a {@link SnapshotCreationException} indicating the duplicate naming.
+ *

    + * Snapshot names follow the same naming constraints as tables in HBase. See + * {@link HTableDescriptor#isLegalTableName(byte[])}. + *

    + * You should probably use {@link #snapshot(String, String)} or {@link #snapshot(byte[], byte[])} + * unless you are sure about the type of snapshot that you want to take. + * @param snapshot snapshot to take + * @throws IOException or we lose contact with the master. + * @throws SnapshotCreationException if snapshot failed to be taken + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + public void snapshot(SnapshotDescription snapshot) throws IOException, SnapshotCreationException, + IllegalArgumentException { + HSnapshotDescription snapshotWritable = new HSnapshotDescription(snapshot); + + try { + // actually take the snapshot + long max = takeSnapshotAsync(snapshot); + long start = EnvironmentEdgeManager.currentTimeMillis(); + long maxPauseTime = max / this.numRetries; + boolean done = false; + int tries = 0; + LOG.debug("Waiting a max of " + max + " ms for snapshot '" + + SnapshotDescriptionUtils.toString(snapshot) + "' to complete. (max " + + maxPauseTime + " ms per retry)"); + while (tries == 0 || (EnvironmentEdgeManager.currentTimeMillis() - start) < max && !done) { + try { + // sleep a backoff <= pauseTime amount + long sleep = getPauseTime(tries++); + sleep = sleep > maxPauseTime ? maxPauseTime : sleep; + LOG.debug("(#" + tries + ") Sleeping: " + sleep + + "ms while waiting for snapshot completion."); + Thread.sleep(sleep); + + } catch (InterruptedException e) { + LOG.debug("Interrupted while waiting for snapshot " + snapshot + " to complete"); + Thread.currentThread().interrupt(); + } + LOG.debug("Getting current status of snapshot from master..."); + done = getMaster().isSnapshotDone(snapshotWritable); + } + + if (!done) { + throw new SnapshotCreationException("Snapshot '" + snapshot.getName() + + "' wasn't completed in expectedTime:" + max + " ms", snapshot); + } + } catch (RemoteException e) { + throw RemoteExceptionHandler.decodeRemoteException(e); + } + } + + /** + * Take a snapshot without waiting for the server to complete that snapshot (asynchronous) + *

    + * Only a single snapshot should be taken at a time, or results may be undefined. + * @param snapshot snapshot to take + * @return the max time in millis to wait for the snapshot + * @throws IOException if the snapshot did not succeed or we lose contact with the master. + * @throws SnapshotCreationException if snapshot creation failed + * @throws IllegalArgumentException if the snapshot request is formatted incorrectly + */ + public long takeSnapshotAsync(SnapshotDescription snapshot) throws IOException, + SnapshotCreationException { + SnapshotDescriptionUtils.assertSnapshotRequestIsValid(snapshot); + HSnapshotDescription snapshotWritable = new HSnapshotDescription(snapshot); + return getMaster().snapshot(snapshotWritable); + } + + /** + * Check the current state of the passed snapshot. + *
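Continuing the earlier sketch, the asynchronous path pairs takeSnapshotAsync(...) with the isSnapshotFinished(...) check documented next; the polling interval and the names are assumptions, and exception handling is omitted:

    SnapshotDescription snap = SnapshotDescription.newBuilder()
        .setName("testtable-snap2")
        .setTable("testtable")
        .setType(SnapshotDescription.Type.FLUSH)
        .build();
    admin.takeSnapshotAsync(snap);             // returns the max expected completion time in ms
    while (!admin.isSnapshotFinished(snap)) {  // rethrows the failure if the snapshot errored out
      Thread.sleep(500);
    }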

+ * There are three possible states:
+ *   1. running - returns false
+ *   2. finished - returns true
+ *   3. finished with error - throws the exception that caused the snapshot to fail
+ *
    + * The cluster only knows about the most recent snapshot. Therefore, if another snapshot has been + * run/started since the snapshot your are checking, you will recieve an + * {@link UnknownSnapshotException}. + * @param snapshot description of the snapshot to check + * @return true if the snapshot is completed, false if the snapshot is still + * running + * @throws IOException if we have a network issue + * @throws HBaseSnapshotException if the snapshot failed + * @throws UnknownSnapshotException if the requested snapshot is unknown + */ + public boolean isSnapshotFinished(final SnapshotDescription snapshot) + throws IOException, HBaseSnapshotException, UnknownSnapshotException { + try { + return getMaster().isSnapshotDone(new HSnapshotDescription(snapshot)); + } catch (RemoteException e) { + throw RemoteExceptionHandler.decodeRemoteException(e); + } + } + + /** + * Restore the specified snapshot on the original table. (The table must be disabled) + * Before restoring the table, a new snapshot with the current table state is created. + * In case of failure, the table will be rolled back to the its original state. + * + * @param snapshotName name of the snapshot to restore + * @throws IOException if a remote or network exception occurs + * @throws RestoreSnapshotException if snapshot failed to be restored + * @throws IllegalArgumentException if the restore request is formatted incorrectly + */ + public void restoreSnapshot(final byte[] snapshotName) + throws IOException, RestoreSnapshotException { + restoreSnapshot(Bytes.toString(snapshotName)); + } + + /** + * Restore the specified snapshot on the original table. (The table must be disabled) + * Before restoring the table, a new snapshot with the current table state is created. + * In case of failure, the table will be rolled back to its original state. + * + * @param snapshotName name of the snapshot to restore + * @throws IOException if a remote or network exception occurs + * @throws RestoreSnapshotException if snapshot failed to be restored + * @throws IllegalArgumentException if the restore request is formatted incorrectly + */ + public void restoreSnapshot(final String snapshotName) + throws IOException, RestoreSnapshotException { + String rollbackSnapshot = snapshotName + "-" + EnvironmentEdgeManager.currentTimeMillis(); + + String tableName = null; + for (SnapshotDescription snapshotInfo: listSnapshots()) { + if (snapshotInfo.getName().equals(snapshotName)) { + tableName = snapshotInfo.getTable(); + break; + } + } + + if (tableName == null) { + throw new RestoreSnapshotException( + "Unable to find the table name for snapshot=" + snapshotName); + } + + // Take a snapshot of the current state + snapshot(rollbackSnapshot, tableName); + + // Restore snapshot + try { + internalRestoreSnapshot(snapshotName, tableName); + } catch (IOException e) { + // Try to rollback + try { + String msg = "Restore snapshot=" + snapshotName + + " failed. Rollback to snapshot=" + rollbackSnapshot + " succeeded."; + LOG.error(msg, e); + internalRestoreSnapshot(rollbackSnapshot, tableName); + throw new RestoreSnapshotException(msg, e); + } catch (IOException ex) { + String msg = "Failed to restore and rollback to snapshot=" + rollbackSnapshot; + LOG.error(msg, ex); + throw new RestoreSnapshotException(msg, ex); + } + } + } + + /** + * Create a new table by cloning the snapshot content. 
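To round out the sketch, restore and clone calls might look like the following; as noted above, restore requires the table to be disabled, and cloneSnapshot(...) also declares InterruptedException. Names are reused from the earlier fragments:

    admin.disableTable("testtable");
    admin.restoreSnapshot("testtable-snap1");   // takes a rollback snapshot first, then restores
    admin.enableTable("testtable");
    admin.cloneSnapshot("testtable-snap1", "testtable-copy");  // new table from the same snapshot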
+ * + * @param snapshotName name of the snapshot to be cloned + * @param tableName name of the table where the snapshot will be restored + * @throws IOException if a remote or network exception occurs + * @throws TableExistsException if table to be created already exists + * @throws RestoreSnapshotException if snapshot failed to be cloned + * @throws IllegalArgumentException if the specified table has not a valid name + */ + public void cloneSnapshot(final byte[] snapshotName, final byte[] tableName) + throws IOException, TableExistsException, RestoreSnapshotException, InterruptedException { + cloneSnapshot(Bytes.toString(snapshotName), Bytes.toString(tableName)); + } + + /** + * Create a new table by cloning the snapshot content. + * + * @param snapshotName name of the snapshot to be cloned + * @param tableName name of the table where the snapshot will be restored + * @throws IOException if a remote or network exception occurs + * @throws TableExistsException if table to be created already exists + * @throws RestoreSnapshotException if snapshot failed to be cloned + * @throws IllegalArgumentException if the specified table has not a valid name + */ + public void cloneSnapshot(final String snapshotName, final String tableName) + throws IOException, TableExistsException, RestoreSnapshotException, InterruptedException { + if (tableExists(tableName)) { + throw new TableExistsException("Table '" + tableName + " already exists"); + } + internalRestoreSnapshot(snapshotName, tableName); + waitUntilTableIsEnabled(Bytes.toBytes(tableName)); + } + + /** + * Execute Restore/Clone snapshot and wait for the server to complete (blocking). + * To check if the cloned table exists, use {@link #isTableAvailable} -- it is not safe to + * create an HTable instance to this table before it is available. + * @param snapshot snapshot to restore + * @param tableName table name to restore the snapshot on + * @throws IOException if a remote or network exception occurs + * @throws RestoreSnapshotException if snapshot failed to be restored + * @throws IllegalArgumentException if the restore request is formatted incorrectly + */ + private void internalRestoreSnapshot(final String snapshotName, final String tableName) + throws IOException, RestoreSnapshotException { + HSnapshotDescription snapshot = new HSnapshotDescription( + SnapshotDescription.newBuilder().setName(snapshotName).setTable(tableName).build()); + + try { + // actually restore the snapshot + getMaster().restoreSnapshot(snapshot); + + final long maxPauseTime = 5000; + boolean done = false; + int tries = 0; + while (!done) { + try { + // sleep a backoff <= pauseTime amount + long sleep = getPauseTime(tries++); + sleep = sleep > maxPauseTime ? maxPauseTime : sleep; + LOG.debug(tries + ") Sleeping: " + sleep + " ms while we wait for snapshot restore to complete."); + Thread.sleep(sleep); + } catch (InterruptedException e) { + LOG.debug("Interrupted while waiting for snapshot " + snapshot + " restore to complete"); + Thread.currentThread().interrupt(); + } + LOG.debug("Getting current status of snapshot restore from master..."); + done = getMaster().isRestoreSnapshotDone(snapshot); + } + if (!done) { + throw new RestoreSnapshotException("Snapshot '" + snapshot.getName() + "' wasn't restored."); + } + } catch (RemoteException e) { + throw RemoteExceptionHandler.decodeRemoteException(e); + } + } + + /** + * List completed snapshots. 
+ * @return a list of snapshot descriptors for completed snapshots + * @throws IOException if a network error occurs + */ + public List listSnapshots() throws IOException { + List snapshots = new LinkedList(); + try { + for (HSnapshotDescription snapshot: getMaster().getCompletedSnapshots()) { + snapshots.add(snapshot.getProto()); + } + } catch (RemoteException e) { + throw RemoteExceptionHandler.decodeRemoteException(e); + } + return snapshots; + } + + /** + * Delete an existing snapshot. + * @param snapshotName name of the snapshot + * @throws IOException if a remote or network exception occurs + */ + public void deleteSnapshot(final byte[] snapshotName) throws IOException { + // make sure the snapshot is possibly valid + HTableDescriptor.isLegalTableName(snapshotName); + // do the delete + SnapshotDescription snapshot = SnapshotDescription.newBuilder() + .setName(Bytes.toString(snapshotName)).build(); + try { + getMaster().deleteSnapshot(new HSnapshotDescription(snapshot)); + } catch (RemoteException e) { + throw RemoteExceptionHandler.decodeRemoteException(e); + } + } + + /** + * Delete an existing snapshot. + * @param snapshotName name of the snapshot + * @throws IOException if a remote or network exception occurs + */ + public void deleteSnapshot(final String snapshotName) throws IOException { + deleteSnapshot(Bytes.toBytes(snapshotName)); + } } Index: src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java (working copy) @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; +import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; @@ -403,7 +404,7 @@ * @throws IllegalArgumentException If not null and not a legitimate family * name: i.e. 'printable' and ends in a ':' (Null passes are allowed because * b can be null when deserializing). Cannot start with a '.' - * either. + * either. Also Family can not be an empty value or equal "recovered.edits". */ public static byte [] isLegalFamilyName(final byte [] b) { if (b == null) { @@ -420,6 +421,11 @@ Bytes.toString(b)); } } + byte[] recoveredEdit = Bytes.toBytes(HLog.RECOVERED_EDITS_DIR); + if (Bytes.equals(recoveredEdit, b)) { + throw new IllegalArgumentException("Family name cannot be: " + + HLog.RECOVERED_EDITS_DIR); + } return b; } Index: src/main/java/org/apache/hadoop/hbase/DaemonThreadFactory.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/DaemonThreadFactory.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/DaemonThreadFactory.java (revision 0) @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Thread factory that creates daemon threads + */ +public class DaemonThreadFactory implements ThreadFactory { + static final AtomicInteger poolNumber = new AtomicInteger(1); + final ThreadGroup group; + final AtomicInteger threadNumber = new AtomicInteger(1); + final String namePrefix; + + public DaemonThreadFactory(String name) { + SecurityManager s = System.getSecurityManager(); + group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup(); + namePrefix = name + poolNumber.getAndIncrement() + "-thread-"; + } + + @Override + public Thread newThread(Runnable r) { + Thread t = new Thread(group, r, namePrefix + threadNumber.getAndIncrement(), 0); + if (!t.isDaemon()) { + t.setDaemon(true); + } + if (t.getPriority() != Thread.NORM_PRIORITY) { + t.setPriority(Thread.NORM_PRIORITY); + } + return t; + } +} Index: src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (working copy) @@ -124,10 +124,10 @@ * Name of directory that holds recovered edits written by the wal log * splitting code, one per region */ - private static final String RECOVERED_EDITS_DIR = "recovered.edits"; + public static final String RECOVERED_EDITS_DIR = "recovered.edits"; private static final Pattern EDITFILES_NAME_PATTERN = Pattern.compile("-?[0-9]+"); - static final String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp"; + public static final String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp"; private final FileSystem fs; private final Path dir; Index: src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java (revision 0) @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver.snapshot; + +import java.util.List; +import java.util.concurrent.Callable; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.procedure.ProcedureMember; +import org.apache.hadoop.hbase.procedure.Subprocedure; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.snapshot.RegionServerSnapshotManager.SnapshotSubprocedurePool; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; + +/** + * This online snapshot implementation uses the distributed procedure framework to force a + * store flush and then records the hfiles. Its enter stage does nothing. Its leave stage then + * flushes the memstore, builds the region server's snapshot manifest from its hfiles list, and + * copies .regioninfos into the snapshot working directory. At the master side, there is an atomic + * rename of the working dir into the proper snapshot directory. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class FlushSnapshotSubprocedure extends Subprocedure { + private static final Log LOG = LogFactory.getLog(FlushSnapshotSubprocedure.class); + + private final List regions; + private final SnapshotDescription snapshot; + private final SnapshotSubprocedurePool taskManager; + + public FlushSnapshotSubprocedure(ProcedureMember member, + ForeignExceptionDispatcher errorListener, long wakeFrequency, long timeout, + List regions, SnapshotDescription snapshot, + SnapshotSubprocedurePool taskManager) { + super(member, snapshot.getName(), errorListener, wakeFrequency, timeout); + this.snapshot = snapshot; + this.regions = regions; + this.taskManager = taskManager; + } + + /** + * Callable for adding files to snapshot manifest working dir. Ready for multithreading. + */ + private class RegionSnapshotTask implements Callable { + HRegion region; + RegionSnapshotTask(HRegion region) { + this.region = region; + } + + @Override + public Void call() throws Exception { + // Taking the region read lock prevents the individual region from being closed while a + // snapshot is in progress. This is helpful but not sufficient for preventing races with + // snapshots that involve multiple regions and regionservers. It is still possible to have + // an interleaving such that globally regions are missing, so we still need the verification + // step. + LOG.debug("Starting region operation on " + region); + region.startRegionOperation(); + try { + LOG.debug("Flush Snapshotting region " + region.toString() + " started..."); + region.flushcache(); + region.addRegionToSnapshot(snapshot, monitor); + LOG.debug("... Flush Snapshotting region " + region.toString() + " completed."); + } finally { + LOG.debug("Closing region operation on " + region); + region.closeRegionOperation(); + } + return null; + } + } + + private void flushSnapshot() throws ForeignException { + if (regions.isEmpty()) { + // No regions on this RS, we are basically done. + return; + } + + monitor.rethrowException(); + + // assert that the taskManager is empty. 
+ if (taskManager.hasTasks()) { + throw new IllegalStateException("Attempting to take snapshot " + + SnapshotDescriptionUtils.toString(snapshot) + + " but we currently have outstanding tasks"); + } + + // Add all hfiles already existing in region. + for (HRegion region : regions) { + // submit one task per region for parallelize by region. + taskManager.submitTask(new RegionSnapshotTask(region)); + monitor.rethrowException(); + } + + // wait for everything to complete. + LOG.debug("Flush Snapshot Tasks submitted for " + regions.size() + " regions"); + try { + taskManager.waitForOutstandingTasks(); + } catch (InterruptedException e) { + throw new ForeignException(getMemberName(), e); + } + } + + /** + * do nothing, core of snapshot is executed in {@link #insideBarrier} step. + */ + @Override + public void acquireBarrier() throws ForeignException { + // NO OP + } + + /** + * do a flush snapshot of every region on this rs from the target table. + */ + @Override + public void insideBarrier() throws ForeignException { + flushSnapshot(); + } + + /** + * Cancel threads if they haven't finished. + */ + @Override + public void cleanup(Exception e) { + LOG.info("Aborting all online FLUSH snapshot subprocedure task threads for '" + + snapshot.getName() + "' due to error", e); + try { + taskManager.cancelTasks(); + } catch (InterruptedException e1) { + Thread.currentThread().interrupt(); + } + } + + /** + * Hooray! + */ + public void releaseBarrier() { + // NO OP + } + +} Index: src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java (revision 0) @@ -0,0 +1,377 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver.snapshot; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.Future; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.DaemonThreadFactory; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.master.snapshot.MasterSnapshotVerifier; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; +import org.apache.hadoop.hbase.procedure.ProcedureMember; +import org.apache.hadoop.hbase.procedure.ProcedureMemberRpcs; +import org.apache.hadoop.hbase.procedure.Subprocedure; +import org.apache.hadoop.hbase.procedure.SubprocedureFactory; +import org.apache.hadoop.hbase.procedure.ZKProcedureMemberRpcs; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.regionserver.RegionServerServices; +import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.zookeeper.KeeperException; + +import com.google.protobuf.InvalidProtocolBufferException; + +/** + * This manager class handles the work dealing with snapshots for a {@link HRegionServer}. + *

+ * This provides the mechanism necessary to kick off an online-snapshot-specific
+ * {@link Subprocedure} that is responsible for the regions being served by this region server.
+ * If any failures occur with the subprocedure, the RegionServerSnapshotManager's subprocedure
+ * handler, {@link ProcedureMember}, notifies the master's ProcedureCoordinator to abort all
+ * others.
+ *

    + * On startup, requires {@link #start()} to be called. + *

    + * On shutdown, requires {@link #stop(boolean)} to be called + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class RegionServerSnapshotManager { + private static final Log LOG = LogFactory.getLog(RegionServerSnapshotManager.class); + + /** Maximum number of snapshot region tasks that can run concurrently */ + private static final String CONCURENT_SNAPSHOT_TASKS_KEY = "hbase.snapshot.region.concurrentTasks"; + private static final int DEFAULT_CONCURRENT_SNAPSHOT_TASKS = 3; + + /** Conf key for number of request threads to start snapshots on regionservers */ + public static final String SNAPSHOT_REQUEST_THREADS_KEY = "hbase.snapshot.region.pool.threads"; + /** # of threads for snapshotting regions on the rs. */ + public static final int SNAPSHOT_REQUEST_THREADS_DEFAULT = 10; + + /** Conf key for max time to keep threads in snapshot request pool waiting */ + public static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.region.timeout"; + /** Keep threads alive in request pool for max of 60 seconds */ + public static final long SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 60000; + + /** Conf key for millis between checks to see if snapshot completed or if there are errors*/ + public static final String SNAPSHOT_REQUEST_WAKE_MILLIS_KEY = "hbase.snapshot.region.wakefrequency"; + /** Default amount of time to check for errors while regions finish snapshotting */ + private static final long SNAPSHOT_REQUEST_WAKE_MILLIS_DEFAULT = 500; + + private final RegionServerServices rss; + private final ProcedureMemberRpcs memberRpcs; + private final ProcedureMember member; + + /** + * Exposed for testing. + * @param conf HBase configuration. + * @param parent parent running the snapshot handler + * @param memberRpc use specified memberRpc instance + * @param procMember use specified ProcedureMember + */ + RegionServerSnapshotManager(Configuration conf, HRegionServer parent, + ProcedureMemberRpcs memberRpc, ProcedureMember procMember) { + this.rss = parent; + this.memberRpcs = memberRpc; + this.member = procMember; + } + + /** + * Create a default snapshot handler - uses a zookeeper based member controller. + * @param rss region server running the handler + * @throws KeeperException if the zookeeper cluster cannot be reached + */ + public RegionServerSnapshotManager(RegionServerServices rss) + throws KeeperException { + this.rss = rss; + ZooKeeperWatcher zkw = rss.getZooKeeper(); + String nodeName = rss.getServerName().toString(); + this.memberRpcs = new ZKProcedureMemberRpcs(zkw, + SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, nodeName); + + // read in the snapshot request configuration properties + Configuration conf = rss.getConfiguration(); + long wakeMillis = conf.getLong(SNAPSHOT_REQUEST_WAKE_MILLIS_KEY, SNAPSHOT_REQUEST_WAKE_MILLIS_DEFAULT); + long keepAlive = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, SNAPSHOT_TIMEOUT_MILLIS_DEFAULT); + int opThreads = conf.getInt(SNAPSHOT_REQUEST_THREADS_KEY, SNAPSHOT_REQUEST_THREADS_DEFAULT); + + // create the actual snapshot procedure member + ThreadPoolExecutor pool = ProcedureMember.defaultPool(wakeMillis, keepAlive, opThreads, nodeName); + this.member = new ProcedureMember(memberRpcs, pool, new SnapshotSubprocedureBuilder()); + } + + /** + * Start accepting snapshot requests. 
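For orientation, a rough sketch of how a region server could own this manager; the HRegionServer hook-up itself is not shown in this hunk, so the lifecycle placement and the rss variable (a RegionServerServices) are assumptions:

    // on region server startup
    RegionServerSnapshotManager snapshotManager = new RegionServerSnapshotManager(rss);
    snapshotManager.start();     // begin accepting snapshot subprocedure requests via ZooKeeper
    // ... region server runs ...
    // on region server shutdown
    snapshotManager.stop(false); // close the procedure member and its ZK rpcs ("gracefully")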
+ */ + public void start() { + this.memberRpcs.start(member); + } + + /** + * Close this and all running snapshot tasks + * @param force forcefully stop all running tasks + * @throws IOException + */ + public void stop(boolean force) throws IOException { + String mode = force ? "abruptly" : "gracefully"; + LOG.info("Stopping RegionServerSnapshotManager " + mode + "."); + + try { + this.member.close(); + } finally { + this.memberRpcs.close(); + } + } + + /** + * If in a running state, creates the specified subprocedure for handling an online snapshot. + * + * Because this gets the local list of regions to snapshot and not the set the master had, + * there is a possibility of a race where regions may be missed. This detected by the master in + * the snapshot verification step. + * + * @param snapshot + * @return Subprocedure to submit to the ProcedureMemeber. + */ + public Subprocedure buildSubprocedure(SnapshotDescription snapshot) { + + // don't run a snapshot if the parent is stop(ping) + if (rss.isStopping() || rss.isStopped()) { + throw new IllegalStateException("Can't start snapshot on RS: " + rss.getServerName() + + ", because stopping/stopped!"); + } + + // check to see if this server is hosting any regions for the snapshots + // check to see if we have regions for the snapshot + List involvedRegions; + try { + involvedRegions = getRegionsToSnapshot(snapshot); + } catch (IOException e1) { + throw new IllegalStateException("Failed to figure out if we should handle a snapshot - " + + "something has gone awry with the online regions.", e1); + } + + // We need to run the subprocedure even if we have no relevant regions. The coordinator + // expects participation in the procedure and without sending message the snapshot attempt + // will hang and fail. + + LOG.debug("Launching subprocedure for snapshot " + snapshot.getName() + " from table " + + snapshot.getTable()); + ForeignExceptionDispatcher exnDispatcher = new ForeignExceptionDispatcher(); + Configuration conf = rss.getConfiguration(); + long timeoutMillis = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, + SNAPSHOT_TIMEOUT_MILLIS_DEFAULT); + long wakeMillis = conf.getLong(SNAPSHOT_REQUEST_WAKE_MILLIS_KEY, + SNAPSHOT_REQUEST_WAKE_MILLIS_DEFAULT); + + switch (snapshot.getType()) { + case FLUSH: + SnapshotSubprocedurePool taskManager = + new SnapshotSubprocedurePool(rss.getServerName().toString(), conf); + return new FlushSnapshotSubprocedure(member, exnDispatcher, wakeMillis, + timeoutMillis, involvedRegions, snapshot, taskManager); + default: + throw new UnsupportedOperationException("Unrecognized snapshot type:" + snapshot.getType()); + } + } + + /** + * Determine if the snapshot should be handled on this server + * + * NOTE: This is racy -- the master expects a list of regionservers. + * This means if a region moves somewhere between the calls we'll miss some regions. + * For example, a region move during a snapshot could result in a region to be skipped or done + * twice. This is manageable because the {@link MasterSnapshotVerifier} will double check the + * region lists after the online portion of the snapshot completes and will explicitly fail the + * snapshot. + * + * @param snapshot + * @return the list of online regions. Empty list is returned if no regions are responsible for + * the given snapshot. 
+ * @throws IOException + */ + private List getRegionsToSnapshot(SnapshotDescription snapshot) throws IOException { + byte[] table = Bytes.toBytes(snapshot.getTable()); + return rss.getOnlineRegions(table); + } + + /** + * Build the actual snapshot runner that will do all the 'hard' work + */ + public class SnapshotSubprocedureBuilder implements SubprocedureFactory { + + @Override + public Subprocedure buildSubprocedure(String name, byte[] data) { + try { + // unwrap the snapshot information + SnapshotDescription snapshot = SnapshotDescription.parseFrom(data); + return RegionServerSnapshotManager.this.buildSubprocedure(snapshot); + } catch (InvalidProtocolBufferException e) { + throw new IllegalArgumentException("Could not read snapshot information from request."); + } + } + + } + + /** + * We use the SnapshotSubprocedurePool, a class specific thread pool instead of + * {@link org.apache.hadoop.hbase.executor.ExecutorService}. + * + * It uses a {@link java.util.concurrent.ExecutorCompletionService} which provides queuing of + * completed tasks which lets us efficiently cancel pending tasks upon the earliest operation + * failures. + * + * HBase's ExecutorService (different from {@link java.util.concurrent.ExecutorService}) isn't + * really built for coordinated tasks where multiple threads as part of one larger task. In + * RS's the HBase Executor services are only used for open and close and not other threadpooled + * operations such as compactions and replication sinks. + */ + static class SnapshotSubprocedurePool { + private final ExecutorCompletionService taskPool; + private final ThreadPoolExecutor executor; + private volatile boolean stopped; + private final List> futures = new ArrayList>(); + private final String name; + + SnapshotSubprocedurePool(String name, Configuration conf) { + // configure the executor service + long keepAlive = conf.getLong( + RegionServerSnapshotManager.SNAPSHOT_TIMEOUT_MILLIS_KEY, + RegionServerSnapshotManager.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT); + int threads = conf.getInt(CONCURENT_SNAPSHOT_TASKS_KEY, DEFAULT_CONCURRENT_SNAPSHOT_TASKS); + this.name = name; + executor = new ThreadPoolExecutor(1, threads, keepAlive, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue(), new DaemonThreadFactory("rs(" + + name + ")-snapshot-pool")); + taskPool = new ExecutorCompletionService(executor); + } + + boolean hasTasks() { + return futures.size() != 0; + } + + /** + * Submit a task to the pool. + * + * NOTE: all must be submitted before you can safely {@link #waitForOutstandingTasks()}. This + * version does not support issuing tasks from multiple concurrent table snapshots requests. + */ + void submitTask(final Callable task) { + Future f = this.taskPool.submit(task); + futures.add(f); + } + + /** + * Wait for all of the currently outstanding tasks submitted via {@link #submitTask(Callable)}. + * This *must* be called after all tasks are submitted via submitTask. + * + * @return true on success, false otherwise + * @throws InterruptedException + * @throws SnapshotCreationException if the snapshot failed while we were waiting + */ + boolean waitForOutstandingTasks() throws ForeignException, InterruptedException { + LOG.debug("Waiting for local region snapshots to finish."); + + int sz = futures.size(); + try { + // Using the completion service to process the futures that finish first first. 
+ for (int i = 0; i < sz; i++) { + Future f = taskPool.take(); + f.get(); + if (!futures.remove(f)) { + LOG.warn("unexpected future" + f); + } + LOG.debug("Completed " + (i+1) + "/" + sz + " local region snapshots."); + } + LOG.debug("Completed " + sz + " local region snapshots."); + return true; + } catch (InterruptedException e) { + LOG.warn("Got InterruptedException in SnapshotSubprocedurePool", e); + if (!stopped) { + Thread.currentThread().interrupt(); + throw new ForeignException("SnapshotSubprocedurePool", e); + } + // we are stopped so we can just exit. + } catch (ExecutionException e) { + if (e.getCause() instanceof ForeignException) { + LOG.warn("Rethrowing ForeignException from SnapshotSubprocedurePool", e); + throw (ForeignException)e.getCause(); + } + LOG.warn("Got Exception in SnapshotSubprocedurePool", e); + throw new ForeignException(name, e.getCause()); + } finally { + cancelTasks(); + } + return false; + } + + /** + * This attempts to cancel out all pending and in progress tasks (interruptions issues) + * @throws InterruptedException + */ + void cancelTasks() throws InterruptedException { + Collection> tasks = futures; + LOG.debug("cancelling " + tasks.size() + " tasks for snapshot " + name); + for (Future f: tasks) { + // TODO Ideally we'd interrupt hbase threads when we cancel. However it seems that there + // are places in the HBase code where row/region locks are taken and not released in a + // finally block. Thus we cancel without interrupting. Cancellations will be slower to + // complete but we won't suffer from unreleased locks due to poor code discipline. + f.cancel(false); + } + + // evict remaining tasks and futures from taskPool. + LOG.debug(taskPool); + while (!futures.isEmpty()) { + // block to remove cancelled futures; + LOG.warn("Removing cancelled elements from taskPool"); + futures.remove(taskPool.take()); + } + stop(); + } + + /** + * Abruptly shutdown the thread pool. Call when exiting a region server. 
+ */ + void stop() { + if (this.stopped) return; + + this.stopped = true; + this.executor.shutdownNow(); + } + } +} Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -96,6 +96,7 @@ import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.coprocessor.Exec; import org.apache.hadoop.hbase.client.coprocessor.ExecResult; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.FilterBase; @@ -111,12 +112,14 @@ import org.apache.hadoop.hbase.ipc.RpcCallContext; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; import org.apache.hadoop.hbase.regionserver.metrics.OperationMetrics; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.regionserver.wal.HLogKey; import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CancelableProgressable; import org.apache.hadoop.hbase.util.ClassSize; @@ -746,9 +749,32 @@ * @throws IOException */ private void checkRegioninfoOnFilesystem() throws IOException { - Path regioninfoPath = new Path(this.regiondir, REGIONINFO_FILE); - if (this.fs.exists(regioninfoPath) && - this.fs.getFileStatus(regioninfoPath).getLen() > 0) { + checkRegioninfoOnFilesystem(this.regiondir); + } + + /** + * Write out an info file under the region directory. Useful recovering mangled regions. + * @param regiondir directory under which to write out the region info + * @throws IOException + */ + private void checkRegioninfoOnFilesystem(Path regiondir) throws IOException { + writeRegioninfoOnFilesystem(regionInfo, regiondir, getFilesystem(), conf); + } + + /** + * Write out an info file under the region directory. Useful recovering mangled regions. If the + * regioninfo already exists on disk and there is information in the file, then we fast exit. + * @param regionInfo information about the region + * @param regiondir directory under which to write out the region info + * @param fs {@link FileSystem} on which to write the region info + * @param conf {@link Configuration} from which to extract specific file locations + * @throws IOException on unexpected error. 
+ */ + public static void writeRegioninfoOnFilesystem(HRegionInfo regionInfo, Path regiondir, + FileSystem fs, Configuration conf) throws IOException { + Path regioninfoPath = new Path(regiondir, REGIONINFO_FILE); + if (fs.exists(regioninfoPath) && + fs.getFileStatus(regioninfoPath).getLen() > 0) { return; } // Create in tmpdir and then move into place in case we crash after @@ -761,7 +787,7 @@ HConstants.DATA_FILE_UMASK_KEY); // and then create the file - Path tmpPath = new Path(getTmpDir(), REGIONINFO_FILE); + Path tmpPath = new Path(getTmpDir(regiondir), REGIONINFO_FILE); // if datanode crashes or if the RS goes down just before the close is called while trying to // close the created regioninfo file in the .tmp directory then on next @@ -774,10 +800,10 @@ FSDataOutputStream out = FSUtils.create(fs, tmpPath, perms); try { - this.regionInfo.write(out); + regionInfo.write(out); out.write('\n'); out.write('\n'); - out.write(Bytes.toBytes(this.regionInfo.toString())); + out.write(Bytes.toBytes(regionInfo.toString())); } finally { out.close(); } @@ -1194,9 +1220,13 @@ * will have its contents removed when the region is reopened. */ Path getTmpDir() { - return new Path(getRegionDir(), REGION_TEMP_SUBDIR); + return getTmpDir(getRegionDir()); } + static Path getTmpDir(Path regionDir) { + return new Path(regionDir, REGION_TEMP_SUBDIR); + } + void triggerMajorCompaction() { for (Store h: stores.values()) { h.triggerMajorCompaction(); @@ -2561,9 +2591,72 @@ /** - * Replaces any KV timestamps set to {@link HConstants#LATEST_TIMESTAMP} - * with the provided current timestamp. + * Complete taking the snapshot on the region. Writes the region info and adds references to the + * working snapshot directory. + * + * TODO for api consistency, consider adding another version with no {@link ForeignExceptionSnare} + * arg. (In the future other cancellable HRegion methods could eventually add a + * {@link ForeignExceptionSnare}, or we could do something fancier). + * + * @param desc snasphot description object + * @param exnSnare ForeignExceptionSnare that captures external exeptions in case we need to + * bail out. This is allowed to be null and will just be ignored in that case. + * @throws IOException if there is an external or internal error causing the snapshot to fail */ + public void addRegionToSnapshot(SnapshotDescription desc, + ForeignExceptionSnare exnSnare) throws IOException { + // This should be "fast" since we don't rewrite store files but instead + // back up the store files by creating a reference + Path rootDir = FSUtils.getRootDir(this.rsServices.getConfiguration()); + Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(desc, rootDir, + regionInfo.getEncodedName()); + + // 1. dump region meta info into the snapshot directory + LOG.debug("Storing region-info for snapshot."); + checkRegioninfoOnFilesystem(snapshotRegionDir); + + // 2. iterate through all the stores in the region + LOG.debug("Creating references for hfiles"); + + // This ensures that we have an atomic view of the directory as long as we have < ls limit + // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in + // batches and may miss files being added/deleted. This could be more robust (iteratively + // checking to see if we have all the files until we are sure), but the limit is currently 1000 + // files/batch, far more than the number of store files under a single column family. + for (Store store : stores.values()) { + // 2.1. 
build the snapshot reference directory for the store + Path dstStoreDir = TakeSnapshotUtils.getStoreSnapshotDirectory(snapshotRegionDir, + Bytes.toString(store.getFamily().getName())); + List storeFiles = store.getStorefiles(); + if (LOG.isDebugEnabled()) { + LOG.debug("Adding snapshot references for " + storeFiles + " hfiles"); + } + + // 2.2. iterate through all the store's files and create "references". + int sz = storeFiles.size(); + for (int i = 0; i < sz; i++) { + if (exnSnare != null) { + exnSnare.rethrowException(); + } + Path file = storeFiles.get(i).getPath(); + // create "reference" to this store file. It is intentionally an empty file -- all + // necessary infomration is captured by its fs location and filename. This allows us to + // only figure out what needs to be done via a single nn operation (instead of having to + // open and read the files as well). + LOG.debug("Creating reference for file (" + (i+1) + "/" + sz + ") : " + file); + Path referenceFile = new Path(dstStoreDir, file.getName()); + boolean success = fs.createNewFile(referenceFile); + if (!success) { + throw new IOException("Failed to create reference file:" + referenceFile); + } + } + } + } + + /** + * Replaces any KV timestamps set to {@link HConstants#LATEST_TIMESTAMP} with the provided current + * timestamp. + */ private void updateKVTimestamps( final Iterable> keyLists, final byte[] now) { for (List keys: keyLists) { @@ -4066,6 +4159,8 @@ Path regionDir = HRegion.getRegionDir(tableDir, info.getEncodedName()); FileSystem fs = FileSystem.get(conf); fs.mkdirs(regionDir); + // Write HRI to a file in case we need to recover .META. + writeRegioninfoOnFilesystem(info, regionDir, fs, conf); HLog effectiveHLog = hlog; if (hlog == null && !ignoreHLog) { effectiveHLog = new HLog(fs, new Path(regionDir, HConstants.HREGION_LOGDIR_NAME), @@ -4465,11 +4560,11 @@ } // delete out the 'A' region - HFileArchiver.archiveRegion(a.getConf(), fs, FSUtils.getRootDir(a.getConf()), a.getTableDir(), - a.getRegionDir()); + HFileArchiver.archiveRegion(fs, FSUtils.getRootDir(a.getConf()), + a.getTableDir(), a.getRegionDir()); // delete out the 'B' region - HFileArchiver.archiveRegion(b.getConf(), fs, FSUtils.getRootDir(b.getConf()), b.getTableDir(), - b.getRegionDir()); + HFileArchiver.archiveRegion(fs, FSUtils.getRootDir(b.getConf()), + b.getTableDir(), b.getRegionDir()); LOG.info("merge completed. 
New region is " + dstRegion); Index: src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (working copy) @@ -37,33 +37,37 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HDFSBlocksDistribution; +import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.HFileLink; import org.apache.hadoop.hbase.io.HalfStoreFileReader; import org.apache.hadoop.hbase.io.Reference; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.io.hfile.HFileWriterV1; import org.apache.hadoop.hbase.io.hfile.HFileWriterV2; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured; -import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; -import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; -import org.apache.hadoop.hbase.util.ChecksumType; import org.apache.hadoop.hbase.util.BloomFilter; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.BloomFilterWriter; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ChecksumType; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.io.RawComparator; @@ -150,6 +154,9 @@ // If this StoreFile references another, this is the other files path. private Path referencePath; + // If this storefile is a link to another, this is the link instance. + private HFileLink link; + // Block cache configuration and reference. private final CacheConfig cacheConf; @@ -194,14 +201,27 @@ */ private Map metadataMap; - /* - * Regex that will work for straight filenames and for reference names. - * If reference, then the regex has more than just one group. Group 1 is - * this files id. Group 2 the referenced region name, etc. + /** + * A non-capture group, for hfiles, so that this can be embedded. + * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix. 
*/ - private static final Pattern REF_NAME_PARSER = - Pattern.compile("^([0-9a-f]+)(?:\\.(.+))?$"); + public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?"; + /** Regex that will work for hfiles */ + private static final Pattern HFILE_NAME_PATTERN = + Pattern.compile("^(" + HFILE_NAME_REGEX + ")"); + + /** + * Regex that will work for straight reference names (.) + * and hfilelink reference names (
    =-.) + * If reference, then the regex has more than just one group. + * Group 1, hfile/hfilelink pattern, is this file's id. + * Group 2 '(.+)' is the reference's parent region name. + */ + private static final Pattern REF_NAME_PATTERN = + Pattern.compile(String.format("^(%s|%s)\\.(.+)$", + HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX)); + // StoreFile.Reader private volatile Reader reader; @@ -244,9 +264,20 @@ this.dataBlockEncoder = dataBlockEncoder == null ? NoOpDataBlockEncoder.INSTANCE : dataBlockEncoder; - if (isReference(p)) { + + if (HFileLink.isHFileLink(p)) { + this.link = new HFileLink(conf, p); + LOG.debug("Store file " + p + " is a link"); + } else if (isReference(p)) { this.reference = Reference.read(fs, p); this.referencePath = getReferredToFile(this.path); + if (HFileLink.isHFileLink(this.referencePath)) { + this.link = new HFileLink(conf, this.referencePath); + } + LOG.debug("Store file " + p + " is a " + reference.getFileRegion() + + " reference to " + this.referencePath); + } else if (!isHFile(p)) { + throw new IOException("path=" + path + " doesn't look like a valid StoreFile"); } if (BloomFilterFactory.isGeneralBloomEnabled(conf)) { @@ -291,26 +322,32 @@ } /** + * @return true if this StoreFile is an HFileLink + */ + boolean isLink() { + return this.link != null && this.reference == null; + } + + private static boolean isHFile(final Path path) { + Matcher m = HFILE_NAME_PATTERN.matcher(path.getName()); + return m.matches() && m.groupCount() > 0; + } + + /** * @param p Path to check. * @return True if the path has format of a HStoreFile reference. */ public static boolean isReference(final Path p) { - return !p.getName().startsWith("_") && - isReference(p, REF_NAME_PARSER.matcher(p.getName())); + return isReference(p.getName()); } /** - * @param p Path to check. - * @param m Matcher to use. + * @param name file name to check. * @return True if the path has format of a HStoreFile reference. */ - public static boolean isReference(final Path p, final Matcher m) { - if (m == null || !m.matches()) { - LOG.warn("Failed match of store file name " + p.toString()); - throw new RuntimeException("Failed match of store file name " + - p.toString()); - } - return m.groupCount() > 1 && m.group(2) != null; + public static boolean isReference(final String name) { + Matcher m = REF_NAME_PATTERN.matcher(name); + return m.matches() && m.groupCount() > 1; } /* @@ -318,13 +355,13 @@ * hierarchy of ${hbase.rootdir}/tablename/regionname/familyname. * @param p Path to a Reference file. * @return Calculated path to parent region file. - * @throws IOException + * @throws IllegalArgumentException when path regex fails to match. */ public static Path getReferredToFile(final Path p) { - Matcher m = REF_NAME_PARSER.matcher(p.getName()); + Matcher m = REF_NAME_PATTERN.matcher(p.getName()); if (m == null || !m.matches()) { LOG.warn("Failed match of store file name " + p.toString()); - throw new RuntimeException("Failed match of store file name " + + throw new IllegalArgumentException("Failed match of store file name " + p.toString()); } // Other region name is suffix on the passed Reference file name @@ -332,6 +369,8 @@ // Tabledir is up two directories from where Reference was written. Path tableDir = p.getParent().getParent().getParent(); String nameStrippedOfSuffix = m.group(1); + LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix); + // Build up new path with the referenced region in place of our current // region in the reference path. 
Also strip regionname suffix from name. return new Path(new Path(new Path(tableDir, otherRegion), @@ -435,16 +474,15 @@ * If this estimate isn't good enough, we can improve it later. * @param fs The FileSystem * @param reference The reference - * @param reference The referencePath + * @param status The reference FileStatus * @return HDFS blocks distribution */ static private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution( - FileSystem fs, Reference reference, Path referencePath) throws IOException { - if ( referencePath == null) { + FileSystem fs, Reference reference, FileStatus status) throws IOException { + if (status == null) { return null; } - FileStatus status = fs.getFileStatus(referencePath); long start = 0; long length = 0; @@ -459,35 +497,25 @@ } /** - * helper function to compute HDFS blocks distribution of a given file. - * For reference file, it is an estimate - * @param fs The FileSystem - * @param p The path of the file - * @return HDFS blocks distribution - */ - static public HDFSBlocksDistribution computeHDFSBlockDistribution( - FileSystem fs, Path p) throws IOException { - if (isReference(p)) { - Reference reference = Reference.read(fs, p); - Path referencePath = getReferredToFile(p); - return computeRefFileHDFSBlockDistribution(fs, reference, referencePath); - } else { - FileStatus status = fs.getFileStatus(p); - long length = status.getLen(); - return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, length); - } - } - - - /** * compute HDFS block distribution, for reference file, it is an estimate */ private void computeHDFSBlockDistribution() throws IOException { if (isReference()) { + FileStatus status; + if (this.link != null) { + status = this.link.getFileStatus(fs); + } else { + status = fs.getFileStatus(this.referencePath); + } this.hdfsBlocksDistribution = computeRefFileHDFSBlockDistribution( - this.fs, this.reference, this.referencePath); + this.fs, this.reference, status); } else { - FileStatus status = this.fs.getFileStatus(this.path); + FileStatus status; + if (isLink()) { + status = link.getFileStatus(fs); + } else { + status = this.fs.getFileStatus(path); + } long length = status.getLen(); this.hdfsBlocksDistribution = FSUtils.computeHDFSBlocksDistribution( this.fs, status, 0, length); @@ -505,9 +533,17 @@ throw new IllegalAccessError("Already open"); } if (isReference()) { - this.reader = new HalfStoreFileReader(this.fs, this.referencePath, - this.cacheConf, this.reference, - dataBlockEncoder.getEncodingInCache()); + if (this.link != null) { + this.reader = new HalfStoreFileReader(this.fs, this.referencePath, this.link, + this.cacheConf, this.reference, dataBlockEncoder.getEncodingInCache()); + } else { + this.reader = new HalfStoreFileReader(this.fs, this.referencePath, + this.cacheConf, this.reference, dataBlockEncoder.getEncodingInCache()); + } + } else if (isLink()) { + long size = link.getFileStatus(fs).getLen(); + this.reader = new Reader(this.fs, this.path, link, size, this.cacheConf, + dataBlockEncoder.getEncodingInCache(), true); } else { this.reader = new Reader(this.fs, this.path, this.cacheConf, dataBlockEncoder.getEncodingInCache()); @@ -875,6 +911,10 @@ * @return true if the file could be a valid store file, false otherwise */ public static boolean validateStoreFileName(String fileName) { + if (HFileLink.isHFileLink(fileName)) + return true; + if (isReference(fileName)) + return true; return !fileName.contains("-"); } @@ -899,7 +939,7 @@ // A reference to the bottom half of the hsf store file. 
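As a side note on the name patterns introduced above, the following minimal, standalone sketch shows how they are intended to classify store file names. The hfile-link alternative used here is a simplified assumption for illustration only, not the actual HFileLink.LINK_NAME_REGEX.

import java.util.regex.Pattern;

public class StoreFileNameSketch {
  // Same hfile regex as the patch; the link regex below is an assumed, simplified stand-in.
  static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
  static final String ASSUMED_LINK_NAME_REGEX = "[^=]+=[0-9a-f]+-" + HFILE_NAME_REGEX;

  static final Pattern HFILE_NAME_PATTERN = Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
  static final Pattern REF_NAME_PATTERN = Pattern.compile(
      String.format("^(%s|%s)\\.(.+)$", HFILE_NAME_REGEX, ASSUMED_LINK_NAME_REGEX));

  public static void main(String[] args) {
    // plain hfile name (uuid-style)
    System.out.println(HFILE_NAME_PATTERN.matcher("0123456789abcdef").matches());        // true
    // reference: hfile name + "." + parent encoded region name
    System.out.println(REF_NAME_PATTERN.matcher("0123456789abcdef.d0e1f2a3").matches()); // true
    // hfile-link style reference: table=region-hfile, then "." and the parent encoded region
    System.out.println(
        REF_NAME_PATTERN.matcher("t1=a1b2c3d4-0123456789abcdef.d0e1f2a3").matches());    // true
    // not a valid store file name
    System.out.println(HFILE_NAME_PATTERN.matcher("_logs").matches());                    // false
  }
}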
Reference r = new Reference(splitRow, range); // Add the referred-to regions name as a dot separated suffix. - // See REF_NAME_PARSER regex above. The referred-to regions name is + // See REF_NAME_REGEX regex above. The referred-to regions name is // up in the path of the passed in f -- parentdir is family, // then the directory above is the region name. String parentRegionName = f.getPath().getParent().getParent().getName(); @@ -1263,6 +1303,23 @@ bloomFilterType = BloomType.NONE; } + public Reader(FileSystem fs, Path path, HFileLink hfileLink, long size, + CacheConfig cacheConf, DataBlockEncoding preferredEncodingInCache, + boolean closeIStream) throws IOException { + super(path); + + FSDataInputStream in = hfileLink.open(fs); + FSDataInputStream inNoChecksum = in; + if (fs instanceof HFileSystem) { + FileSystem noChecksumFs = ((HFileSystem)fs).getNoChecksumFs(); + inNoChecksum = hfileLink.open(noChecksumFs); + } + + reader = HFile.createReaderWithEncoding(fs, path, in, inNoChecksum, + size, cacheConf, preferredEncodingInCache, closeIStream); + bloomFilterType = BloomType.NONE; + } + /** * ONLY USE DEFAULT CONSTRUCTOR FOR UNIT TESTS */ Index: src/main/java/org/apache/hadoop/hbase/regionserver/Store.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (working copy) @@ -54,6 +54,7 @@ import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.HFileLink; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.Compression; @@ -336,11 +337,30 @@ */ public static Path getStoreHomedir(final Path tabledir, final String encodedName, final byte [] family) { - return new Path(tabledir, new Path(encodedName, - new Path(Bytes.toString(family)))); + return getStoreHomedir(tabledir, encodedName, Bytes.toString(family)); + } + + /** + * @param tabledir + * @param encodedName Encoded region name. + * @param family + * @return Path to family/Store home directory. + */ + public static Path getStoreHomedir(final Path tabledir, + final String encodedName, final String family) { + return new Path(tabledir, new Path(encodedName, new Path(family))); } /** + * @param parentRegionDirectory directory for the parent region + * @param family family name of this store + * @return Path to the family/Store home directory + */ + public static Path getStoreHomedir(final Path parentRegionDirectory, final byte[] family) { + return new Path(parentRegionDirectory, new Path(Bytes.toString(family))); + } + + /** * Return the directory in which this store stores its * StoreFiles */ @@ -393,9 +413,10 @@ continue; } final Path p = files[i].getPath(); - // Check for empty file. Should never be the case but can happen + // Check for empty hfile. Should never be the case but can happen // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646 - if (this.fs.getFileStatus(p).getLen() <= 0) { + // NOTE: that the HFileLink is just a name, so it's an empty file. + if (!HFileLink.isHFileLink(p) && this.fs.getFileStatus(p).getLen() <= 0) { LOG.warn("Skipping " + p + " because its empty. 
HBASE-646 DATA LOSS?"); continue; } Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -141,6 +141,7 @@ import org.apache.hadoop.hbase.regionserver.metrics.RegionServerMetrics; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.StoreMetricType; +import org.apache.hadoop.hbase.regionserver.snapshot.RegionServerSnapshotManager; import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; @@ -459,6 +460,9 @@ cacheConfig = new CacheConfig(conf); } + /** Handle all the snapshot requests to this server */ + RegionServerSnapshotManager snapshotManager; + /** * Run test on configured codecs to make sure supporting libs are in place. * @param c @@ -631,6 +635,13 @@ // Create the catalog tracker and start it; this.catalogTracker = new CatalogTracker(this.zooKeeper, this.conf, this); catalogTracker.start(); + + // watch for snapshots + try { + this.snapshotManager = new RegionServerSnapshotManager(this); + } catch (KeeperException e) { + this.abort("Failed to reach zk cluster when creating snapshot handler."); + } } /** @@ -717,6 +728,9 @@ } registerMBean(); + // start the snapshot handler, since the server is ready to run + this.snapshotManager.start(); + // We registered with the Master. Go into run mode. long lastMsg = 0; long oldRequestCount = -1; @@ -796,6 +810,12 @@ this.healthCheckChore.interrupt(); } + try { + if (snapshotManager != null) snapshotManager.stop(this.abortRequested); + } catch (IOException e) { + LOG.warn("Failed to close snapshot handler cleanly", e); + } + if (this.killed) { // Just skip out w/o closing regions. Used when testing. } else if (abortRequested) { @@ -812,6 +832,13 @@ // handlers are stuck waiting on meta or root. if (this.catalogTracker != null) this.catalogTracker.stop(); + // stop the snapshot handler, forcefully killing all running tasks + try { + if (snapshotManager != null) snapshotManager.stop(this.abortRequested || this.killed); + } catch (IOException e) { + LOG.warn("Failed to close snapshot handler cleanly", e); + } + // Closing the compactSplit thread before closing meta regions if (!this.killed && containsMetaTableRegions()) { if (!abortRequested || this.fsOk) { Index: src/main/java/org/apache/hadoop/hbase/HRegionInfo.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/HRegionInfo.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/HRegionInfo.java (working copy) @@ -80,6 +80,9 @@ private static final int ENC_SEPARATOR = '.'; public static final int MD5_HEX_LENGTH = 32; + /** A non-capture group so that this can be embedded. */ + public static final String ENCODED_REGION_NAME_REGEX = "(?:[a-f0-9]+)"; + /** * Does region name contain its encoded name? 
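Because ENCODED_REGION_NAME_REGEX above is deliberately a non-capture group, it can be dropped into larger patterns without shifting group numbers. A small illustrative sketch follows; the composite pattern is hypothetical:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class EncodedNameSketch {
  static final String ENCODED_REGION_NAME_REGEX = "(?:[a-f0-9]+)";

  // Hypothetical composite pattern "encodedRegion-hexFileId", just to show the embedding.
  static final Pattern REGION_AND_FILE = Pattern.compile(
      "^(" + ENCODED_REGION_NAME_REGEX + ")-([0-9a-f]+)$");

  public static void main(String[] args) {
    Matcher m = REGION_AND_FILE.matcher("a1b2c3d4e5f60718-0123456789abcdef");
    if (m.matches()) {
      System.out.println("region=" + m.group(1) + " file=" + m.group(2));
    }
  }
}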
* @param regionName region name Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -76,6 +76,8 @@ private final Path oldLogDir; // root hbase directory on the FS private final Path rootdir; + // hbase temp directory used for table construction and deletion + private final Path tempdir; // create the split log lock final Lock splitLogLock = new ReentrantLock(); final boolean distributedLogSplitting; @@ -94,6 +96,7 @@ // default localfs. Presumption is that rootdir is fully-qualified before // we get to here with appropriate fs scheme. this.rootdir = FSUtils.getRootDir(conf); + this.tempdir = new Path(this.rootdir, HConstants.HBASE_TEMP_DIRECTORY); // Cover both bases, the old way of setting default fs and the new. // We're supposed to run on 0.20 and 0.21 anyways. this.fs = this.rootdir.getFileSystem(conf); @@ -130,6 +133,9 @@ // check if the root directory exists checkRootDir(this.rootdir, conf, this.fs); + // check if temp directory exists and clean it + checkTempDir(this.tempdir, conf, this.fs); + Path oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME); // Make sure the region servers can archive their old logs @@ -178,6 +184,13 @@ } /** + * @return HBase temp dir. + */ + public Path getTempDir() { + return this.tempdir; + } + + /** * @return The unique identifier generated for this cluster */ public String getClusterId() { @@ -385,6 +398,32 @@ } } + /** + * Make sure the hbase temp directory exists and is empty. + * NOTE that this method is only executed once just after the master becomes the active one. + */ + private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs) + throws IOException { + // If the temp directory exists, clear the content (left over, from the previous run) + if (fs.exists(tmpdir)) { + // Archive table in temp, maybe left over from failed deletion, + // if not the cleaner will take care of them. + for (Path tabledir: FSUtils.getTableDirs(fs, tmpdir)) { + for (Path regiondir: FSUtils.getRegionDirs(fs, tabledir)) { + HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir); + } + } + if (!fs.delete(tmpdir, true)) { + throw new IOException("Unable to clean the temp directory: " + tmpdir); + } + } + + // Create the temp directory + if (!fs.mkdirs(tmpdir)) { + throw new IOException("HBase temp directory '" + tmpdir + "' creation failure."); + } + } + private static void bootstrap(final Path rd, final Configuration c) throws IOException { LOG.info("BOOTSTRAP: creating ROOT and first META regions"); @@ -451,6 +490,37 @@ fs.delete(new Path(rootdir, Bytes.toString(tableName)), true); } + /** + * Move the specified file/directory to the hbase temp directory. 
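A rough, self-contained sketch of the rename-into-temp idiom that the temp-directory helpers here rely on; the paths and error messages are illustrative assumptions, not the patch's exact code:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MoveToTempSketch {
  /** Rename path into tempDir; on HDFS this is a metadata-only operation, so it is cheap. */
  public static Path moveToTemp(FileSystem fs, Path tempDir, Path path) throws IOException {
    Path target = new Path(tempDir, path.getName());
    if (!fs.exists(tempDir) && !fs.mkdirs(tempDir)) {
      throw new IOException("could not create temp dir " + tempDir);
    }
    if (!fs.rename(path, target)) {
      throw new IOException("could not move " + path + " to " + target);
    }
    return target;
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // hypothetical paths, for illustration only
    System.out.println(moveToTemp(fs, new Path("/hbase/.tmp"), new Path("/hbase/mytable")));
  }
}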
+ * @param path The path of the file/directory to move + * @return The temp location of the file/directory moved + * @throws IOException in case of file-system failure + */ + public Path moveToTemp(final Path path) throws IOException { + Path tempPath = new Path(this.tempdir, path.getName()); + + // Ensure temp exists + if (!fs.exists(tempdir) && !fs.mkdirs(tempdir)) { + throw new IOException("HBase temp directory '" + tempdir + "' creation failure."); + } + + if (!fs.rename(path, tempPath)) { + throw new IOException("Unable to move '" + path + "' to temp '" + tempPath + "'"); + } + + return tempPath; + } + + /** + * Move the specified table to the hbase temp directory + * @param tableName Table name to move + * @return The temp location of the table moved + * @throws IOException in case of file-system failure + */ + public Path moveTableToTemp(byte[] tableName) throws IOException { + return moveToTemp(HTableDescriptor.getTableDir(this.rootdir, tableName)); + } + public void updateRegionInfo(HRegionInfo region) { // TODO implement this. i think this is currently broken in trunk i don't // see this getting updated. Index: src/main/java/org/apache/hadoop/hbase/master/SnapshotSentinel.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/SnapshotSentinel.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/SnapshotSentinel.java (revision 0) @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; + +/** + * Watch the current snapshot under process + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public interface SnapshotSentinel { + + /** + * Check to see if the snapshot is finished, where finished may be success or failure. + * @return false if the snapshot is still in progress, true if the snapshot has + * finished + */ + public boolean isFinished(); + + /** + * Actively cancel a running snapshot. + * @param why Reason for cancellation. + */ + public void cancel(String why); + + /** + * @return the description of the snapshot being run + */ + public SnapshotDescription getSnapshot(); + + /** + * Get the exception that caused the snapshot to fail, if the snapshot has failed. 
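A rough sketch of how a caller might poll the SnapshotSentinel interface declared here; the fixed poll interval and the absence of a timeout are simplifying assumptions:

import java.io.IOException;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.master.SnapshotSentinel;

public class SentinelWaitSketch {
  /** Block until the sentinel reports completion, then surface any failure. */
  public static void waitForFinish(SnapshotSentinel sentinel) throws IOException {
    while (!sentinel.isFinished()) {
      try {
        Thread.sleep(100);  // illustrative poll interval
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException("interrupted while waiting for snapshot", e);
      }
    }
    ForeignException failure = sentinel.getExceptionIfFailed();
    if (failure != null) {
      throw new IOException(
          "snapshot '" + sentinel.getSnapshot().getName() + "' failed", failure);
    }
  }
}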
+ * @return {@link ForeignException} that caused the snapshot to fail, or null if the + * snapshot is still in progress or has succeeded + */ + public ForeignException getExceptionIfFailed(); + +} Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -44,6 +44,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.ClusterStatus; @@ -92,13 +93,18 @@ import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler; import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; +import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.monitoring.TaskMonitor; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; import org.apache.hadoop.hbase.replication.regionserver.Replication; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.HasThread; import org.apache.hadoop.hbase.util.InfoServer; @@ -225,6 +231,9 @@ private long masterStartTime; private long masterActiveTime; + // monitor for snapshot of hbase tables + private SnapshotManager snapshotManager; + /** * MX Bean for MasterInfo */ @@ -406,6 +415,7 @@ if (this.serverManager != null) this.serverManager.stop(); if (this.assignmentManager != null) this.assignmentManager.stop(); if (this.fileSystemManager != null) this.fileSystemManager.stop(); + if (this.snapshotManager != null) this.snapshotManager.stop("server shutting down."); this.zooKeeper.close(); } LOG.info("HMaster main thread exiting"); @@ -467,6 +477,9 @@ ", sessionid=0x" + Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) + ", cluster-up flag was=" + wasUp); + + // create the snapshot manager + this.snapshotManager = new SnapshotManager(this); } // Check if we should stop every second. @@ -1989,4 +2002,125 @@ String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC); return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation); } + + /** + * Exposed for TESTING! + * @return the underlying snapshot manager + */ + public SnapshotManager getSnapshotManagerForTesting() { + return this.snapshotManager; + } + + + /** + * Triggers an asynchronous attempt to take a snapshot. 
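Assuming the client-side HBaseAdmin wrappers that accompany this feature (snapshot, cloneSnapshot, restoreSnapshot, deleteSnapshot), end-to-end usage of the master entry points added below might look like the following sketch; the table and snapshot names are made up:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class SnapshotClientSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      // Take a snapshot of the table.
      admin.snapshot("mytable-snap1", "mytable");           // assumed wrapper around HMaster.snapshot()

      // Clone: materialize the snapshot as a brand new table.
      admin.cloneSnapshot("mytable-snap1", "mytable_copy"); // assumed wrapper

      // Restore: the table must be disabled first, as the restore javadoc further down notes.
      admin.disableTable("mytable");
      admin.restoreSnapshot("mytable-snap1");               // assumed wrapper
      admin.enableTable("mytable");

      admin.deleteSnapshot("mytable-snap1");                // assumed wrapper
    } finally {
      admin.close();
    }
  }
}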
+ * {@inheritDoc} + */ + @Override + public long snapshot(final HSnapshotDescription request) throws IOException { + LOG.debug("Submitting snapshot request for:" + + SnapshotDescriptionUtils.toString(request.getProto())); + try { + this.snapshotManager.checkSnapshotSupport(); + } catch (UnsupportedOperationException e) { + throw new IOException(e); + } + + // get the snapshot information + SnapshotDescription snapshot = SnapshotDescriptionUtils.validate(request.getProto(), + this.conf); + + snapshotManager.takeSnapshot(snapshot); + + // send back the max amount of time the client should wait for the snapshot to complete + long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(conf, snapshot.getType(), + SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME); + return waitTime; + } + + /** + * List the currently available/stored snapshots. Any in-progress snapshots are ignored + */ + @Override + public List getCompletedSnapshots() throws IOException { + List availableSnapshots = new ArrayList(); + List snapshots = snapshotManager.getCompletedSnapshots(); + + // convert to writables + for (SnapshotDescription snapshot: snapshots) { + availableSnapshots.add(new HSnapshotDescription(snapshot)); + } + + return availableSnapshots; + } + + /** + * Execute Delete Snapshot operation. + * @throws ServiceException wrapping SnapshotDoesNotExistException if specified snapshot did not + * exist. + */ + @Override + public void deleteSnapshot(final HSnapshotDescription request) throws IOException { + try { + this.snapshotManager.checkSnapshotSupport(); + } catch (UnsupportedOperationException e) { + throw new IOException(e); + } + + snapshotManager.deleteSnapshot(request.getProto()); + } + + /** + * Checks if the specified snapshot is done. + * @return true if the snapshot is in file system ready to use, + * false if the snapshot is in the process of completing + * @throws ServiceException wrapping UnknownSnapshotException if invalid snapshot, or + * a wrapped HBaseSnapshotException with progress failure reason. + */ + @Override + public boolean isSnapshotDone(final HSnapshotDescription request) throws IOException { + LOG.debug("Checking to see if snapshot from request:" + + SnapshotDescriptionUtils.toString(request.getProto()) + " is done"); + return snapshotManager.isSnapshotDone(request.getProto()); + } + + /** + * Execute Restore/Clone snapshot operation. + * + *
    If the specified table exists a "Restore" is executed, replacing the table + * schema and directory data with the content of the snapshot. + * The table must be disabled, or a UnsupportedOperationException will be thrown. + * + *
    If the table doesn't exist a "Clone" is executed, a new table is created + * using the schema at the time of the snapshot, and the content of the snapshot. + * + *
    The restore/clone operation does not require copying HFiles. Since HFiles + * are immutable the table can point to and use the same files as the original one. + */ + @Override + public void restoreSnapshot(final HSnapshotDescription request) throws IOException { + try { + this.snapshotManager.checkSnapshotSupport(); + } catch (UnsupportedOperationException e) { + throw new IOException(e); + } + + snapshotManager.restoreSnapshot(request.getProto()); + } + + /** + * Returns the status of the requested snapshot restore/clone operation. + * This method is not exposed to the user, it is just used internally by HBaseAdmin + * to verify if the restore is completed. + * + * No exceptions are thrown if the restore is not running, the result will be "done". + * + * @return done true if the restore/clone operation is completed. + * @throws RestoreSnapshotExcepton if the operation failed. + */ + @Override + public boolean isRestoreSnapshotDone(final HSnapshotDescription request) throws IOException { + return !snapshotManager.isRestoringTable(request.getProto()); + } } + Index: src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java (working copy) @@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.coprocessor.*; import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; import java.io.IOException; @@ -629,4 +630,146 @@ } } } + + public void preSnapshot(final SnapshotDescription snapshot, + final HTableDescriptor hTableDescriptor) throws IOException { + ObserverContext ctx = null; + for (MasterEnvironment env: coprocessors) { + if (env.getInstance() instanceof MasterObserver) { + ctx = ObserverContext.createAndPrepare(env, ctx); + try { + ((MasterObserver)env.getInstance()).preSnapshot(ctx, snapshot, hTableDescriptor); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + if (ctx.shouldComplete()) { + break; + } + } + } + } + + public void postSnapshot(final SnapshotDescription snapshot, + final HTableDescriptor hTableDescriptor) throws IOException { + ObserverContext ctx = null; + for (MasterEnvironment env: coprocessors) { + if (env.getInstance() instanceof MasterObserver) { + ctx = ObserverContext.createAndPrepare(env, ctx); + try { + ((MasterObserver)env.getInstance()).postSnapshot(ctx, snapshot, hTableDescriptor); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + if (ctx.shouldComplete()) { + break; + } + } + } + } + + public void preCloneSnapshot(final SnapshotDescription snapshot, + final HTableDescriptor hTableDescriptor) throws IOException { + ObserverContext ctx = null; + for (MasterEnvironment env: coprocessors) { + if (env.getInstance() instanceof MasterObserver) { + ctx = ObserverContext.createAndPrepare(env, ctx); + try { + ((MasterObserver)env.getInstance()).preCloneSnapshot(ctx, snapshot, hTableDescriptor); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + if (ctx.shouldComplete()) { + break; + } + } + } + } + + public void postCloneSnapshot(final SnapshotDescription snapshot, + final HTableDescriptor hTableDescriptor) throws IOException { + ObserverContext ctx = null; + for (MasterEnvironment env: coprocessors) { + if (env.getInstance() instanceof MasterObserver) { + 
ctx = ObserverContext.createAndPrepare(env, ctx); + try { + ((MasterObserver)env.getInstance()).postCloneSnapshot(ctx, snapshot, hTableDescriptor); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + if (ctx.shouldComplete()) { + break; + } + } + } + } + + public void preRestoreSnapshot(final SnapshotDescription snapshot, + final HTableDescriptor hTableDescriptor) throws IOException { + ObserverContext ctx = null; + for (MasterEnvironment env: coprocessors) { + if (env.getInstance() instanceof MasterObserver) { + ctx = ObserverContext.createAndPrepare(env, ctx); + try { + ((MasterObserver)env.getInstance()).preRestoreSnapshot(ctx, snapshot, hTableDescriptor); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + if (ctx.shouldComplete()) { + break; + } + } + } + } + + public void postRestoreSnapshot(final SnapshotDescription snapshot, + final HTableDescriptor hTableDescriptor) throws IOException { + ObserverContext ctx = null; + for (MasterEnvironment env: coprocessors) { + if (env.getInstance() instanceof MasterObserver) { + ctx = ObserverContext.createAndPrepare(env, ctx); + try { + ((MasterObserver)env.getInstance()).postRestoreSnapshot(ctx, snapshot, hTableDescriptor); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + if (ctx.shouldComplete()) { + break; + } + } + } + } + + public void preDeleteSnapshot(final SnapshotDescription snapshot) throws IOException { + ObserverContext ctx = null; + for (MasterEnvironment env: coprocessors) { + if (env.getInstance() instanceof MasterObserver) { + ctx = ObserverContext.createAndPrepare(env, ctx); + try { + ((MasterObserver)env.getInstance()).preDeleteSnapshot(ctx, snapshot); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + if (ctx.shouldComplete()) { + break; + } + } + } + } + + public void postDeleteSnapshot(final SnapshotDescription snapshot) throws IOException { + ObserverContext ctx = null; + for (MasterEnvironment env: coprocessors) { + if (env.getInstance() instanceof MasterObserver) { + ctx = ObserverContext.createAndPrepare(env, ctx); + try { + ((MasterObserver)env.getInstance()).postDeleteSnapshot(ctx, snapshot); + } catch (Throwable e) { + handleCoprocessorThrowable(env, e); + } + if (ctx.shouldComplete()) { + break; + } + } + } + } } Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/EnabledTableSnapshotHandler.java (revision 0) @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.procedure.Procedure; +import org.apache.hadoop.hbase.procedure.ProcedureCoordinator; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; +import org.apache.hadoop.hbase.util.Pair; + +import com.google.common.collect.Lists; + +/** + * Handle the master side of taking a snapshot of an online table, regardless of snapshot type. + * Uses a {@link Procedure} to run the snapshot across all the involved region servers. + * @see ProcedureCoordinator + */ +@InterfaceAudience.Private +public class EnabledTableSnapshotHandler extends TakeSnapshotHandler { + + private static final Log LOG = LogFactory.getLog(EnabledTableSnapshotHandler.class); + private final ProcedureCoordinator coordinator; + + public EnabledTableSnapshotHandler(SnapshotDescription snapshot, MasterServices master, + SnapshotManager manager) throws IOException { + super(snapshot, master); + this.coordinator = manager.getCoordinator(); + } + + // TODO consider switching over to using regionnames, rather than server names. This would allow + // regions to migrate during a snapshot, and then be involved when they are ready. Still want to + // enforce a snapshot time constraints, but lets us be potentially a bit more robust. + + /** + * This method kicks off a snapshot procedure. Other than that it hangs around for various + * phases to complete. + */ + @Override + protected void snapshotRegions(List> regions) + throws HBaseSnapshotException { + Set regionServers = new HashSet(regions.size()); + for (Pair region : regions) { + regionServers.add(region.getSecond().toString()); + } + + // start the snapshot on the RS + Procedure proc = coordinator.startProcedure(this.monitor, this.snapshot.getName(), + this.snapshot.toByteArray(), Lists.newArrayList(regionServers)); + if (proc == null) { + String msg = "Failed to submit distributed procedure for snapshot '" + + snapshot.getName() + "'"; + LOG.error(msg); + throw new HBaseSnapshotException(msg); + } + + try { + // wait for the snapshot to complete. A timer thread is kicked off that should cancel this + // if it takes too long. 
+ proc.waitForCompleted(); + LOG.info("Done waiting - snapshot for " + this.snapshot.getName() + " finished!"); + } catch (InterruptedException e) { + ForeignException ee = + new ForeignException("Interrupted while waiting for snapshot to finish", e); + monitor.receive(ee); + Thread.currentThread().interrupt(); + } catch (ForeignException e) { + monitor.receive(e); + } + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/RestoreSnapshotHandler.java (revision 0) @@ -0,0 +1,155 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.CancellationException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.catalog.CatalogTracker; +import org.apache.hadoop.hbase.catalog.MetaEditor; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.SnapshotSentinel; +import org.apache.hadoop.hbase.master.handler.TableEventHandler; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Handler to Restore a snapshot. + * + *
    Uses {@link RestoreSnapshotHelper} to replace the table content with the + * data available in the snapshot. + */ +@InterfaceAudience.Private +public class RestoreSnapshotHandler extends TableEventHandler implements SnapshotSentinel { + private static final Log LOG = LogFactory.getLog(RestoreSnapshotHandler.class); + + private final HTableDescriptor hTableDescriptor; + private final SnapshotDescription snapshot; + + private final ForeignExceptionDispatcher monitor; + private volatile boolean stopped = false; + + public RestoreSnapshotHandler(final MasterServices masterServices, + final SnapshotDescription snapshot, final HTableDescriptor htd) + throws IOException { + super(EventType.C_M_RESTORE_SNAPSHOT, htd.getName(), masterServices, masterServices); + + // Snapshot information + this.snapshot = snapshot; + + // Monitor + this.monitor = new ForeignExceptionDispatcher(); + + // Check table exists. + getTableDescriptor(); + + // This is the new schema we are going to write out as this modification. + this.hTableDescriptor = htd; + } + + /** + * The restore table is executed in place. + * - The on-disk data will be restored - reference files are put in place without moving data + * - [if something fail here: you need to delete the table and re-run the restore] + * - META will be updated + * - [if something fail here: you need to run hbck to fix META entries] + * The passed in list gets changed in this method + */ + @Override + protected void handleTableOperation(List hris) throws IOException { + MasterFileSystem fileSystemManager = masterServices.getMasterFileSystem(); + CatalogTracker catalogTracker = masterServices.getCatalogTracker(); + FileSystem fs = fileSystemManager.getFileSystem(); + Path rootDir = fileSystemManager.getRootDir(); + byte[] tableName = hTableDescriptor.getName(); + Path tableDir = HTableDescriptor.getTableDir(rootDir, tableName); + + try { + // 1. Update descriptor + this.masterServices.getTableDescriptors().add(hTableDescriptor); + + // 2. Execute the on-disk Restore + LOG.debug("Starting restore snapshot=" + SnapshotDescriptionUtils.toString(snapshot)); + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); + RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper( + masterServices.getConfiguration(), fs, + snapshot, snapshotDir, hTableDescriptor, tableDir, monitor); + RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions(); + + // 3. Applies changes to .META. + hris.clear(); + if (metaChanges.hasRegionsToAdd()) hris.addAll(metaChanges.getRegionsToAdd()); + if (metaChanges.hasRegionsToRestore()) hris.addAll(metaChanges.getRegionsToRestore()); + List hrisToRemove = metaChanges.getRegionsToRemove(); + MetaEditor.mutateRegions(catalogTracker, hrisToRemove, hris); + + // At this point the restore is complete. Next step is enabling the table. + LOG.info("Restore snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + " on table=" + + Bytes.toString(tableName) + " completed!"); + } catch (IOException e) { + String msg = "restore snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + + " failed. 
Try re-running the restore command."; + LOG.error(msg, e); + monitor.receive(new ForeignException(masterServices.getServerName().toString(), e)); + throw new RestoreSnapshotException(msg, e); + } finally { + this.stopped = true; + } + } + + @Override + public boolean isFinished() { + return this.stopped; + } + + @Override + public SnapshotDescription getSnapshot() { + return snapshot; + } + + @Override + public void cancel(String why) { + if (this.stopped) return; + this.stopped = true; + String msg = "Stopping restore snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + + " because: " + why; + LOG.info(msg); + CancellationException ce = new CancellationException(why); + this.monitor.receive(new ForeignException(masterServices.getServerName().toString(), ce)); + } + + public ForeignException getExceptionIfFailed() { + return this.monitor.getException(); + } +} Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotHFileCleaner.java (revision 0) @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate; +import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * Implementation of a file cleaner that checks if a hfile is still used by snapshots of HBase + * tables. 
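For reference, a minimal sketch of the cleaner-delegate contract that SnapshotHFileCleaner implements; the retention rule used here is invented purely for illustration:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;

/** Illustration only: keep any hfile whose name contains a hypothetical marker string. */
public class MarkerHFileCleaner extends BaseHFileCleanerDelegate {
  private volatile boolean stopped = false;

  @Override
  public boolean isFileDeletable(Path filePath) {
    // Returning false keeps the file; the cleaner chore deletes a file only if every
    // configured delegate says it is deletable.
    return !filePath.getName().contains("KEEP");  // hypothetical retention rule
  }

  @Override
  public void stop(String why) {
    this.stopped = true;
  }

  @Override
  public boolean isStopped() {
    return stopped;
  }
}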
+ */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate { + private static final Log LOG = LogFactory.getLog(SnapshotHFileCleaner.class); + + /** + * Conf key for the frequency to attempt to refresh the cache of hfiles currently used in + * snapshots (ms) + */ + public static final String HFILE_CACHE_REFRESH_PERIOD_CONF_KEY = + "hbase.master.hfilecleaner.plugins.snapshot.period"; + + /** Refresh cache, by default, every 5 minutes */ + private static final long DEFAULT_HFILE_CACHE_REFRESH_PERIOD = 300000; + + /** File cache for HFiles in the completed and currently running snapshots */ + private SnapshotFileCache cache; + + @Override + public synchronized boolean isFileDeletable(Path filePath) { + try { + return !cache.contains(filePath.getName()); + } catch (IOException e) { + LOG.error("Exception while checking if:" + filePath + " was valid, keeping it just in case.", + e); + return false; + } + } + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + try { + long cacheRefreshPeriod = conf.getLong(HFILE_CACHE_REFRESH_PERIOD_CONF_KEY, + DEFAULT_HFILE_CACHE_REFRESH_PERIOD); + final FileSystem fs = FSUtils.getCurrentFileSystem(conf); + Path rootDir = FSUtils.getRootDir(conf); + cache = new SnapshotFileCache(fs, rootDir, cacheRefreshPeriod, cacheRefreshPeriod, + "snapshot-hfile-cleaner-cache-refresher", new SnapshotFileCache.SnapshotFileInspector() { + public Collection filesUnderSnapshot(final Path snapshotDir) + throws IOException { + return SnapshotReferenceUtil.getHFileNames(fs, snapshotDir); + } + }); + } catch (IOException e) { + LOG.error("Failed to create cleaner util", e); + } + } + + @Override + public void stop(String why) { + this.cache.stop(why); + } + + @Override + public boolean isStopped() { + return this.cache.isStopped(); + } + + /** + * Exposed for Testing! + * @return the cache of all hfiles + */ + public SnapshotFileCache getFileCacheForTesting() { + return this.cache; + } +} Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java (revision 0) @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CancellationException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare; +import org.apache.hadoop.hbase.executor.EventHandler; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.SnapshotSentinel; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.zookeeper.KeeperException; + +/** + * A handler for taking snapshots from the master. + * + * This is not a subclass of TableEventHandler because using that would incur an extra META scan. + * + * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor. + */ +@InterfaceAudience.Private +public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel, + ForeignExceptionSnare { + private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class); + + private volatile boolean finished; + + // none of these should ever be null + protected final MasterServices master; + protected final SnapshotDescription snapshot; + protected final Configuration conf; + protected final FileSystem fs; + protected final Path rootDir; + private final Path snapshotDir; + protected final Path workingDir; + private final MasterSnapshotVerifier verifier; + protected final ForeignExceptionDispatcher monitor; + + /** + * @param snapshot descriptor of the snapshot to take + * @param masterServices master services provider + * @throws IOException on unexpected error + */ + public TakeSnapshotHandler(SnapshotDescription snapshot, + final MasterServices masterServices) throws IOException { + super(masterServices, EventType.C_M_SNAPSHOT_TABLE); + assert snapshot != null : "SnapshotDescription must not be nul1"; + assert masterServices != null : "MasterServices must not be nul1"; + + this.master = masterServices; + this.snapshot = snapshot; + this.conf = this.master.getConfiguration(); + this.fs = this.master.getMasterFileSystem().getFileSystem(); + this.rootDir = this.master.getMasterFileSystem().getRootDir(); + this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); + this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); + this.monitor = new ForeignExceptionDispatcher(); + + loadTableDescriptor(); // check that .tableinfo is present + + // prepare the verify + this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir); + } + + 
private HTableDescriptor loadTableDescriptor() + throws FileNotFoundException, IOException { + final String name = snapshot.getTable(); + HTableDescriptor htd = + this.master.getTableDescriptors().get(name); + if (htd == null) { + throw new IOException("HTableDescriptor missing for " + name); + } + return htd; + } + + /** + * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)} + * call should get implemented for each snapshot flavor. + */ + @Override + public void process() { + LOG.info("Running table snapshot operation " + eventType + " on table " + snapshot.getTable()); + try { + // If regions move after this meta scan, the region specific snapshot should fail, triggering + // an external exception that gets captured here. + + // write down the snapshot info in the working directory + SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, this.fs); + new TableInfoCopyTask(monitor, snapshot, fs, rootDir).call(); + monitor.rethrowException(); + + List> regionsAndLocations = + MetaReader.getTableRegionsAndLocations(this.server.getCatalogTracker(), + Bytes.toBytes(snapshot.getTable()), true); + + // run the snapshot + snapshotRegions(regionsAndLocations); + + // extract each pair to separate lists + Set serverNames = new HashSet(); + for (Pair p : regionsAndLocations) { + serverNames.add(p.getSecond().toString()); + } + + // verify the snapshot is valid + verifier.verifySnapshot(this.workingDir, serverNames); + + // complete the snapshot, atomically moving from tmp to .snapshot dir. + completeSnapshot(this.snapshotDir, this.workingDir, this.fs); + } catch (Exception e) { + String reason = "Failed taking snapshot " + SnapshotDescriptionUtils.toString(snapshot) + + " due to exception:" + e.getMessage(); + LOG.error(reason, e); + ForeignException ee = new ForeignException(reason, e); + monitor.receive(ee); + // need to mark this completed to close off and allow cleanup to happen. + cancel("Failed to take snapshot '" + SnapshotDescriptionUtils.toString(snapshot) + + "' due to exception"); + } finally { + LOG.debug("Launching cleanup of working dir:" + workingDir); + try { + // if the working dir is still present, the snapshot has failed. it is present we delete + // it. 
+ if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) { + LOG.error("Couldn't delete snapshot working directory:" + workingDir); + } + } catch (IOException e) { + LOG.error("Couldn't delete snapshot working directory:" + workingDir); + } + } + } + + /** + * Reset the manager to allow another snapshot to proceed + * + * @param snapshotDir final path of the snapshot + * @param workingDir directory where the in progress snapshot was built + * @param fs {@link FileSystem} where the snapshot was built + * @throws SnapshotCreationException if the snapshot could not be moved + * @throws IOException the filesystem could not be reached + */ + public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs) + throws SnapshotCreationException, IOException { + LOG.debug("Sentinel is done, just moving the snapshot from " + workingDir + " to " + + snapshotDir); + if (!fs.rename(workingDir, snapshotDir)) { + throw new SnapshotCreationException("Failed to move working directory(" + workingDir + + ") to completed directory(" + snapshotDir + ")."); + } + finished = true; + } + + /** + * Snapshot the specified regions + */ + protected abstract void snapshotRegions(List> regions) + throws IOException, KeeperException; + + @Override + public void cancel(String why) { + if (finished) return; + + this.finished = true; + LOG.info("Stop taking snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + " because: " + + why); + CancellationException ce = new CancellationException(why); + monitor.receive(new ForeignException(master.getServerName().toString(), ce)); + } + + @Override + public boolean isFinished() { + return finished; + } + + @Override + public SnapshotDescription getSnapshot() { + return snapshot; + } + + @Override + public ForeignException getExceptionIfFailed() { + return monitor.getException(); + } + + @Override + public void rethrowException() throws ForeignException { + monitor.rethrowException(); + } + + @Override + public boolean hasException() { + return monitor.hasException(); + } + + @Override + public ForeignException getException() { + return monitor.getException(); + } + +} Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java (revision 0) @@ -0,0 +1,308 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.Timer; +import java.util.TimerTask; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * Intelligently keep track of all the files for all the snapshots. + *

+ * A cache of files is kept to avoid querying the {@link FileSystem} frequently. If there is a cache + * miss, the directory modification time is used to ensure that we don't rescan directories that we + * already have in cache. We only check the modification times of the snapshot directories + * (/hbase/.snapshot/[snapshot_name]) to determine if the files need to be loaded into the cache. + *

    + * New snapshots will be added to the cache and deleted snapshots will be removed when we refresh + * the cache. If the files underneath a snapshot directory are changed, but not the snapshot itself, + * we will ignore updates to that snapshot's files. + *

+ * This is sufficient because each snapshot has its own directory and is added via an atomic rename + * exactly once, when the snapshot is created. We don't need to worry about the files under a + * completed snapshot changing out from under the cache. + *

+ * Further, the cache is periodically refreshed to ensure that files in snapshots that were deleted are + * also removed from the cache. + *

+ * A SnapshotFileInspector must be passed when creating this cache to extract the files + * under the /hbase/.snapshot/[snapshot name] directory for each snapshot. + * This allows the cache to track only the files you care about: for instance, all the logs under + * the .logs directory, or all the files under all the regions. + *

+ * This cache also considers all running snapshots (those under /hbase/.snapshot/.tmp) as valid + * snapshots and will attempt to cache files from those snapshots as well. + *
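+ * <p>
+ * A rough usage sketch (illustrative only; the refresh period, thread name, file name and the
+ * inspector body below are assumptions for the example, not part of this patch):
+ * <pre>
+ *   SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, 300000, 300000,
+ *       "example-snapshot-cache-refresher", new SnapshotFileCache.SnapshotFileInspector() {
+ *         public Collection<String> filesUnderSnapshot(final Path snapshotDir) throws IOException {
+ *           return SnapshotReferenceUtil.getHLogNames(fs, snapshotDir);
+ *         }
+ *       });
+ *   // a file may only be deleted if no completed or in-progress snapshot still references it
+ *   boolean stillReferenced = cache.contains(fileName);
+ * </pre>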

    + * Queries about a given file are thread-safe with respect to multiple queries and cache refreshes. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class SnapshotFileCache implements Stoppable { + interface SnapshotFileInspector { + /** + * Returns a collection of file names needed by the snapshot. + * @param snapshotDir {@link Path} to the snapshot directory to scan. + * @return the collection of file names needed by the snapshot. + */ + Collection filesUnderSnapshot(final Path snapshotDir) throws IOException; + } + + private static final Log LOG = LogFactory.getLog(SnapshotFileCache.class); + private volatile boolean stop = false; + private final FileSystem fs; + private final SnapshotFileInspector fileInspector; + private final Path snapshotDir; + private final Set cache = new HashSet(); + /** + * This is a helper map of information about the snapshot directories so we don't need to rescan + * them if they haven't changed since the last time we looked. + */ + private final Map snapshots = + new HashMap(); + private final Timer refreshTimer; + + private long lastModifiedTime = Long.MIN_VALUE; + + /** + * Create a snapshot file cache for all snapshots under the specified [root]/.snapshot on the + * filesystem. + *

    + * Immediately loads the file cache. + * @param conf to extract the configured {@link FileSystem} where the snapshots are stored and + * hbase root directory + * @param cacheRefreshPeriod frequency (ms) with which the cache should be refreshed + * @param refreshThreadName name of the cache refresh thread + * @param inspectSnapshotFiles Filter to apply to each snapshot to extract the files. + * @throws IOException if the {@link FileSystem} or root directory cannot be loaded + */ + public SnapshotFileCache(Configuration conf, long cacheRefreshPeriod, String refreshThreadName, + SnapshotFileInspector inspectSnapshotFiles) throws IOException { + this(FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf), 0, cacheRefreshPeriod, + refreshThreadName, inspectSnapshotFiles); + } + + /** + * Create a snapshot file cache for all snapshots under the specified [root]/.snapshot on the + * filesystem + * @param fs {@link FileSystem} where the snapshots are stored + * @param rootDir hbase root directory + * @param cacheRefreshPeriod period (ms) with which the cache should be refreshed + * @param cacheRefreshDelay amount of time to wait for the cache to be refreshed + * @param refreshThreadName name of the cache refresh thread + * @param inspectSnapshotFiles Filter to apply to each snapshot to extract the files. + */ + public SnapshotFileCache(FileSystem fs, Path rootDir, long cacheRefreshPeriod, + long cacheRefreshDelay, String refreshThreadName, SnapshotFileInspector inspectSnapshotFiles) { + this.fs = fs; + this.fileInspector = inspectSnapshotFiles; + this.snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); + // periodically refresh the file cache to make sure we aren't superfluously saving files. + this.refreshTimer = new Timer(refreshThreadName, true); + this.refreshTimer.scheduleAtFixedRate(new RefreshCacheTask(), cacheRefreshDelay, + cacheRefreshPeriod); + } + + /** + * Trigger a cache refresh, even if its before the next cache refresh. Does not affect pending + * cache refreshes. + *

    + * Blocks until the cache is refreshed. + *

+ * Exposed for TESTING. + */ + public void triggerCacheRefreshForTesting() { + try { + SnapshotFileCache.this.refreshCache(); + } catch (IOException e) { + LOG.warn("Failed to refresh snapshot hfile cache!", e); + } + LOG.debug("Current cache:" + cache); + } + + /** + * Check to see if the passed file name is contained in any of the snapshots. First checks an + * in-memory cache of the files to keep. If it's not in the cache, then the cache is refreshed and + * the cache checked again for that file. This ensures that we always return true for a + * file that exists. + *

    + * Note this may lead to periodic false positives for the file being referenced. Periodically, the + * cache is refreshed even if there are no requests to ensure that the false negatives get removed + * eventually. For instance, suppose you have a file in the snapshot and it gets loaded into the + * cache. Then at some point later that snapshot is deleted. If the cache has not been refreshed + * at that point, cache will still think the file system contains that file and return + * true, even if it is no longer present (false positive). However, if the file never was + * on the filesystem, we will never find it and always return false. + * @param fileName file to check + * @return false if the file is not referenced in any current or running snapshot, + * true if the file is in the cache. + * @throws IOException if there is an unexpected error reaching the filesystem. + */ + // XXX this is inefficient to synchronize on the method, when what we really need to guard against + // is an illegal access to the cache. Really we could do a mutex-guarded pointer swap on the + // cache, but that seems overkill at the moment and isn't necessarily a bottleneck. + public synchronized boolean contains(String fileName) throws IOException { + if (this.cache.contains(fileName)) return true; + + refreshCache(); + + // then check again + return this.cache.contains(fileName); + } + + private synchronized void refreshCache() throws IOException { + // get the status of the snapshots directory + FileStatus status; + try { + status = fs.getFileStatus(snapshotDir); + } catch (FileNotFoundException e) { + LOG.error("Snapshot directory: " + snapshotDir + " doesn't exist"); + return; + } + // if the snapshot directory wasn't modified since we last check, we are done + if (status.getModificationTime() <= lastModifiedTime) return; + + // directory was modified, so we need to reload our cache + // there could be a slight race here where we miss the cache, check the directory modification + // time, then someone updates the directory, causing us to not scan the directory again. + // However, snapshot directories are only created once, so this isn't an issue. + + // 1. update the modified time + this.lastModifiedTime = status.getModificationTime(); + + // 2.clear the cache + this.cache.clear(); + Map known = new HashMap(); + + // 3. check each of the snapshot directories + FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir); + if (snapshots == null) { + // remove all the remembered snapshots because we don't have any left + LOG.debug("No snapshots on-disk, cache empty"); + this.snapshots.clear(); + return; + } + + // 3.1 iterate through the on-disk snapshots + for (FileStatus snapshot : snapshots) { + String name = snapshot.getPath().getName(); + // its the tmp dir + if (name.equals(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) { + // only add those files to the cache, but not to the known snapshots + FileStatus[] running = FSUtils.listStatus(fs, snapshot.getPath()); + if (running == null) continue; + for (FileStatus run : running) { + this.cache.addAll(fileInspector.filesUnderSnapshot(run.getPath())); + } + } else { + SnapshotDirectoryInfo files = this.snapshots.remove(name); + // 3.1.1 if we don't know about the snapshot or its been modified, we need to update the files + // the latter could occur where I create a snapshot, then delete it, and then make a new + // snapshot with the same name. 
We will need to update the cache the information from that new + // snapshot, even though it has the same name as the files referenced have probably changed. + if (files == null || files.hasBeenModified(snapshot.getModificationTime())) { + // get all files for the snapshot and create a new info + Collection storedFiles = fileInspector.filesUnderSnapshot(snapshot.getPath()); + files = new SnapshotDirectoryInfo(snapshot.getModificationTime(), storedFiles); + } + // 3.2 add all the files to cache + this.cache.addAll(files.getFiles()); + known.put(name, files); + } + } + + // 4. set the snapshots we are tracking + this.snapshots.clear(); + this.snapshots.putAll(known); + } + + /** + * Simple helper task that just periodically attempts to refresh the cache + */ + public class RefreshCacheTask extends TimerTask { + @Override + public void run() { + try { + SnapshotFileCache.this.refreshCache(); + } catch (IOException e) { + LOG.warn("Failed to refresh snapshot hfile cache!", e); + } + } + } + + @Override + public void stop(String why) { + if (!this.stop) { + this.stop = true; + this.refreshTimer.cancel(); + } + + } + + @Override + public boolean isStopped() { + return this.stop; + } + + /** + * Information about a snapshot directory + */ + private static class SnapshotDirectoryInfo { + long lastModified; + Collection files; + + public SnapshotDirectoryInfo(long mtime, Collection files) { + this.lastModified = mtime; + this.files = files; + } + + /** + * @return the hfiles in the snapshot when this was made. + */ + public Collection getFiles() { + return this.files; + } + + /** + * Check if the snapshot directory has been modified + * @param mtime current modification time of the directory + * @return true if it the modification time of the directory is newer time when we + * created this + */ + public boolean hasBeenModified(long mtime) { + return this.lastModified < mtime; + } + } +} Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/MasterSnapshotVerifier.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/MasterSnapshotVerifier.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/MasterSnapshotVerifier.java (revision 0) @@ -0,0 +1,249 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.IOException; +import java.util.List; +import java.util.Set; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; + +/** + * General snapshot verification on the master. + *

+ * This is a light-weight verification mechanism for all the files in a snapshot. It doesn't + * attempt to verify that the files are exact copies (that would be tantamount to taking the + * snapshot again!), but instead just attempts to ensure that the files match the expected + * files and are the same length. + *

+ * Taking an online snapshot can race against other operations, and this is a last line of + * defense. For example, if meta changes between when snapshots are taken, not all regions of a + * table may be present. This can be caused by a region split (daughters present on this scan, + * but the snapshot took the parent), or a move (the snapshot only checks the list of region servers, + * so a move could have caused a region to be skipped or done twice). + *

+ * Current snapshot files checked:
+ * <ol>
+ * <li>SnapshotDescription is readable</li>
+ * <li>Table info is readable</li>
+ * <li>Regions
+ * <ul>
+ * <li>Matching regions in the snapshot as currently in the table</li>
+ * <li>{@link HRegionInfo} matches the current and stored regions</li>
+ * <li>All referenced hfiles have valid names</li>
+ * <li>All the hfiles are present (either in the region directory or in the .archive directory)</li>
+ * <li>All recovered.edits files are present (by name) and have the correct file size</li>
+ * </ul>
+ * </li>
+ * </ol>
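+ * <p>
+ * For orientation, an illustrative sketch of how this class is driven (mirroring the snapshot
+ * handler's process() elsewhere in this patch; the variable names here are assumptions):
+ * <pre>
+ *   MasterSnapshotVerifier verifier = new MasterSnapshotVerifier(masterServices, snapshot, rootDir);
+ *   // after the region snapshots complete, check the working directory before the final rename
+ *   verifier.verifySnapshot(workingDir, serverNames);
+ * </pre>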
    + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class MasterSnapshotVerifier { + + private SnapshotDescription snapshot; + private FileSystem fs; + private Path rootDir; + private String tableName; + private MasterServices services; + + /** + * @param services services for the master + * @param snapshot snapshot to check + * @param rootDir root directory of the hbase installation. + */ + public MasterSnapshotVerifier(MasterServices services, SnapshotDescription snapshot, Path rootDir) { + this.fs = services.getMasterFileSystem().getFileSystem(); + this.services = services; + this.snapshot = snapshot; + this.rootDir = rootDir; + this.tableName = snapshot.getTable(); + } + + /** + * Verify that the snapshot in the directory is a valid snapshot + * @param snapshotDir snapshot directory to check + * @param snapshotServers {@link ServerName} of the servers that are involved in the snapshot + * @throws CorruptedSnapshotException if the snapshot is invalid + * @throws IOException if there is an unexpected connection issue to the filesystem + */ + public void verifySnapshot(Path snapshotDir, Set snapshotServers) + throws CorruptedSnapshotException, IOException { + // verify snapshot info matches + verifySnapshotDescription(snapshotDir); + + // check that tableinfo is a valid table description + verifyTableInfo(snapshotDir); + + // check that each region is valid + verifyRegions(snapshotDir); + } + + /** + * Check that the snapshot description written in the filesystem matches the current snapshot + * @param snapshotDir snapshot directory to check + */ + private void verifySnapshotDescription(Path snapshotDir) throws CorruptedSnapshotException { + SnapshotDescription found = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); + if (!this.snapshot.equals(found)) { + throw new CorruptedSnapshotException("Snapshot read (" + found + + ") doesn't equal snapshot we ran (" + snapshot + ").", snapshot); + } + } + + /** + * Check that the table descriptor for the snapshot is a valid table descriptor + * @param snapshotDir snapshot directory to check + */ + private void verifyTableInfo(Path snapshotDir) throws IOException { + FSTableDescriptors.getTableDescriptor(fs, snapshotDir); + } + + /** + * Check that all the regions in the snapshot are valid, and accounted for. + * @param snapshotDir snapshot directory to check + * @throws IOException if we can't reach .META. or read the files from the FS + */ + private void verifyRegions(Path snapshotDir) throws IOException { + List regions = MetaReader.getTableRegions(this.services.getCatalogTracker(), + Bytes.toBytes(tableName)); + for (HRegionInfo region : regions) { + // if offline split parent, skip it + if (region.isOffline() && (region.isSplit() || region.isSplitParent())) { + continue; + } + + verifyRegion(fs, snapshotDir, region); + } + } + + /** + * Verify that the region (regioninfo, hfiles) are valid + * @param fs the FileSystem instance + * @param snapshotDir snapshot directory to check + * @param region the region to check + */ + private void verifyRegion(FileSystem fs, Path snapshotDir, HRegionInfo region) throws IOException { + // make sure we have region in the snapshot + Path regionDir = new Path(snapshotDir, region.getEncodedName()); + if (!fs.exists(regionDir)) { + // could happen due to a move or split race. 
+ throw new CorruptedSnapshotException("No region directory found for region:" + region, + snapshot); + } + // make sure we have the region info in the snapshot + Path regionInfo = new Path(regionDir, HRegion.REGIONINFO_FILE); + // make sure the file exists + if (!fs.exists(regionInfo)) { + throw new CorruptedSnapshotException("No region info found for region:" + region, snapshot); + } + FSDataInputStream in = fs.open(regionInfo); + HRegionInfo found = new HRegionInfo(); + try { + found.readFields(in); + if (!region.equals(found)) { + throw new CorruptedSnapshotException("Found region info (" + found + + ") doesn't match expected region:" + region, snapshot); + } + } finally { + in.close(); + } + + // make sure we have the expected recovered edits files + TakeSnapshotUtils.verifyRecoveredEdits(fs, snapshotDir, found, snapshot); + + // check for the existance of each hfile + PathFilter familiesDirs = new FSUtils.FamilyDirFilter(fs); + FileStatus[] columnFamilies = FSUtils.listStatus(fs, regionDir, familiesDirs); + // should we do some checking here to make sure the cfs are correct? + if (columnFamilies == null) return; + + // setup the suffixes for the snapshot directories + Path tableNameSuffix = new Path(tableName); + Path regionNameSuffix = new Path(tableNameSuffix, region.getEncodedName()); + + // get the potential real paths + Path archivedRegion = new Path(HFileArchiveUtil.getArchivePath(services.getConfiguration()), + regionNameSuffix); + Path realRegion = new Path(rootDir, regionNameSuffix); + + // loop through each cf and check we can find each of the hfiles + for (FileStatus cf : columnFamilies) { + FileStatus[] hfiles = FSUtils.listStatus(fs, cf.getPath(), null); + // should we check if there should be hfiles? + if (hfiles == null || hfiles.length == 0) continue; + + Path realCfDir = new Path(realRegion, cf.getPath().getName()); + Path archivedCfDir = new Path(archivedRegion, cf.getPath().getName()); + for (FileStatus hfile : hfiles) { + // make sure the name is correct + if (!StoreFile.validateStoreFileName(hfile.getPath().getName())) { + throw new CorruptedSnapshotException("HFile: " + hfile.getPath() + + " is not a valid hfile name.", snapshot); + } + + // check to see if hfile is present in the real table + String fileName = hfile.getPath().getName(); + Path file = new Path(realCfDir, fileName); + Path archived = new Path(archivedCfDir, fileName); + if (!fs.exists(file) && !file.equals(archived)) { + throw new CorruptedSnapshotException("Can't find hfile: " + hfile.getPath() + + " in the real (" + realCfDir + ") or archive (" + archivedCfDir + + ") directory for the primary table.", snapshot); + } + } + } + } + + /** + * Check that the logs stored in the log directory for the snapshot are valid - it contains all + * the expected logs for all servers involved in the snapshot. + * @param snapshotDir snapshot directory to check + * @param snapshotServers list of the names of servers involved in the snapshot. 
+ * @throws CorruptedSnapshotException if the hlogs in the snapshot are not correct + * @throws IOException if we can't reach the filesystem + */ + private void verifyLogs(Path snapshotDir, Set snapshotServers) + throws CorruptedSnapshotException, IOException { + Path snapshotLogDir = new Path(snapshotDir, HConstants.HREGION_LOGDIR_NAME); + Path logsDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME); + TakeSnapshotUtils.verifyAllLogsGotReferenced(fs, logsDir, snapshotServers, snapshot, + snapshotLogDir); + } +} Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotManager.java (revision 0) @@ -0,0 +1,916 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ThreadPoolExecutor; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.executor.ExecutorService; +import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.master.MasterCoprocessorHost; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.SnapshotSentinel; +import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; +import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner; +import org.apache.hadoop.hbase.procedure.Procedure; +import org.apache.hadoop.hbase.procedure.ProcedureCoordinator; +import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs; +import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinatorRpcs; +import 
org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type; +import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; +import org.apache.hadoop.hbase.snapshot.SnapshotCreationException; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException; +import org.apache.hadoop.hbase.snapshot.SnapshotExistsException; +import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException; +import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.zookeeper.KeeperException; + +/** + * This class manages the procedure of taking and restoring snapshots. There is only one + * SnapshotManager for the master. + *

    + * The class provides methods for monitoring in-progress snapshot actions. + *
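+ * <p>
+ * A rough sketch of the expected call pattern (illustrative only; error handling is omitted and
+ * the polling interval is an assumption, not part of this patch):
+ * <pre>
+ *   // starts the asynchronous snapshot handler; throws if another snapshot is already running
+ *   snapshotManager.takeSnapshot(snapshot);
+ *   // isSnapshotDone() rethrows any handler failure as an HBaseSnapshotException
+ *   while (!snapshotManager.isSnapshotDone(snapshot)) {
+ *     Thread.sleep(500);
+ *   }
+ * </pre>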

    + * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a + * simplification in the current implementation. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class SnapshotManager implements Stoppable { + private static final Log LOG = LogFactory.getLog(SnapshotManager.class); + + /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */ + private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500; + + /** Enable or disable snapshot support */ + public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled"; + + /** + * Conf key for # of ms elapsed between checks for snapshot errors while waiting for + * completion. + */ + private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis"; + + /** By default, check to see if the snapshot is complete (ms) */ + private static final int SNAPSHOT_TIMEOUT_MILLIS_DEFAULT = 5000; + + /** + * Conf key for # of ms elapsed before injecting a snapshot timeout error when waiting for + * completion. + */ + private static final String SNAPSHOT_TIMEOUT_MILLIS_KEY = "hbase.snapshot.master.timeoutMillis"; + + /** Name of the operation to use in the controller */ + public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot"; + + // TODO - enable having multiple snapshots with multiple monitors/threads + // this needs to be configuration based when running multiple snapshots is implemented + /** number of current operations running on the master */ + private static final int opThreads = 1; + + private boolean stopped; + private final long wakeFrequency; + private final MasterServices master; // Needed by TableEventHandlers + private final ProcedureCoordinator coordinator; + + // Is snapshot feature enabled? + private boolean isSnapshotSupported = false; + + // A reference to a handler. If the handler is non-null, then it is assumed that a snapshot is + // in progress currently + // TODO: this is a bad smell; likely replace with a collection in the future. Also this gets + // reset by every operation. + private TakeSnapshotHandler handler; + + private final Path rootDir; + private final ExecutorService executorService; + + // Restore Sentinels map, with table name as key + private Map restoreHandlers = new HashMap(); + + /** + * Construct a snapshot manager. 
+ * @param master + */ + public SnapshotManager(final MasterServices master) throws KeeperException, IOException, + UnsupportedOperationException { + this.master = master; + checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem()); + + // get the configuration for the coordinator + Configuration conf = master.getConfiguration(); + this.wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT); + long keepAliveTime = conf.getLong(SNAPSHOT_TIMEOUT_MILLIS_KEY, SNAPSHOT_TIMEOUT_MILLIS_DEFAULT); + + // setup the default procedure coordinator + String name = master.getServerName().toString(); + ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, keepAliveTime, opThreads, wakeFrequency); + ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinatorRpcs( + master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name); + this.coordinator = new ProcedureCoordinator(comms, tpool); + this.rootDir = master.getMasterFileSystem().getRootDir(); + this.executorService = master.getExecutorService(); + resetTempDir(); + } + + /** + * Fully specify all necessary components of a snapshot manager. Exposed for testing. + * @param master services for the master where the manager is running + * @param coordinator procedure coordinator instance. exposed for testing. + * @param pool HBase ExecutorServcie instance, exposed for testing. + */ + public SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator, ExecutorService pool) + throws IOException, UnsupportedOperationException { + this.master = master; + checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem()); + + this.wakeFrequency = master.getConfiguration().getInt(SNAPSHOT_WAKE_MILLIS_KEY, + SNAPSHOT_WAKE_MILLIS_DEFAULT); + this.coordinator = coordinator; + this.rootDir = master.getMasterFileSystem().getRootDir(); + this.executorService = pool; + resetTempDir(); + } + + /** + * Gets the list of all completed snapshots. + * @return list of SnapshotDescriptions + * @throws IOException File system exception + */ + public List getCompletedSnapshots() throws IOException { + List snapshotDescs = new ArrayList(); + // first create the snapshot root path and check to see if it exists + Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir); + FileSystem fs = master.getMasterFileSystem().getFileSystem(); + + // if there are no snapshots, return an empty list + if (!fs.exists(snapshotDir)) { + return snapshotDescs; + } + + // ignore all the snapshots in progress + FileStatus[] snapshots = fs.listStatus(snapshotDir, + new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs)); + // loop through all the completed snapshots + for (FileStatus snapshot : snapshots) { + Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE); + // if the snapshot is bad + if (!fs.exists(info)) { + LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist"); + continue; + } + FSDataInputStream in = null; + try { + in = fs.open(info); + SnapshotDescription desc = SnapshotDescription.parseFrom(in); + snapshotDescs.add(desc); + } catch (IOException e) { + LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e); + } finally { + if (in != null) { + in.close(); + } + } + } + return snapshotDescs; + } + + /** + * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed + * snapshot attempts. 
+ * + * @throws IOException if we can't reach the filesystem + */ + void resetTempDir() throws IOException { + // cleanup any existing snapshots. + Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir); + if (!master.getMasterFileSystem().getFileSystem().delete(tmpdir, true)) { + LOG.warn("Couldn't delete working snapshot directory: " + tmpdir); + } + } + + /** + * Delete the specified snapshot + * @param snapshot + * @throws SnapshotDoesNotExistException If the specified snapshot does not exist. + * @throws IOException For filesystem IOExceptions + */ + public void deleteSnapshot(SnapshotDescription snapshot) throws SnapshotDoesNotExistException, IOException { + + // call coproc pre hook + MasterCoprocessorHost cpHost = master.getCoprocessorHost(); + if (cpHost != null) { + cpHost.preDeleteSnapshot(snapshot); + } + + // check to see if it is completed + if (!isSnapshotCompleted(snapshot)) { + throw new SnapshotDoesNotExistException(snapshot); + } + + String snapshotName = snapshot.getName(); + LOG.debug("Deleting snapshot: " + snapshotName); + // first create the snapshot description and check to see if it exists + MasterFileSystem fs = master.getMasterFileSystem(); + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir); + + // delete the existing snapshot + if (!fs.getFileSystem().delete(snapshotDir, true)) { + throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir); + } + + // call coproc post hook + if (cpHost != null) { + cpHost.postDeleteSnapshot(snapshot); + } + + } + + /** + * Return the handler if it is currently running and has the same snapshot target name. + * @param snapshot + * @return null if doesn't match, else a live handler. + */ + private synchronized TakeSnapshotHandler getTakeSnapshotHandler(SnapshotDescription snapshot) { + TakeSnapshotHandler h = this.handler; + if (h == null) { + return null; + } + + if (!h.getSnapshot().getName().equals(snapshot.getName())) { + // specified snapshot is to the one currently running + return null; + } + + return h; + } + + /** + * Check if the specified snapshot is done + * @param expected + * @return true if snapshot is ready to be restored, false if it is still being taken. + * @throws IOException IOException if error from HDFS or RPC + * @throws UnknownSnapshotException if snapshot is invalid or does not exist. + */ + public boolean isSnapshotDone(SnapshotDescription expected) throws IOException { + // check the request to make sure it has a snapshot + if (expected == null) { + throw new UnknownSnapshotException( + "No snapshot name passed in request, can't figure out which snapshot you want to check."); + } + + String ssString = SnapshotDescriptionUtils.toString(expected); + + // check to see if the sentinel exists + TakeSnapshotHandler handler = getTakeSnapshotHandler(expected); + if (handler == null) { + // doesn't exist, check if it is already completely done. + if (!isSnapshotCompleted(expected)) { + throw new UnknownSnapshotException("Snapshot " + ssString + + " is not currently running or one of the known completed snapshots."); + } + // was done, return true; + return true; + } + + // pass on any failure we find in the sentinel + try { + handler.rethrowException(); + } catch (ForeignException e) { + // Give some procedure info on an exception. 
+ String status; + Procedure p = coordinator.getProcedure(expected.getName()); + if (p != null) { + status = p.getStatus(); + } else { + status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames(); + } + throw new HBaseSnapshotException("Snapshot " + ssString + " had an error. " + status, e, + expected); + } + + // check to see if we are done + if (handler.isFinished()) { + LOG.debug("Snapshot '" + ssString + "' has completed, notifying client."); + return true; + } else if (LOG.isDebugEnabled()) { + LOG.debug("Snapshoting '" + ssString + "' is still in progress!"); + } + return false; + } + + /** + * Check to see if there are any snapshots in progress currently. Currently we have a + * limitation only allowing a single snapshot attempt at a time. + * @return true if there any snapshots in progress, false otherwise + * @throws SnapshotCreationException if the snapshot failed + */ + synchronized boolean isTakingSnapshot() throws SnapshotCreationException { + // TODO later when we handle multiple there would be a map with ssname to handler. + return handler != null && !handler.isFinished(); + } + + /** + * Check to see if the specified table has a snapshot in progress. Currently we have a + * limitation only allowing a single snapshot attempt at a time. + * @param tableName name of the table being snapshotted. + * @return true if there is a snapshot in progress on the specified table. + */ + private boolean isTakingSnapshot(final String tableName) { + if (handler != null && handler.getSnapshot().getTable().equals(tableName)) { + return !handler.isFinished(); + } + return false; + } + + /** + * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we + * aren't already running a snapshot. + * @param snapshot description of the snapshot we want to start + * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot + */ + private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot) + throws HBaseSnapshotException { + FileSystem fs = master.getMasterFileSystem().getFileSystem(); + Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); + + // make sure we aren't already running a snapshot + if (isTakingSnapshot()) { + throw new SnapshotCreationException("Rejected taking " + + SnapshotDescriptionUtils.toString(snapshot) + + " because we are already running another snapshot " + + SnapshotDescriptionUtils.toString(this.handler.getSnapshot()), snapshot); + } + + // make sure we aren't running a restore on the same table + if (isRestoringTable(snapshot.getTable())) { + throw new SnapshotCreationException("Rejected taking " + + SnapshotDescriptionUtils.toString(snapshot) + + " because we are already have a restore in progress on the same snapshot " + + SnapshotDescriptionUtils.toString(this.handler.getSnapshot()), snapshot); + } + + try { + // delete the working directory, since we aren't running the snapshot. Likely leftovers + // from a failed attempt. + fs.delete(workingDir, true); + + // recreate the working directory for the snapshot + if (!fs.mkdirs(workingDir)) { + throw new SnapshotCreationException("Couldn't create working directory (" + workingDir + + ") for snapshot" , snapshot); + } + } catch (HBaseSnapshotException e) { + throw e; + } catch (IOException e) { + throw new SnapshotCreationException( + "Exception while checking to see if snapshot could be started.", e, snapshot); + } + } + + /** + * Take a snapshot of an enabled table. + *

    + * The thread limitation on the executorService's thread pool for snapshots ensures the + * snapshot won't be started if there is another snapshot already running. Does + * not check to see if another snapshot of the same name already exists. + * @param snapshot description of the snapshot to take. + * @throws HBaseSnapshotException if the snapshot could not be started + */ + private synchronized void snapshotEnabledTable(SnapshotDescription snapshot) + throws HBaseSnapshotException { + TakeSnapshotHandler handler; + try { + handler = new EnabledTableSnapshotHandler(snapshot, master, this); + this.executorService.submit(handler); + this.handler = handler; + } catch (IOException e) { + // cleanup the working directory by trying to delete it from the fs. + Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); + try { + if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) { + LOG.warn("Couldn't delete working directory (" + workingDir + " for snapshot:" + + SnapshotDescriptionUtils.toString(snapshot)); + } + } catch (IOException e1) { + LOG.warn("Couldn't delete working directory (" + workingDir + " for snapshot:" + + SnapshotDescriptionUtils.toString(snapshot)); + } + // fail the snapshot + throw new SnapshotCreationException("Could not build snapshot handler", e, snapshot); + } + } + + /** + * Take a snapshot based on the enabled/disabled state of the table. + * + * @param snapshot + * @throws HBaseSnapshotException when a snapshot specific exception occurs. + * @throws IOException when some sort of generic IO exception occurs. + */ + public void takeSnapshot(SnapshotDescription snapshot) throws IOException { + // check to see if we already completed the snapshot + if (isSnapshotCompleted(snapshot)) { + throw new SnapshotExistsException("Snapshot '" + snapshot.getName() + + "' already stored on the filesystem.", snapshot); + } + + LOG.debug("No existing snapshot, attempting snapshot..."); + + // check to see if the table exists + HTableDescriptor desc = null; + try { + desc = master.getTableDescriptors().get(snapshot.getTable()); + } catch (FileNotFoundException e) { + String msg = "Table:" + snapshot.getTable() + " info doesn't exist!"; + LOG.error(msg); + throw new SnapshotCreationException(msg, e, snapshot); + } catch (IOException e) { + throw new SnapshotCreationException("Error while geting table description for table " + + snapshot.getTable(), e, snapshot); + } + if (desc == null) { + throw new SnapshotCreationException("Table '" + snapshot.getTable() + + "' doesn't exist, can't take snapshot.", snapshot); + } + + // set the snapshot version, now that we are ready to take it + snapshot = snapshot.toBuilder().setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION) + .build(); + + // call pre coproc hook + MasterCoprocessorHost cpHost = master.getCoprocessorHost(); + if (cpHost != null) { + cpHost.preSnapshot(snapshot, desc); + } + + // setup the snapshot + prepareToTakeSnapshot(snapshot); + + // if the table is enabled, then have the RS run actually the snapshot work + AssignmentManager assignmentMgr = master.getAssignmentManager(); + if (assignmentMgr.getZKTable().isEnabledTable(snapshot.getTable())) { + LOG.debug("Table enabled, starting distributed snapshot."); + snapshotEnabledTable(snapshot); + LOG.debug("Started snapshot: " + SnapshotDescriptionUtils.toString(snapshot)); + } + // For disabled table, snapshot is created by the master + else if (assignmentMgr.getZKTable().isDisabledTable(snapshot.getTable())) { + 
LOG.debug("Table is disabled, running snapshot entirely on master."); + snapshotDisabledTable(snapshot); + LOG.debug("Started snapshot: " + SnapshotDescriptionUtils.toString(snapshot)); + } else { + LOG.error("Can't snapshot table '" + snapshot.getTable() + + "', isn't open or closed, we don't know what to do!"); + TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable() + + " isn't fully open."); + throw new SnapshotCreationException("Table is not entirely open or closed", tpoe, snapshot); + } + + // call post coproc hook + if (cpHost != null) { + cpHost.postSnapshot(snapshot, desc); + } + } + + /** + * Take a snapshot of a disabled table. + *

    + * The thread limitation on the executorService's thread pool for snapshots ensures the + * snapshot won't be started if there is another snapshot already running. Does + * not check to see if another snapshot of the same name already exists. + * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}. + * @throws HBaseSnapshotException if the snapshot could not be started + */ + private synchronized void snapshotDisabledTable(SnapshotDescription snapshot) + throws HBaseSnapshotException { + + // set the snapshot to be a disabled snapshot, since the client doesn't know about that + snapshot = snapshot.toBuilder().setType(Type.DISABLED).build(); + + DisabledTableSnapshotHandler handler; + try { + handler = new DisabledTableSnapshotHandler(snapshot, this.master); + this.executorService.submit(handler); + this.handler = handler; + } catch (IOException e) { + // cleanup the working directory by trying to delete it from the fs. + Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir); + try { + if (!this.master.getMasterFileSystem().getFileSystem().delete(workingDir, true)) { + LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" + + SnapshotDescriptionUtils.toString(snapshot)); + } + } catch (IOException e1) { + LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" + + SnapshotDescriptionUtils.toString(snapshot)); + } + // fail the snapshot + throw new SnapshotCreationException("Could not build snapshot handler", e, snapshot); + } + } + + /** + * Set the handler for the current snapshot + *

    + * Exposed for TESTING + * @param handler handler the master should use + * + * TODO get rid of this if possible, repackaging, modify tests. + */ + public synchronized void setSnapshotHandlerForTesting(TakeSnapshotHandler handler) { + this.handler = handler; + } + + /** + * @return distributed commit coordinator for all running snapshots + */ + ProcedureCoordinator getCoordinator() { + return coordinator; + } + + /** + * Check to see if the snapshot is one of the currently completed snapshots + * @param expected snapshot to check + * @return true if the snapshot is stored on the {@link FileSystem}, false if is + * not stored + * @throws IOException if the filesystem throws an unexpected exception, + * @throws IllegalArgumentException if snapshot name is invalid. + */ + private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException { + try { + final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); + FileSystem fs = master.getMasterFileSystem().getFileSystem(); + + // check to see if the snapshot already exists + return fs.exists(snapshotDir); + } catch (IllegalArgumentException iae) { + throw new UnknownSnapshotException("Unexpected exception thrown", iae); + } + } + + /** + * Clone the specified snapshot into a new table. + * The operation will fail if the destination table has a snapshot or restore in progress. + * + * @param snapshot Snapshot Descriptor + * @param hTableDescriptor Table Descriptor of the table to create + * @param waitTime timeout before considering the clone failed + */ + synchronized void cloneSnapshot(final SnapshotDescription snapshot, + final HTableDescriptor hTableDescriptor) throws HBaseSnapshotException { + String tableName = hTableDescriptor.getNameAsString(); + + // make sure we aren't running a snapshot on the same table + if (isTakingSnapshot(tableName)) { + throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName); + } + + // make sure we aren't running a restore on the same table + if (isRestoringTable(tableName)) { + throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName); + } + + try { + CloneSnapshotHandler handler = + new CloneSnapshotHandler(master, snapshot, hTableDescriptor); + this.executorService.submit(handler); + restoreHandlers.put(tableName, handler); + } catch (Exception e) { + String msg = "Couldn't clone the snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + + " on table=" + tableName; + LOG.error(msg, e); + throw new RestoreSnapshotException(msg, e); + } + } + + /** + * Restore the specified snapshot + * @param reqSnapshot + * @throws IOException + */ + public void restoreSnapshot(SnapshotDescription reqSnapshot) throws IOException { + FileSystem fs = master.getMasterFileSystem().getFileSystem(); + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir); + MasterCoprocessorHost cpHost = master.getCoprocessorHost(); + + // check if the snapshot exists + if (!fs.exists(snapshotDir)) { + LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist."); + throw new SnapshotDoesNotExistException(reqSnapshot); + } + + // read snapshot information + SnapshotDescription fsSnapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); + HTableDescriptor snapshotTableDesc = FSTableDescriptors.getTableDescriptor(fs, snapshotDir); + String tableName = reqSnapshot.getTable(); + + // stop tracking completed restores + cleanupRestoreSentinels(); + + // 
Execute the restore/clone operation + if (MetaReader.tableExists(master.getCatalogTracker(), tableName)) { + if (master.getAssignmentManager().getZKTable().isEnabledTable(fsSnapshot.getTable())) { + throw new UnsupportedOperationException("Table '" + + fsSnapshot.getTable() + "' must be disabled in order to perform a restore operation."); + } + + // call coproc pre hook + if (cpHost != null) { + cpHost.preRestoreSnapshot(reqSnapshot, snapshotTableDesc); + } + restoreSnapshot(fsSnapshot, snapshotTableDesc); + LOG.info("Restore snapshot=" + fsSnapshot.getName() + " as table=" + tableName); + + if (cpHost != null) { + cpHost.postRestoreSnapshot(reqSnapshot, snapshotTableDesc); + } + } else { + HTableDescriptor htd = RestoreSnapshotHelper.cloneTableSchema(snapshotTableDesc, + Bytes.toBytes(tableName)); + if (cpHost != null) { + cpHost.preCloneSnapshot(reqSnapshot, htd); + } + cloneSnapshot(fsSnapshot, htd); + LOG.info("Clone snapshot=" + fsSnapshot.getName() + " as table=" + tableName); + + if (cpHost != null) { + cpHost.postCloneSnapshot(reqSnapshot, htd); + } + } + } + + /** + * Restore the specified snapshot. + * The restore will fail if the destination table has a snapshot or restore in progress. + * + * @param snapshot Snapshot Descriptor + * @param hTableDescriptor Table Descriptor + * @param waitTime timeout before considering the restore failed + */ + private synchronized void restoreSnapshot(final SnapshotDescription snapshot, + final HTableDescriptor hTableDescriptor) throws HBaseSnapshotException { + String tableName = hTableDescriptor.getNameAsString(); + + // make sure we aren't running a snapshot on the same table + if (isTakingSnapshot(tableName)) { + throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName); + } + + // make sure we aren't running a restore on the same table + if (isRestoringTable(tableName)) { + throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName); + } + + try { + RestoreSnapshotHandler handler = + new RestoreSnapshotHandler(master, snapshot, hTableDescriptor); + this.executorService.submit(handler); + restoreHandlers.put(hTableDescriptor.getNameAsString(), handler); + } catch (Exception e) { + String msg = "Couldn't restore the snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + + " on table=" + tableName; + LOG.error(msg, e); + throw new RestoreSnapshotException(msg, e); + } + } + + /** + * Verify if the restore of the specified table is in progress. + * + * @param tableName table under restore + * @return true if there is a restore in progress of the specified table. + */ + private boolean isRestoringTable(final String tableName) { + SnapshotSentinel sentinel = restoreHandlers.get(tableName); + return(sentinel != null && !sentinel.isFinished()); + } + + /** + * Returns status of a restore request, specifically comparing source snapshot and target table + * names. Throws exception if not a known snapshot. + * @param snapshot + * @return true if in progress, false if snapshot is completed. + * @throws UnknownSnapshotException if specified source snapshot does not exit. 
+ * @throws IOException if there was some sort of IO failure + */ + public boolean isRestoringTable(final SnapshotDescription snapshot) throws IOException { + // check to see if the snapshot is already on the fs + if (!isSnapshotCompleted(snapshot)) { + throw new UnknownSnapshotException("Snapshot:" + snapshot.getName() + + " is not one of the known completed snapshots."); + } + + SnapshotSentinel sentinel = getRestoreSnapshotSentinel(snapshot.getTable()); + if (sentinel == null) { + // there is no sentinel so restore is not in progress. + return false; + } + if (!sentinel.getSnapshot().getName().equals(snapshot.getName())) { + // another handler is trying to restore to the table, but it isn't the same snapshot source. + return false; + } + + LOG.debug("Verify snapshot=" + snapshot.getName() + " against=" + + sentinel.getSnapshot().getName() + " table=" + snapshot.getTable()); + ForeignException e = sentinel.getExceptionIfFailed(); + if (e != null) throw e; + + // check to see if we are done + if (sentinel.isFinished()) { + LOG.debug("Restore snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + + " has completed. Notifying the client."); + return false; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Sentinel is not yet finished with restoring snapshot=" + + SnapshotDescriptionUtils.toString(snapshot)); + } + return true; + } + + /** + * Get the restore snapshot sentinel for the specified table + * @param tableName table under restore + * @return the restore snapshot handler + */ + private synchronized SnapshotSentinel getRestoreSnapshotSentinel(final String tableName) { + try { + return restoreHandlers.get(tableName); + } finally { + cleanupRestoreSentinels(); + } + } + + /** + * Scan the restore handlers and remove the finished ones. + */ + private synchronized void cleanupRestoreSentinels() { + Iterator> it = restoreHandlers.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry entry = it.next(); + SnapshotSentinel sentinel = entry.getValue(); + if (sentinel.isFinished()) { + it.remove(); + } + } + } + + // + // Implementing Stoppable interface + // + + @Override + public void stop(String why) { + // short circuit + if (this.stopped) return; + // make sure we get stop + this.stopped = true; + // pass the stop onto take snapshot handlers + if (this.handler != null) this.handler.cancel(why); + + // pass the stop onto all the restore handlers + for (SnapshotSentinel restoreHandler: this.restoreHandlers.values()) { + restoreHandler.cancel(why); + } + } + + @Override + public boolean isStopped() { + return this.stopped; + } + + /** + * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported. + * Called at the beginning of snapshot() and restoreSnapshot() methods. + * @throws UnsupportedOperationException if snapshot are not supported + */ + public void checkSnapshotSupport() throws UnsupportedOperationException { + if (!this.isSnapshotSupported) { + throw new UnsupportedOperationException( + "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" + + HBASE_SNAPSHOT_ENABLED + "' property with value 'true'."); + } + } + + /** + * Called at startup, to verify if snapshot operation is supported, and to avoid + * starting the master if there're snapshots present but the cleaners needed are missing. + * Otherwise we can end up with snapshot data loss. 
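+ * <p>
+ * For example (an illustrative sketch, not part of this patch), a test or embedded master that
+ * wants snapshots enabled would set the flag before startup and let this method inject the
+ * required cleaners:
+ * <pre>
+ *   conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
+ *   // SnapshotHFileCleaner, HFileLinkCleaner and SnapshotLogCleaner are then added to the
+ *   // cleaner plugin configuration by checkSnapshotSupport(conf, mfs)
+ * </pre>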
+ * @param conf The {@link Configuration} object to use + * @param mfs The MasterFileSystem to use + * @throws IOException in case of file-system operation failure + * @throws UnsupportedOperationException in case cleaners are missing and + * there're snapshot in the system + */ + private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs) + throws IOException, UnsupportedOperationException { + // Verify if snapshot is disabled by the user + String enabled = conf.get(HBASE_SNAPSHOT_ENABLED); + boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false); + boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled); + + // Extract cleaners from conf + Set hfileCleaners = new HashSet(); + String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS); + if (cleaners != null) Collections.addAll(hfileCleaners, cleaners); + + Set logCleaners = new HashSet(); + cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS); + if (cleaners != null) Collections.addAll(logCleaners, cleaners); + + // If the user has enabled the snapshot, we force the cleaners to be present + // otherwise we still need to check if cleaners are enabled or not and verify + // that there're no snapshot in the .snapshot folder. + if (snapshotEnabled) { + // Inject snapshot cleaners, if snapshot.enable is true + hfileCleaners.add(SnapshotHFileCleaner.class.getName()); + hfileCleaners.add(HFileLinkCleaner.class.getName()); + logCleaners.add(SnapshotLogCleaner.class.getName()); + + // Set cleaners conf + conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, + hfileCleaners.toArray(new String[hfileCleaners.size()])); + conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, + logCleaners.toArray(new String[logCleaners.size()])); + } else { + // Verify if cleaners are present + snapshotEnabled = logCleaners.contains(SnapshotLogCleaner.class.getName()) && + hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) && + hfileCleaners.contains(HFileLinkCleaner.class.getName()); + + // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set. + if (snapshotEnabled) { + LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " + + "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " + + (userDisabled ? "is set to 'false'." : "is not set.")); + } + } + + // Mark snapshot feature as enabled if cleaners are present and user has not disabled it. + this.isSnapshotSupported = snapshotEnabled && !userDisabled; + + // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder + // otherwise we end up with snapshot data loss. 
+ if (!snapshotEnabled) { + LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners."); + Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir()); + FileSystem fs = mfs.getFileSystem(); + if (fs.exists(snapshotDir)) { + FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir, + new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs)); + if (snapshots != null) { + LOG.error("Snapshots are present, but cleaners are not enabled."); + checkSnapshotSupport(); + } + } + } + } +} Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotLogCleaner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotLogCleaner.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotLogCleaner.java (revision 0) @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.master.cleaner.BaseLogCleanerDelegate; +import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * Implementation of a log cleaner that checks if a log is still used by + * snapshots of HBase tables. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class SnapshotLogCleaner extends BaseLogCleanerDelegate { + private static final Log LOG = LogFactory.getLog(SnapshotLogCleaner.class); + + /** + * Conf key for the frequency to attempt to refresh the cache of hfiles currently used in + * snapshots (ms) + */ + static final String HLOG_CACHE_REFRESH_PERIOD_CONF_KEY = + "hbase.master.hlogcleaner.plugins.snapshot.period"; + + /** Refresh cache, by default, every 5 minutes */ + private static final long DEFAULT_HLOG_CACHE_REFRESH_PERIOD = 300000; + + private SnapshotFileCache cache; + + @Override + public synchronized boolean isLogDeletable(Path filePath) { + try { + if (null == cache) return false; + return !cache.contains(filePath.getName()); + } catch (IOException e) { + LOG.error("Exception while checking if:" + filePath + " was valid, keeping it just in case.", + e); + return false; + } + } + + /** + * This method should only be called once, as it starts a thread to keep the cache + * up-to-date. + *
    + * {@inheritDoc} + */ + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + try { + long cacheRefreshPeriod = conf.getLong( + HLOG_CACHE_REFRESH_PERIOD_CONF_KEY, DEFAULT_HLOG_CACHE_REFRESH_PERIOD); + final FileSystem fs = FSUtils.getCurrentFileSystem(conf); + Path rootDir = FSUtils.getRootDir(conf); + cache = new SnapshotFileCache(fs, rootDir, cacheRefreshPeriod, cacheRefreshPeriod, + "snapshot-log-cleaner-cache-refresher", new SnapshotFileCache.SnapshotFileInspector() { + public Collection filesUnderSnapshot(final Path snapshotDir) + throws IOException { + return SnapshotReferenceUtil.getHLogNames(fs, snapshotDir); + } + }); + } catch (IOException e) { + LOG.error("Failed to create snapshot log cleaner", e); + } + } + + @Override + public void stop(String why) { + this.cache.stop(why); + } + + @Override + public boolean isStopped() { + return this.cache.isStopped(); + } +} Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/DisabledTableSnapshotHandler.java (revision 0) @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.TimeoutExceptionInjector; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.snapshot.CopyRecoveredEditsTask; +import org.apache.hadoop.hbase.snapshot.ReferenceRegionHFilesTask; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.snapshot.TableInfoCopyTask; +import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.zookeeper.KeeperException; + +/** + * Take a snapshot of a disabled table. + *
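At the client level, the offline-snapshot path handled here would typically be driven along these lines. This is an illustrative sketch only: it assumes the HBaseAdmin snapshot API that ships alongside this feature, and the table and snapshot names are made up.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;

// Hypothetical driver: snapshot a table while it is disabled, then bring it back online.
public class OfflineSnapshotSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      admin.disableTable("mytable");             // a disabled table routes to this handler
      admin.snapshot("mytable-snap", "mytable");
      admin.enableTable("mytable");
    } finally {
      admin.close();
    }
  }
}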

    + * Table must exist when taking the snapshot, or results are undefined. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class DisabledTableSnapshotHandler extends TakeSnapshotHandler { + private static final Log LOG = LogFactory.getLog(DisabledTableSnapshotHandler.class); + private final TimeoutExceptionInjector timeoutInjector; + + /** + * @param snapshot descriptor of the snapshot to take + * @param masterServices master services provider + * @throws IOException on unexpected error + */ + public DisabledTableSnapshotHandler(SnapshotDescription snapshot, + final MasterServices masterServices) throws IOException { + super(snapshot, masterServices); + + // setup the timer + timeoutInjector = TakeSnapshotUtils.getMasterTimerAndBindToMonitor(snapshot, conf, monitor); + } + + // TODO consider parallelizing these operations since they are independent. Right now its just + // easier to keep them serial though + @Override + public void snapshotRegions(List> regionsAndLocations) throws IOException, + KeeperException { + try { + timeoutInjector.start(); + + // 1. get all the regions hosting this table. + + // extract each pair to separate lists + Set serverNames = new HashSet(); + Set regions = new HashSet(); + for (Pair p : regionsAndLocations) { + regions.add(p.getFirst()); + serverNames.add(p.getSecond().toString()); + } + + // 2. for each region, write all the info to disk + LOG.info("Starting to write region info and WALs for regions for offline snapshot:" + + SnapshotDescriptionUtils.toString(snapshot)); + for (HRegionInfo regionInfo : regions) { + // 2.1 copy the regionInfo files to the snapshot + Path snapshotRegionDir = TakeSnapshotUtils.getRegionSnapshotDirectory(snapshot, rootDir, + regionInfo.getEncodedName()); + HRegion.writeRegioninfoOnFilesystem(regionInfo, snapshotRegionDir, fs, conf); + // check for error for each region + monitor.rethrowException(); + + // 2.2 for each region, copy over its recovered.edits directory + Path regionDir = HRegion.getRegionDir(rootDir, regionInfo); + new CopyRecoveredEditsTask(snapshot, monitor, fs, regionDir, snapshotRegionDir).call(); + monitor.rethrowException(); + + // 2.3 reference all the files in the region + new ReferenceRegionHFilesTask(snapshot, monitor, regionDir, fs, snapshotRegionDir).call(); + monitor.rethrowException(); + } + + // 3. write the table info to disk + LOG.info("Starting to copy tableinfo for offline snapshot: " + + SnapshotDescriptionUtils.toString(snapshot)); + TableInfoCopyTask tableInfoCopyTask = new TableInfoCopyTask(this.monitor, snapshot, fs, + FSUtils.getRootDir(conf)); + tableInfoCopyTask.call(); + monitor.rethrowException(); + } catch (Exception e) { + // make sure we capture the exception to propagate back to the client later + String reason = "Failed snapshot " + SnapshotDescriptionUtils.toString(snapshot) + + " due to exception:" + e.getMessage(); + ForeignException ee = new ForeignException(reason, e); + monitor.receive(ee); + } finally { + LOG.debug("Marking snapshot" + SnapshotDescriptionUtils.toString(snapshot) + + " as finished."); + + // 6. 
mark the timer as finished - even if we got an exception, we don't need to time the + // operation any further + timeoutInjector.complete(); + } + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (revision 0) @@ -0,0 +1,150 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.snapshot; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.CancellationException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.TableExistsException; +import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.errorhandling.ForeignException; +import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.SnapshotSentinel; +import org.apache.hadoop.hbase.master.handler.CreateTableHandler; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException; +import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; +import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.base.Preconditions; + +/** + * Handler to Clone a snapshot. + * + *
    Uses {@link RestoreSnapshotHelper} to create a new table with the same + * content of the specified snapshot. + */ +@InterfaceAudience.Private +public class CloneSnapshotHandler extends CreateTableHandler implements SnapshotSentinel { + private static final Log LOG = LogFactory.getLog(CloneSnapshotHandler.class); + + private final static String NAME = "Master CloneSnapshotHandler"; + + private final SnapshotDescription snapshot; + + private final ForeignExceptionDispatcher monitor; + + private volatile boolean stopped = false; + + public CloneSnapshotHandler(final MasterServices masterServices, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { + super(masterServices, masterServices.getMasterFileSystem(), + masterServices.getServerManager(), hTableDescriptor, + masterServices.getConfiguration(), null, masterServices.getCatalogTracker(), + masterServices.getAssignmentManager()); + + // Snapshot information + this.snapshot = snapshot; + + // Monitor + this.monitor = new ForeignExceptionDispatcher(); + } + + /** + * Create the on-disk regions, using the tableRootDir provided by the CreateTableHandler. + * The cloned table will be created in a temp directory, and then the CreateTableHandler + * will be responsible to add the regions returned by this method to META and do the assignment. + */ + @Override + protected List handleCreateHdfsRegions(final Path tableRootDir, final String tableName) + throws IOException { + FileSystem fs = fileSystemManager.getFileSystem(); + Path rootDir = fileSystemManager.getRootDir(); + Path tableDir = new Path(tableRootDir, tableName); + + try { + // 1. Execute the on-disk Clone + Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); + RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper(conf, fs, + snapshot, snapshotDir, hTableDescriptor, tableDir, monitor); + RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions(); + + // Clone operation should not have stuff to restore or remove + Preconditions.checkArgument(!metaChanges.hasRegionsToRestore(), + "A clone should not have regions to restore"); + Preconditions.checkArgument(!metaChanges.hasRegionsToRemove(), + "A clone should not have regions to remove"); + + // At this point the clone is complete. Next step is enabling the table. + LOG.info("Clone snapshot=" + snapshot.getName() + " on table=" + tableName + " completed!"); + + // 2. let the CreateTableHandler add the regions to meta + return metaChanges.getRegionsToAdd(); + } catch (Exception e) { + String msg = "clone snapshot=" + SnapshotDescriptionUtils.toString(snapshot) + " failed"; + LOG.error(msg, e); + IOException rse = new RestoreSnapshotException(msg, e, snapshot); + + // these handlers aren't futures so we need to register the error here. 
+ this.monitor.receive(new ForeignException(NAME, rse)); + throw rse; + } + } + + @Override + protected void completed(final Throwable exception) { + this.stopped = true; + } + + @Override + public boolean isFinished() { + return this.stopped; + } + + @Override + public SnapshotDescription getSnapshot() { + return snapshot; + } + + @Override + public void cancel(String why) { + if (this.stopped) return; + this.stopped = true; + LOG.info("Stopping clone snapshot=" + snapshot + " because: " + why); + this.monitor.receive(new ForeignException(NAME, new CancellationException(why))); + } + + @Override + public ForeignException getExceptionIfFailed() { + return this.monitor.getException(); + } +} Index: src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java (revision 0) @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.cleaner; + +import java.io.IOException; +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import org.apache.hadoop.hbase.io.HFileLink; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; +import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate; + +/** + * HFileLink cleaner that determines if a hfile should be deleted. + * HFiles can be deleted only if there're no links to them. + * + * When a HFileLink is created a back reference file is created in: + * /hbase/archive/table/region/cf/.links-hfile/ref-region.ref-table + * To check if the hfile can be deleted the back references folder must be empty. + */ +@InterfaceAudience.Private +public class HFileLinkCleaner extends BaseHFileCleanerDelegate { + private static final Log LOG = LogFactory.getLog(HFileLinkCleaner.class); + + private FileSystem fs = null; + + @Override + public synchronized boolean isFileDeletable(Path filePath) { + if (this.fs == null) return false; + + // HFile Link is always deletable + if (HFileLink.isHFileLink(filePath)) return true; + + // If the file is inside a link references directory, means that is a back ref link. + // The back ref can be deleted only if the referenced file doesn't exists. 
+ Path parentDir = filePath.getParent(); + if (HFileLink.isBackReferencesDir(parentDir)) { + try { + Path hfilePath = HFileLink.getHFileFromBackReference(getConf(), filePath); + return !fs.exists(hfilePath); + } catch (IOException e) { + LOG.error("Couldn't verify if the referenced file still exists, keep it just in case"); + return false; + } + } + + // HFile is deletable only if has no links + try { + Path backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName()); + return FSUtils.listStatus(fs, backRefDir) == null; + } catch (IOException e) { + LOG.error("Couldn't get the references, not deleting file, just in case"); + return false; + } + } + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + + // setup filesystem + try { + this.fs = FileSystem.get(this.getConf()); + } catch (IOException e) { + LOG.error("Couldn't instantiate the file system, not deleting file, just in case"); + } + } +} Index: src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileCleaner.java (working copy) @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.io.HFileLink; import org.apache.hadoop.hbase.regionserver.StoreFile; /** * This Chore, every time it runs, will clear the HFiles in the hfile archive @@ -46,6 +47,9 @@ @Override protected boolean validate(Path file) { + if (HFileLink.isBackReferencesDir(file) || HFileLink.isBackReferencesDir(file.getParent())) { + return true; + } return StoreFile.validateStoreFileName(file.getName()); } } Index: src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/cleaner/CleanerChore.java (working copy) @@ -82,7 +82,10 @@ if (logCleaners != null) { for (String className : logCleaners) { T logCleaner = newFileCleaner(className, conf); - if (logCleaner != null) this.cleanersChain.add(logCleaner); + if (logCleaner != null) { + LOG.debug("initialize cleaner=" + className); + this.cleanersChain.add(logCleaner); + } } } } Index: src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/handler/DeleteTableHandler.java (working copy) @@ -24,10 +24,14 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.backup.HFileArchiver; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; @@ -47,6 +51,7 @@ @Override protected void handleTableOperation(List regions) throws 
IOException, KeeperException { + // 1. Wait because of region in transition AssignmentManager am = this.masterServices.getAssignmentManager(); long waitTime = server.getConfiguration(). getLong("hbase.master.wait.on.region", 5 * 60 * 1000); @@ -63,23 +68,39 @@ waitTime + "ms) for region to leave region " + region.getRegionNameAsString() + " in transitions"); } - LOG.debug("Deleting region " + region.getRegionNameAsString() + - " from META and FS"); - // Remove region from META - MetaEditor.deleteRegion(this.server.getCatalogTracker(), region); - // Delete region from FS - this.masterServices.getMasterFileSystem().deleteRegion(region); } - // Delete table from FS - this.masterServices.getMasterFileSystem().deleteTable(tableName); - // Update table descriptor cache - this.masterServices.getTableDescriptors().remove(Bytes.toString(tableName)); - // If entry for this table in zk, and up in AssignmentManager, remove it. + // 2. Remove regions from META + LOG.debug("Deleting regions from META"); + MetaEditor.deleteRegions(this.server.getCatalogTracker(), regions); - am.getZKTable().setDeletedTable(Bytes.toString(tableName)); + // 3. Move the table in /hbase/.tmp + LOG.debug("Moving table directory to a temp directory"); + MasterFileSystem mfs = this.masterServices.getMasterFileSystem(); + Path tempTableDir = mfs.moveTableToTemp(tableName); + + try { + // 4. Delete regions from FS (temp directory) + FileSystem fs = mfs.getFileSystem(); + for (HRegionInfo hri: regions) { + LOG.debug("Archiving region " + hri.getRegionNameAsString() + " from FS"); + HFileArchiver.archiveRegion(fs, mfs.getRootDir(), + tempTableDir, new Path(tempTableDir, hri.getEncodedName())); + } + + // 5. Delete table from FS (temp directory) + if (!fs.delete(tempTableDir, true)) { + LOG.error("Couldn't delete " + tempTableDir); + } + } finally { + // 6. Update table descriptor cache + this.masterServices.getTableDescriptors().remove(Bytes.toString(tableName)); + + // 7. If entry for this table in zk, and up in AssignmentManager, remove it. + am.getZKTable().setDeletedTable(Bytes.toString(tableName)); + } } - + @Override public String toString() { String name = "UnknownServerName"; Index: src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/handler/DisableTableHandler.java (working copy) @@ -170,6 +170,7 @@ while (!server.isStopped() && remaining > 0) { Thread.sleep(waitingTimeForEvents); regions = assignmentManager.getRegionsOfTable(tableName); + LOG.debug("Disable waiting until done; " + remaining + " ms remaining; " + regions); if (regions.isEmpty()) break; remaining = timeout - (System.currentTimeMillis() - startTime); } Index: src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (working copy) @@ -1,5 +1,4 @@ /** - * Copyright 2011 The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -20,13 +19,25 @@ package org.apache.hadoop.hbase.master.handler; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; @@ -41,21 +52,23 @@ import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.regionserver.HRegion; -import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.util.FSTableDescriptors; +import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.apache.hadoop.hbase.util.Threads; import org.apache.zookeeper.KeeperException; /** * Handler to create a table. */ +@InterfaceAudience.Private public class CreateTableHandler extends EventHandler { private static final Log LOG = LogFactory.getLog(CreateTableHandler.class); - private MasterFileSystem fileSystemManager; - private final HTableDescriptor hTableDescriptor; - private Configuration conf; - private final AssignmentManager assignmentManager; - private final CatalogTracker catalogTracker; - private final ServerManager serverManager; + protected MasterFileSystem fileSystemManager; + protected final HTableDescriptor hTableDescriptor; + protected Configuration conf; + protected final AssignmentManager assignmentManager; + protected final CatalogTracker catalogTracker; + protected final ServerManager serverManager; private final HRegionInfo [] newRegions; public CreateTableHandler(Server server, MasterFileSystem fileSystemManager, @@ -98,8 +111,7 @@ // table in progress. This will introduce a new zookeeper call. Given // createTable isn't a frequent operation, that should be ok. try { - if (!this.assignmentManager.getZKTable().checkAndSetEnablingTable( - tableName)) + if (!this.assignmentManager.getZKTable().checkAndSetEnablingTable(tableName)) throw new TableExistsException(tableName); } catch (KeeperException e) { throw new IOException("Unable to ensure that the table will be" + @@ -122,66 +134,91 @@ public void process() { String tableName = this.hTableDescriptor.getNameAsString(); try { - LOG.info("Attemping to create the table " + tableName); - handleCreateTable(); - } catch (IOException e) { + LOG.info("Attempting to create the table " + tableName); + handleCreateTable(tableName); + completed(null); + } catch (Throwable e) { LOG.error("Error trying to create the table " + tableName, e); - } catch (KeeperException e) { - LOG.error("Error trying to create the table " + tableName, e); + completed(e); } } - private void handleCreateTable() throws IOException, KeeperException { + /** + * Called after that process() is completed. + * @param exception null if process() is successful or not null if something has failed. 
+ */ + protected void completed(final Throwable exception) { + } - // TODO: Currently we make the table descriptor and as side-effect the - // tableDir is created. Should we change below method to be createTable - // where we create table in tmp dir with its table descriptor file and then - // do rename to move it into place? - FSTableDescriptors.createTableDescriptor(this.hTableDescriptor, this.conf); + /** + * Responsible of table creation (on-disk and META) and assignment. + * - Create the table directory and descriptor (temp folder) + * - Create the on-disk regions (temp folder) + * [If something fails here: we've just some trash in temp] + * - Move the table from temp to the root directory + * [If something fails here: we've the table in place but some of the rows required + * present in META. (hbck needed)] + * - Add regions to META + * [If something fails here: we don't have regions assigned: table disabled] + * - Assign regions to Region Servers + * [If something fails here: we still have the table in disabled state] + * - Update ZooKeeper with the enabled state + */ + private void handleCreateTable(String tableName) throws IOException, KeeperException { + Path tempdir = fileSystemManager.getTempDir(); + FileSystem fs = fileSystemManager.getFileSystem(); - List regionInfos = new ArrayList(); - final int batchSize = - this.conf.getInt("hbase.master.createtable.batchsize", 100); - for (int regionIdx = 0; regionIdx < this.newRegions.length; regionIdx++) { - HRegionInfo newRegion = this.newRegions[regionIdx]; - // 1. Create HRegion - HRegion region = HRegion.createHRegion(newRegion, - this.fileSystemManager.getRootDir(), this.conf, - this.hTableDescriptor, null, false, true); + // 1. Create Table Descriptor + FSTableDescriptors.createTableDescriptor(fs, tempdir, this.hTableDescriptor); + Path tempTableDir = new Path(tempdir, tableName); + Path tableDir = new Path(fileSystemManager.getRootDir(), tableName); - regionInfos.add(region.getRegionInfo()); - if (regionIdx % batchSize == 0) { - // 2. Insert into META - MetaEditor.addRegionsToMeta(this.catalogTracker, regionInfos); - regionInfos.clear(); - } + // 2. Create Regions + List regionInfos = handleCreateHdfsRegions(tempdir, tableName); - // 3. Close the new region to flush to disk. Close log file too. - region.close(); + // 3. Move Table temp directory to the hbase root location + if (!fs.rename(tempTableDir, tableDir)) { + throw new IOException("Unable to move table from temp=" + tempTableDir + + " to hbase root=" + tableDir); } - if (regionInfos.size() > 0) { + + if (regionInfos != null && regionInfos.size() > 0) { + // 4. Add regions to META MetaEditor.addRegionsToMeta(this.catalogTracker, regionInfos); - } - // 4. Trigger immediate assignment of the regions in round-robin fashion - List servers = serverManager.getOnlineServersList(); - // Remove the deadNotExpired servers from the server list. - assignmentManager.removeDeadNotExpiredServers(servers); - try { - this.assignmentManager.assignUserRegions(Arrays.asList(newRegions), - servers); - } catch (InterruptedException ie) { - LOG.error("Caught " + ie + " during round-robin assignment"); - throw new IOException(ie); + // 5. Trigger immediate assignment of the regions in round-robin fashion + List servers = serverManager.getOnlineServersList(); + // Remove the deadNotExpired servers from the server list. 
+ assignmentManager.removeDeadNotExpiredServers(servers); + try { + this.assignmentManager.assignUserRegions(regionInfos, servers); + } catch (InterruptedException e) { + LOG.error("Caught " + e + " during round-robin assignment"); + InterruptedIOException ie = new InterruptedIOException(e.getMessage()); + ie.initCause(e); + throw ie; + } } - // 5. Set table enabled flag up in zk. + // 6. Set table enabled flag up in zk. try { - assignmentManager.getZKTable(). - setEnabledTable(this.hTableDescriptor.getNameAsString()); + assignmentManager.getZKTable().setEnabledTable(tableName); } catch (KeeperException e) { - throw new IOException("Unable to ensure that the table will be" + + throw new IOException("Unable to ensure that " + tableName + " will be" + " enabled because of a ZooKeeper issue", e); } } -} \ No newline at end of file + + /** + * Create the on-disk structure for the table, and returns the regions info. + * @param tableRootDir directory where the table is being created + * @param tableName name of the table under construction + * @return the list of regions created + */ + protected List handleCreateHdfsRegions(final Path tableRootDir, + final String tableName) + throws IOException { + return ModifyRegionUtils.createRegions(conf, tableRootDir, + hTableDescriptor, newRegions, null); + } +} Index: src/main/java/org/apache/hadoop/hbase/master/handler/TableEventHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/TableEventHandler.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/master/handler/TableEventHandler.java (working copy) @@ -160,12 +160,14 @@ } /** + * Gets a TableDescriptor from the masterServices. Can Throw exceptions. + * * @return Table descriptor for this table * @throws TableExistsException * @throws FileNotFoundException * @throws IOException */ - HTableDescriptor getTableDescriptor() + public HTableDescriptor getTableDescriptor() throws FileNotFoundException, IOException { final String name = Bytes.toString(tableName); HTableDescriptor htd = Index: src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java (working copy) @@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.coprocessor; import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; import java.io.IOException; @@ -289,4 +290,100 @@ */ void postStartMaster(final ObserverContext ctx) throws IOException; + + /** + * Called before a new snapshot is taken. + * Called as part of snapshot RPC call. + * It can't bypass the default action, e.g., ctx.bypass() won't have effect. + * @param ctx the environment to interact with the framework and master + * @param snapshot the SnapshotDescriptor for the snapshot + * @param hTableDescriptor the hTableDescriptor of the table to snapshot + * @throws IOException + */ + void preSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException; + + /** + * Called after the snapshot operation has been requested. + * Called as part of snapshot RPC call. 
+ * @param ctx the environment to interact with the framework and master + * @param snapshot the SnapshotDescriptor for the snapshot + * @param hTableDescriptor the hTableDescriptor of the table to snapshot + * @throws IOException + */ + void postSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException; + + /** + * Called before a snapshot is cloned. + * Called as part of restoreSnapshot RPC call. + * It can't bypass the default action, e.g., ctx.bypass() won't have effect. + * @param ctx the environment to interact with the framework and master + * @param snapshot the SnapshotDescriptor for the snapshot + * @param hTableDescriptor the hTableDescriptor of the table to create + * @throws IOException + */ + void preCloneSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException; + + /** + * Called after a snapshot clone operation has been requested. + * Called as part of restoreSnapshot RPC call. + * @param ctx the environment to interact with the framework and master + * @param snapshot the SnapshotDescriptor for the snapshot + * @param hTableDescriptor the hTableDescriptor of the table to create + * @throws IOException + */ + void postCloneSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException; + + /** + * Called before a snapshot is restored. + * Called as part of restoreSnapshot RPC call. + * It can't bypass the default action, e.g., ctx.bypass() won't have effect. + * @param ctx the environment to interact with the framework and master + * @param snapshot the SnapshotDescriptor for the snapshot + * @param hTableDescriptor the hTableDescriptor of the table to restore + * @throws IOException + */ + void preRestoreSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException; + + /** + * Called after a snapshot restore operation has been requested. + * Called as part of restoreSnapshot RPC call. + * @param ctx the environment to interact with the framework and master + * @param snapshot the SnapshotDescriptor for the snapshot + * @param hTableDescriptor the hTableDescriptor of the table to restore + * @throws IOException + */ + void postRestoreSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException; + + /** + * Called before a snapshot is deleted. + * Called as part of deleteSnapshot RPC call. + * It can't bypass the default action, e.g., ctx.bypass() won't have effect. + * @param ctx the environment to interact with the framework and master + * @param snapshot the SnapshotDescriptor of the snapshot to delete + * @throws IOException + */ + void preDeleteSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot) throws IOException; + + /** + * Called after the delete snapshot operation has been requested. + * Called as part of deleteSnapshot RPC call. 
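A coprocessor picks these hooks up by extending BaseMasterObserver and overriding only what it needs; a minimal sketch (the class name and logging behavior are illustrative):

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;

// Hypothetical observer: records snapshot requests; throwing an IOException here would reject them.
public class SnapshotAuditObserver extends BaseMasterObserver {
  private static final Log LOG = LogFactory.getLog(SnapshotAuditObserver.class);

  @Override
  public void preSnapshot(final ObserverContext<MasterCoprocessorEnvironment> ctx,
      final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor)
      throws IOException {
    LOG.info("snapshot '" + snapshot.getName() + "' requested for table "
        + hTableDescriptor.getNameAsString());
  }
}

Such an observer would typically be registered through the hbase.coprocessor.master.classes property.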
+ * @param ctx the environment to interact with the framework and master + * @param snapshot the SnapshotDescriptor of the snapshot to delete + * @throws IOException + */ + void postDeleteSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot) throws IOException; } Index: src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/coprocessor/BaseMasterObserver.java (working copy) @@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.UnknownRegionException; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; import java.io.IOException; @@ -185,4 +186,50 @@ HRegionInfo region, ServerName srcServer, ServerName destServer) throws IOException { } + + @Override + public void preSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + } + + @Override + public void postSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + } + + @Override + public void preCloneSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + } + + @Override + public void postCloneSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + } + + @Override + public void preRestoreSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + } + + @Override + public void postRestoreSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) + throws IOException { + } + + @Override + public void preDeleteSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot) throws IOException { + } + + @Override + public void postDeleteSnapshot(final ObserverContext ctx, + final SnapshotDescription snapshot) throws IOException { + } } Index: src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java (working copy) @@ -90,6 +90,7 @@ import org.apache.hadoop.hbase.regionserver.RegionOpeningState; import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.regionserver.wal.HLogKey; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ProtoUtil; import org.apache.hadoop.io.MapWritable; @@ -269,6 +270,10 @@ addToMap(FuzzyRowFilter.class, code++); + // we aren't going to bump the rpc version number. + // we don't want to cause incompatiblity with older 0.94/0.92 clients. 
+ addToMap(HSnapshotDescription.class, code); + // make sure that this is the last statement in this static block NEXT_CLASS_CODE = code; } Index: src/main/java/org/apache/hadoop/hbase/io/HFileLink.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/io/HFileLink.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/io/HFileLink.java (revision 0) @@ -0,0 +1,371 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.io; + +import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; + +/** + * HFileLink describes a link to an hfile. + * + * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive) + * HFileLink allows to access the referenced hfile regardless of the location where it is. + * + *
+ * Searches for hfiles in the following order and locations:
+ * <ul>
+ *  <li>/hbase/table/region/cf/hfile</li>
+ *  <li>/hbase/.archive/table/region/cf/hfile</li>
+ * </ul>
    + * + * The link checks first in the original path if it is not present + * it fallbacks to the archived path. + */ +@InterfaceAudience.Private +public class HFileLink extends FileLink { + private static final Log LOG = LogFactory.getLog(HFileLink.class); + + /** + * A non-capture group, for HFileLink, so that this can be embedded. + * The HFileLink describe a link to an hfile in a different table/region + * and the name is in the form: table=region-hfile. + *
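A tiny worked example of that name layout, with made-up names, using the same "%s=%s-%s" format that createHFileLinkName() applies further down:

// Hypothetical names, just to show the table=region-hfile layout described above.
public class HFileLinkNameSketch {
  public static void main(String[] args) {
    String linkedTable = "testtb";
    String linkedRegion = "d7e1a28fc0ea3c7bd252875cab72a30f";  // encoded region name (hex)
    String linkedHFile = "9df43e4c2a8b4f5c9e0d3b2a1c6f7e8d";   // hfile name (hex/uuid style)

    String linkName = String.format("%s=%s-%s", linkedTable, linkedRegion, linkedHFile);
    // -> testtb=d7e1a28fc0ea3c7bd252875cab72a30f-9df43e4c2a8b4f5c9e0d3b2a1c6f7e8d
    System.out.println(linkName);
  }
}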
    + * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name. + * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name. + * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) + * and the bulk loaded (_SeqId_[0-9]+_) hfiles. + */ + public static final String LINK_NAME_REGEX = + String.format("%s=%s-%s", HTableDescriptor.VALID_USER_TABLE_REGEX, + HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFile.HFILE_NAME_REGEX); + + /** Define the HFile Link name parser in the form of: table=region-hfile */ + private static final Pattern LINK_NAME_PATTERN = + Pattern.compile(String.format("^(%s)=(%s)-(%s)$", HTableDescriptor.VALID_USER_TABLE_REGEX, + HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFile.HFILE_NAME_REGEX)); + + /** + * The pattern should be used for hfile and reference links + * that can be found in /hbase/table/region/family/ + */ + private static final Pattern REF_OR_HFILE_LINK_PATTERN = + Pattern.compile(String.format("^(%s)=(%s)-(.+)$", HTableDescriptor.VALID_USER_TABLE_REGEX, + HRegionInfo.ENCODED_REGION_NAME_REGEX)); + + private final Path archivePath; + private final Path originPath; + private final Path tempPath; + + /** + * @param conf {@link Configuration} from which to extract specific archive locations + * @param path The path of the HFile Link. + * @throws IOException on unexpected error. + */ + public HFileLink(Configuration conf, Path path) throws IOException { + this(FSUtils.getRootDir(conf), HFileArchiveUtil.getArchivePath(conf), path); + } + + /** + * @param rootDir Path to the root directory where hbase files are stored + * @param archiveDir Path to the hbase archive directory + * @param path The path of the HFile Link. + */ + public HFileLink(final Path rootDir, final Path archiveDir, final Path path) { + Path hfilePath = getRelativeTablePath(path); + this.tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath); + this.originPath = new Path(rootDir, hfilePath); + this.archivePath = new Path(archiveDir, hfilePath); + setLocations(originPath, tempPath, archivePath); + } + + /** + * @return the origin path of the hfile. + */ + public Path getOriginPath() { + return this.originPath; + } + + /** + * @return the path of the archived hfile. + */ + public Path getArchivePath() { + return this.archivePath; + } + + /** + * @param path Path to check. + * @return True if the path is a HFileLink. + */ + public static boolean isHFileLink(final Path path) { + return isHFileLink(path.getName()); + } + + + /** + * @param fileName File name to check. + * @return True if the path is a HFileLink. + */ + public static boolean isHFileLink(String fileName) { + Matcher m = LINK_NAME_PATTERN.matcher(fileName); + if (!m.matches()) return false; + + return m.groupCount() > 2 && m.group(3) != null && m.group(2) != null && m.group(1) != null; + } + + /** + * Convert a HFileLink path to a table relative path. + * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd + * becomes: /hbase/testtb/4567/cf/abcd + * + * @param path HFileLink path + * @return Relative table path + * @throws IOException on unexpected error. + */ + private static Path getRelativeTablePath(final Path path) { + // table=region-hfile + Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName()); + if (!m.matches()) { + throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink name!"); + } + + // Convert the HFileLink name into a real table/region/cf/hfile path. 
+ String tableName = m.group(1); + String regionName = m.group(2); + String hfileName = m.group(3); + String familyName = path.getParent().getName(); + return new Path(new Path(tableName, regionName), new Path(familyName, hfileName)); + } + + /** + * Get the HFile name of the referenced link + * + * @param fileName HFileLink file name + * @return the name of the referenced HFile + */ + public static String getReferencedHFileName(final String fileName) { + Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); + if (!m.matches()) { + throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); + } + return(m.group(3)); + } + + /** + * Get the Region name of the referenced link + * + * @param fileName HFileLink file name + * @return the name of the referenced Region + */ + public static String getReferencedRegionName(final String fileName) { + Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); + if (!m.matches()) { + throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); + } + return(m.group(2)); + } + + /** + * Get the Table name of the referenced link + * + * @param fileName HFileLink file name + * @return the name of the referenced Table + */ + public static String getReferencedTableName(final String fileName) { + Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); + if (!m.matches()) { + throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); + } + return(m.group(1)); + } + + /** + * Create a new HFileLink name + * + * @param hfileRegionInfo - Linked HFile Region Info + * @param hfileName - Linked HFile name + * @return file name of the HFile Link + */ + public static String createHFileLinkName(final HRegionInfo hfileRegionInfo, + final String hfileName) { + return createHFileLinkName(hfileRegionInfo.getTableNameAsString(), + hfileRegionInfo.getEncodedName(), hfileName); + } + + /** + * Create a new HFileLink name + * + * @param tableName - Linked HFile table name + * @param regionName - Linked HFile region name + * @param hfileName - Linked HFile name + * @return file name of the HFile Link + */ + public static String createHFileLinkName(final String tableName, + final String regionName, final String hfileName) { + return String.format("%s=%s-%s", tableName, regionName, hfileName); + } + + /** + * Create a new HFileLink + * + *
    It also adds a back-reference to the hfile back-reference directory + * to simplify the reference-count and the cleaning process. + * + * @param conf {@link Configuration} to read for the archive directory name + * @param fs {@link FileSystem} on which to write the HFileLink + * @param dstFamilyPath - Destination path (table/region/cf/) + * @param hfileRegionInfo - Linked HFile Region Info + * @param hfileName - Linked HFile name + * @return true if the file is created, otherwise the file exists. + * @throws IOException on file or parent directory creation failure + */ + public static boolean create(final Configuration conf, final FileSystem fs, + final Path dstFamilyPath, final HRegionInfo hfileRegionInfo, + final String hfileName) throws IOException { + String linkedTable = hfileRegionInfo.getTableNameAsString(); + String linkedRegion = hfileRegionInfo.getEncodedName(); + return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName); + } + + /** + * Create a new HFileLink + * + *
    It also adds a back-reference to the hfile back-reference directory + * to simplify the reference-count and the cleaning process. + * + * @param conf {@link Configuration} to read for the archive directory name + * @param fs {@link FileSystem} on which to write the HFileLink + * @param dstFamilyPath - Destination path (table/region/cf/) + * @param linkedTable - Linked Table Name + * @param linkedRegion - Linked Region Name + * @param hfileName - Linked HFile name + * @return true if the file is created, otherwise the file exists. + * @throws IOException on file or parent directory creation failure + */ + public static boolean create(final Configuration conf, final FileSystem fs, + final Path dstFamilyPath, final String linkedTable, final String linkedRegion, + final String hfileName) throws IOException { + String familyName = dstFamilyPath.getName(); + String regionName = dstFamilyPath.getParent().getName(); + String tableName = dstFamilyPath.getParent().getParent().getName(); + + String name = createHFileLinkName(linkedTable, linkedRegion, hfileName); + String refName = createBackReferenceName(tableName, regionName); + + // Make sure the destination directory exists + fs.mkdirs(dstFamilyPath); + + // Make sure the FileLink reference directory exists + Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf, + linkedTable, linkedRegion, familyName); + Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName); + fs.mkdirs(backRefssDir); + + // Create the reference for the link + Path backRefPath = new Path(backRefssDir, refName); + fs.createNewFile(backRefPath); + try { + // Create the link + return fs.createNewFile(new Path(dstFamilyPath, name)); + } catch (IOException e) { + LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e); + // Revert the reference if the link creation failed + fs.delete(backRefPath, false); + throw e; + } + } + + /** + * Create a new HFileLink starting from a hfileLink name + * + *
    It also adds a back-reference to the hfile back-reference directory + * to simplify the reference-count and the cleaning process. + * + * @param conf {@link Configuration} to read for the archive directory name + * @param fs {@link FileSystem} on which to write the HFileLink + * @param dstFamilyPath - Destination path (table/region/cf/) + * @param hfileLinkName - HFileLink name (it contains hfile-region-table) + * @return true if the file is created, otherwise the file exists. + * @throws IOException on file or parent directory creation failure + */ + public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs, + final Path dstFamilyPath, final String hfileLinkName) throws IOException { + Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName); + if (!m.matches()) { + throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!"); + } + return create(conf, fs, dstFamilyPath, m.group(1), m.group(2), m.group(3)); + } + + /** + * Create the back reference name + */ + private static String createBackReferenceName(final String tableName, final String regionName) { + return regionName + "." + tableName; + } + + /** + * Get the full path of the HFile referenced by the back reference + * + * @param rootDir root hbase directory + * @param linkRefPath Link Back Reference path + * @return full path of the referenced hfile + * @throws IOException on unexpected error. + */ + public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) { + int separatorIndex = linkRefPath.getName().indexOf('.'); + String linkRegionName = linkRefPath.getName().substring(0, separatorIndex); + String linkTableName = linkRefPath.getName().substring(separatorIndex + 1); + String hfileName = getBackReferenceFileName(linkRefPath.getParent()); + Path familyPath = linkRefPath.getParent().getParent(); + Path regionPath = familyPath.getParent(); + Path tablePath = regionPath.getParent(); + + String linkName = createHFileLinkName(tablePath.getName(), regionPath.getName(), hfileName); + Path linkTableDir = FSUtils.getTablePath(rootDir, linkTableName); + Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName); + return new Path(new Path(regionDir, familyPath.getName()), linkName); + } + + /** + * Get the full path of the HFile referenced by the back reference + * + * @param conf {@link Configuration} to read for the archive directory name + * @param linkRefPath Link Back Reference path + * @return full path of the referenced hfile + * @throws IOException on unexpected error. + */ + public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath) + throws IOException { + return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath); + } +} Index: src/main/java/org/apache/hadoop/hbase/io/HLogLink.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/io/HLogLink.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/io/HLogLink.java (revision 0) @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.io; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.util.FSUtils; + +/** + * HLogLink describes a link to a WAL. + * + * An hlog can be in /hbase/.logs// + * or it can be in /hbase/.oldlogs/ + * + * The link checks first in the original path, + * if it is not present it fallbacks to the archived path. + */ +@InterfaceAudience.Private +public class HLogLink extends FileLink { + /** + * @param conf {@link Configuration} from which to extract specific archive locations + * @param serverName Region Server owner of the log + * @param logName WAL file name + * @throws IOException on unexpected error. + */ + public HLogLink(final Configuration conf, + final String serverName, final String logName) throws IOException { + this(FSUtils.getRootDir(conf), serverName, logName); + } + + /** + * @param rootDir Path to the root directory where hbase files are stored + * @param serverName Region Server owner of the log + * @param logName WAL file name + */ + public HLogLink(final Path rootDir, final String serverName, final String logName) { + final Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME); + final Path logDir = new Path(new Path(rootDir, HConstants.HREGION_LOGDIR_NAME), serverName); + setLocations(new Path(logDir, logName), new Path(oldLogDir, logName)); + } + + /** + * @param originPath Path to the wal in the log directory + * @param archivePath Path to the wal in the archived log directory + */ + public HLogLink(final Path originPath, final Path archivePath) { + setLocations(originPath, archivePath); + } +} Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java (working copy) @@ -180,18 +180,18 @@ static final AtomicLong checksumFailures = new AtomicLong(); // For getting more detailed stats on FS latencies - // If, for some reason, the metrics subsystem stops polling for latencies, + // If, for some reason, the metrics subsystem stops polling for latencies, // I don't want data to pile up in a memory leak // so, after LATENCY_BUFFER_SIZE items have been enqueued for processing, // fs latency stats will be dropped (and this behavior will be logged) private static final int LATENCY_BUFFER_SIZE = 5000; - private static final BlockingQueue fsReadLatenciesNanos = + private static final BlockingQueue fsReadLatenciesNanos = new ArrayBlockingQueue(LATENCY_BUFFER_SIZE); - private static final BlockingQueue fsWriteLatenciesNanos = + private static final BlockingQueue fsWriteLatenciesNanos = new ArrayBlockingQueue(LATENCY_BUFFER_SIZE); - private static final BlockingQueue fsPreadLatenciesNanos = + private static final BlockingQueue fsPreadLatenciesNanos = new ArrayBlockingQueue(LATENCY_BUFFER_SIZE); - + public static 
final void offerReadLatency(long latencyNanos, boolean pread) { if (pread) { fsPreadLatenciesNanos.offer(latencyNanos); // might be silently dropped, if the queue is full @@ -203,30 +203,30 @@ readOps.incrementAndGet(); } } - + public static final void offerWriteLatency(long latencyNanos) { fsWriteLatenciesNanos.offer(latencyNanos); // might be silently dropped, if the queue is full - + writeTimeNano.addAndGet(latencyNanos); writeOps.incrementAndGet(); } - + public static final Collection getReadLatenciesNanos() { - final List latencies = + final List latencies = Lists.newArrayListWithCapacity(fsReadLatenciesNanos.size()); fsReadLatenciesNanos.drainTo(latencies); return latencies; } public static final Collection getPreadLatenciesNanos() { - final List latencies = + final List latencies = Lists.newArrayListWithCapacity(fsPreadLatenciesNanos.size()); fsPreadLatenciesNanos.drainTo(latencies); return latencies; } - + public static final Collection getWriteLatenciesNanos() { - final List latencies = + final List latencies = Lists.newArrayListWithCapacity(fsWriteLatenciesNanos.size()); fsWriteLatenciesNanos.drainTo(latencies); return latencies; @@ -572,7 +572,7 @@ HFileSystem hfs = null; FSDataInputStream fsdis = fs.open(path); FSDataInputStream fsdisNoFsChecksum = fsdis; - // If the fs is not an instance of HFileSystem, then create an + // If the fs is not an instance of HFileSystem, then create an // instance of HFileSystem that wraps over the specified fs. // In this case, we will not be able to avoid checksumming inside // the filesystem. @@ -592,6 +592,39 @@ } /** + * @param fs A file system + * @param path Path to HFile + * @param fsdis an open checksummed stream of path's file + * @param fsdisNoFsChecksum an open unchecksummed stream of path's file + * @param size max size of the trailer. + * @param cacheConf Cache configuration for hfile's contents + * @param preferredEncodingInCache Preferred in-cache data encoding algorithm. + * @param closeIStream boolean for closing file after the getting the reader version. + * @return A version specific Hfile Reader + * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException + */ + public static Reader createReaderWithEncoding( + FileSystem fs, Path path, FSDataInputStream fsdis, + FSDataInputStream fsdisNoFsChecksum, long size, CacheConfig cacheConf, + DataBlockEncoding preferredEncodingInCache, boolean closeIStream) + throws IOException { + HFileSystem hfs = null; + + // If the fs is not an instance of HFileSystem, then create an + // instance of HFileSystem that wraps over the specified fs. + // In this case, we will not be able to avoid checksumming inside + // the filesystem. + if (!(fs instanceof HFileSystem)) { + hfs = new HFileSystem(fs); + } else { + hfs = (HFileSystem)fs; + } + return pickReaderVersion(path, fsdis, fsdisNoFsChecksum, size, + closeIStream, cacheConf, + preferredEncodingInCache, hfs); + } + + /** * @param fs filesystem * @param path Path to file to read * @param cacheConf This must not be null. 
@see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)} Index: src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java (working copy) @@ -581,15 +581,21 @@ return this.stats.getEvictedCount(); } + EvictionThread getEvictionThread() { + return this.evictionThread; + } + /* * Eviction thread. Sits in waiting state until an eviction is triggered * when the cache size grows above the acceptable level.

    * * Thread is triggered into action by {@link LruBlockCache#runEviction()} */ - private static class EvictionThread extends HasThread { + static class EvictionThread extends HasThread { private WeakReference cache; private boolean go = true; + // flag set after enter the run method, used for test + private boolean enteringRun = false; public EvictionThread(LruBlockCache cache) { super(Thread.currentThread().getName() + ".LruBlockCache.EvictionThread"); @@ -599,6 +605,7 @@ @Override public void run() { + enteringRun = true; while (this.go) { synchronized(this) { try { @@ -621,6 +628,13 @@ this.go = false; interrupt(); } + + /** + * Used for the test. + */ + boolean isEnteringRun() { + return this.enteringRun; + } } /* Index: src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java (working copy) @@ -58,10 +58,12 @@ private boolean firstKeySeeked = false; /** - * @param fs - * @param p + * Creates a half file reader for a normal hfile. + * @param fs fileystem to read from + * @param p path to hfile * @param cacheConf - * @param r + * @param r original reference file (contains top or bottom) + * @param preferredEncodingInCache * @throws IOException */ public HalfStoreFileReader(final FileSystem fs, final Path p, @@ -78,6 +80,30 @@ this.top = Reference.isTopFileRegion(r.getFileRegion()); } + /** + * Creates a half file reader for a hfile referred to by an hfilelink. + * @param fs fileystem to read from + * @param p path to hfile + * @param link + * @param cacheConf + * @param r original reference file (contains top or bottom) + * @param preferredEncodingInCache + * @throws IOException + */ + public HalfStoreFileReader(final FileSystem fs, final Path p, final HFileLink link, + final CacheConfig cacheConf, final Reference r, + DataBlockEncoding preferredEncodingInCache) throws IOException { + super(fs, p, link, link.getFileStatus(fs).getLen(), cacheConf, preferredEncodingInCache, true); + // This is not actual midkey for this half-file; its just border + // around which we split top and bottom. Have to look in files to find + // actual last and first keys for bottom and top halves. Half-files don't + // have an actual midkey themselves. No midkey is how we indicate file is + // not splittable. + this.splitkey = r.getSplitKey(); + // Is it top or bottom half? + this.top = Reference.isTopFileRegion(r.getFileRegion()); + } + protected boolean isTop() { return this.top; } Index: src/main/java/org/apache/hadoop/hbase/io/FileLink.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/io/FileLink.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/io/FileLink.java (revision 0) @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.io; + +import java.util.Collection; + +import java.io.IOException; +import java.io.InputStream; +import java.io.FileNotFoundException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PositionedReadable; +import org.apache.hadoop.fs.Seekable; + +/** + * The FileLink is a sort of hardlink, that allows to access a file given a set of locations. + * + *

+ * The Problem:
+ * <ul>
+ *  <li>HDFS doesn't have support for hardlinks, and this makes it impossible to reference
+ *      the same data blocks using different names.</li>
+ *  <li>HBase stores files in one location (e.g. table/region/family/) and, when a file is no
+ *      longer needed (e.g. after a compaction, a region deletion, ...), moves it to an archive
+ *      directory.</li>
+ * </ul>
+ * If we want to create a reference to a file, we need to remember that it can be in its
+ * original location or in the archive folder.
+ * The FileLink class tries to abstract this concept and, given a set of locations,
+ * it is able to switch between them, making this operation transparent for the user.
+ * More concrete implementations of the FileLink are the {@link HFileLink} and the {@link HLogLink}.
+ *
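As an illustration (not code from this patch), the paragraph above is the heart of the new class: a link is an ordered list of candidate locations, and callers fall back from one location to the next. A minimal, self-contained sketch of that idea, using only the standard Hadoop FileSystem API and invented names (LocationFallback, resolve):

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class LocationFallback {
  private LocationFallback() {}

  /** Return the first candidate location that currently exists,
   *  e.g. the original store path or its archived copy. */
  public static Path resolve(FileSystem fs, Path... locations) throws IOException {
    for (Path p : locations) {
      if (fs.exists(p)) {
        return p;
      }
    }
    throw new FileNotFoundException("None of the candidate locations exist");
  }
}

FileLink#getAvailablePath and the FileLinkInputStream added further down implement the same pattern, with the extra twist that an already-open stream is re-opened against the next location when the file moves mid-read.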

+ * Back-references:
+ * To help the {@link CleanerChore} keep track of the links to a particular file,
+ * during the FileLink creation, a new file is placed inside a back-reference directory.
+ * There's one back-reference directory for each file that has links,
+ * and in the directory there's one file per link.
+ *
+ * HFileLink Example
+ * <ul>
+ *  <li>/hbase/table/region-x/cf/file-k
+ *      (Original File)</li>
+ *  <li>/hbase/table-cloned/region-y/cf/file-k.region-x.table
+ *      (HFileLink to the original file)</li>
+ *  <li>/hbase/table-2nd-cloned/region-z/cf/file-k.region-x.table
+ *      (HFileLink to the original file)</li>
+ *  <li>/hbase/.archive/table/region-x/.links-file-k/region-y.table-cloned
+ *      (Back-reference to the link in table-cloned)</li>
+ *  <li>/hbase/.archive/table/region-x/.links-file-k/region-z.table-2nd-cloned
+ *      (Back-reference to the link in table-2nd-cloned)</li>
+ * </ul>
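As a reading aid (not part of the patch), the back-reference file names in the example above follow the regionName.tableName convention built by createBackReferenceName and split back apart in getHFileFromBackReference. A small sketch of that naming, with illustrative class and method names:

public final class BackReferenceNames {
  private BackReferenceNames() {}

  /** e.g. ("table-cloned", "region-y") -> "region-y.table-cloned" */
  public static String create(String tableName, String regionName) {
    return regionName + "." + tableName;
  }

  /** Splits "region-y.table-cloned" into { "region-y", "table-cloned" },
   *  mirroring the indexOf('.') parsing used by getHFileFromBackReference. */
  public static String[] parse(String backRefName) {
    int separatorIndex = backRefName.indexOf('.');
    return new String[] {
      backRefName.substring(0, separatorIndex),   // region holding the link
      backRefName.substring(separatorIndex + 1)   // table holding the link
    };
  }
}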
    + */ +@InterfaceAudience.Private +public class FileLink { + private static final Log LOG = LogFactory.getLog(FileLink.class); + + /** Define the Back-reference directory name prefix: .links-/ */ + public static final String BACK_REFERENCES_DIRECTORY_PREFIX = ".links-"; + + /** + * FileLink InputStream that handles the switch between the original path + * and the alternative locations, when the file is moved. + */ + private static class FileLinkInputStream extends InputStream + implements Seekable, PositionedReadable { + private FSDataInputStream in = null; + private Path currentPath = null; + private long pos = 0; + + private final FileLink fileLink; + private final int bufferSize; + private final FileSystem fs; + + public FileLinkInputStream(final FileSystem fs, final FileLink fileLink) + throws IOException { + this(fs, fileLink, fs.getConf().getInt("io.file.buffer.size", 4096)); + } + + public FileLinkInputStream(final FileSystem fs, final FileLink fileLink, int bufferSize) + throws IOException { + this.bufferSize = bufferSize; + this.fileLink = fileLink; + this.fs = fs; + + this.in = tryOpen(); + } + + @Override + public int read() throws IOException { + int res; + try { + res = in.read(); + } catch (FileNotFoundException e) { + res = tryOpen().read(); + } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() + res = tryOpen().read(); + } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() + res = tryOpen().read(); + } + if (res > 0) pos += 1; + return res; + } + + @Override + public int read(byte b[]) throws IOException { + return read(b, 0, b.length); + } + + @Override + public int read(byte b[], int off, int len) throws IOException { + int n; + try { + n = in.read(b, off, len); + } catch (FileNotFoundException e) { + n = tryOpen().read(b, off, len); + } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() + n = tryOpen().read(b, off, len); + } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() + n = tryOpen().read(b, off, len); + } + if (n > 0) pos += n; + assert(in.getPos() == pos); + return n; + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) throws IOException { + int n; + try { + n = in.read(position, buffer, offset, length); + } catch (FileNotFoundException e) { + n = tryOpen().read(position, buffer, offset, length); + } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() + n = tryOpen().read(position, buffer, offset, length); + } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() + n = tryOpen().read(position, buffer, offset, length); + } + return n; + } + + @Override + public void readFully(long position, byte[] buffer) throws IOException { + readFully(position, buffer, 0, buffer.length); + } + + @Override + public void readFully(long position, byte[] buffer, int offset, int length) throws IOException { + try { + in.readFully(position, buffer, offset, length); + } catch (FileNotFoundException e) { + tryOpen().readFully(position, buffer, offset, length); + } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() + tryOpen().readFully(position, buffer, offset, length); + } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() + tryOpen().readFully(position, buffer, offset, length); + } + } + + @Override + public long skip(long n) throws IOException { + long skipped; + + try { + skipped = in.skip(n); + } catch (FileNotFoundException e) { + 
skipped = tryOpen().skip(n); + } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() + skipped = tryOpen().skip(n); + } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() + skipped = tryOpen().skip(n); + } + + if (skipped > 0) pos += skipped; + return skipped; + } + + @Override + public int available() throws IOException { + try { + return in.available(); + } catch (FileNotFoundException e) { + return tryOpen().available(); + } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() + return tryOpen().available(); + } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() + return tryOpen().available(); + } + } + + @Override + public void seek(long pos) throws IOException { + try { + in.seek(pos); + } catch (FileNotFoundException e) { + tryOpen().seek(pos); + } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() + tryOpen().seek(pos); + } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() + tryOpen().seek(pos); + } + this.pos = pos; + } + + @Override + public long getPos() throws IOException { + return pos; + } + + @Override + public boolean seekToNewSource(long targetPos) throws IOException { + boolean res; + try { + res = in.seekToNewSource(targetPos); + } catch (FileNotFoundException e) { + res = tryOpen().seekToNewSource(targetPos); + } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt() + res = tryOpen().seekToNewSource(targetPos); + } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt() + res = tryOpen().seekToNewSource(targetPos); + } + if (res) pos = targetPos; + return res; + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public synchronized void mark(int readlimit) { + } + + @Override + public synchronized void reset() throws IOException { + throw new IOException("mark/reset not supported"); + } + + @Override + public boolean markSupported() { + return false; + } + + /** + * Try to open the file from one of the available locations. + * + * @return FSDataInputStream stream of the opened file link + * @throws IOException on unexpected error, or file not found. + */ + private FSDataInputStream tryOpen() throws IOException { + for (Path path: fileLink.getLocations()) { + if (path.equals(currentPath)) continue; + try { + in = fs.open(path, bufferSize); + in.seek(pos); + assert(in.getPos() == pos) : "Link unable to seek to the right position=" + pos; + if (LOG.isTraceEnabled()) { + if (currentPath != null) { + LOG.debug("link open path=" + path); + } else { + LOG.trace("link switch from path=" + currentPath + " to path=" + path); + } + } + currentPath = path; + return(in); + } catch (FileNotFoundException e) { + // Try another file location + } + } + throw new FileNotFoundException("Unable to open link: " + fileLink); + } + } + + private Path[] locations = null; + + protected FileLink() { + this.locations = null; + } + + /** + * @param originPath Original location of the file to link + * @param alternativePaths Alternative locations to look for the linked file + */ + public FileLink(Path originPath, Path... alternativePaths) { + setLocations(originPath, alternativePaths); + } + + /** + * @param locations locations to look for the linked file + */ + public FileLink(final Collection locations) { + this.locations = locations.toArray(new Path[locations.size()]); + } + + /** + * @return the locations to look for the linked file. 
+ */ + public Path[] getLocations() { + return locations; + } + + public String toString() { + StringBuilder str = new StringBuilder(getClass().getName()); + str.append(" locations=["); + int i = 0; + for (Path location: locations) { + if (i++ > 0) str.append(", "); + str.append(location.toString()); + } + str.append("]"); + return str.toString(); + } + + /** + * @return the path of the first available link. + */ + public Path getAvailablePath(FileSystem fs) throws IOException { + for (Path path: locations) { + if (fs.exists(path)) { + return path; + } + } + throw new FileNotFoundException("Unable to open link: " + this); + } + + /** + * Get the FileStatus of the referenced file. + * + * @param fs {@link FileSystem} on which to get the file status + * @return InputStream for the hfile link. + * @throws IOException on unexpected error. + */ + public FileStatus getFileStatus(FileSystem fs) throws IOException { + for (Path path: locations) { + try { + return fs.getFileStatus(path); + } catch (FileNotFoundException e) { + // Try another file location + } + } + throw new FileNotFoundException("Unable to open link: " + this); + } + + /** + * Open the FileLink for read. + *

    + * It uses a wrapper of FSDataInputStream that is agnostic to the location + * of the file, even if the file switches between locations. + * + * @param fs {@link FileSystem} on which to open the FileLink + * @return InputStream for reading the file link. + * @throws IOException on unexpected error. + */ + public FSDataInputStream open(final FileSystem fs) throws IOException { + return new FSDataInputStream(new FileLinkInputStream(fs, this)); + } + + /** + * Open the FileLink for read. + *

    + * It uses a wrapper of FSDataInputStream that is agnostic to the location + * of the file, even if the file switches between locations. + * + * @param fs {@link FileSystem} on which to open the FileLink + * @param bufferSize the size of the buffer to be used. + * @return InputStream for reading the file link. + * @throws IOException on unexpected error. + */ + public FSDataInputStream open(final FileSystem fs, int bufferSize) throws IOException { + return new FSDataInputStream(new FileLinkInputStream(fs, this, bufferSize)); + } + + /** + * NOTE: This method must be used only in the constructor! + * It creates a List with the specified locations for the link. + */ + protected void setLocations(Path originPath, Path... alternativePaths) { + assert this.locations == null : "Link locations already set"; + this.locations = new Path[1 + alternativePaths.length]; + this.locations[0] = originPath; + for (int i = 0; i < alternativePaths.length; i++) { + this.locations[i + 1] = alternativePaths[i]; + } + } + + /** + * Get the directory to store the link back references + * + *

    To simplify the reference count process, during the FileLink creation + * a back-reference is added to the back-reference directory of the specified file. + * + * @param storeDir Root directory for the link reference folder + * @param fileName File Name with links + * @return Path for the link back references. + */ + public static Path getBackReferencesDir(final Path storeDir, final String fileName) { + return new Path(storeDir, BACK_REFERENCES_DIRECTORY_PREFIX + fileName); + } + + /** + * Get the referenced file name from the reference link directory path. + * + * @param dirPath Link references directory path + * @return Name of the file referenced + */ + public static String getBackReferenceFileName(final Path dirPath) { + return dirPath.getName().substring(BACK_REFERENCES_DIRECTORY_PREFIX.length()); + } + + /** + * Checks if the specified directory path is a back reference links folder. + * + * @param dirPath Directory path to verify + * @return True if the specified directory is a link references folder + */ + public static boolean isBackReferencesDir(final Path dirPath) { + if (dirPath == null) return false; + return dirPath.getName().startsWith(BACK_REFERENCES_DIRECTORY_PREFIX); + } +} + Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaEditor.java (working copy) @@ -18,8 +18,10 @@ package org.apache.hadoop.hbase.catalog; import java.io.IOException; +import java.io.InterruptedIOException; import java.net.ConnectException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.commons.logging.Log; @@ -30,6 +32,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.util.Bytes; @@ -124,17 +127,50 @@ * @param d Delete to add to .META. * @throws IOException */ - static void deleteMetaTable(final CatalogTracker ct, final Delete d) - throws IOException { + static void deleteFromMetaTable(final CatalogTracker ct, final Delete d) + throws IOException { + List dels = new ArrayList(1); + dels.add(d); + deleteFromMetaTable(ct, dels); + } + + /** + * Delete the passed deletes from the .META. table. + * @param ct CatalogTracker on whose back we will ride the edit. + * @param deletes Deletes to add to .META. This list should support #remove. + * @throws IOException + */ + public static void deleteFromMetaTable(final CatalogTracker ct, final List deletes) + throws IOException { HTable t = MetaReader.getMetaHTable(ct); try { - t.delete(d); + t.delete(deletes); } finally { t.close(); } } /** + * Execute the passed mutations against .META. table. + * @param ct CatalogTracker on whose back we will ride the edit. + * @param mutations Puts and Deletes to execute on .META. 
+ * @throws IOException + */ + static void mutateMetaTable(final CatalogTracker ct, final List mutations) + throws IOException { + HTable t = MetaReader.getMetaHTable(ct); + try { + t.batch(mutations); + } catch (InterruptedException e) { + InterruptedIOException ie = new InterruptedIOException(e.getMessage()); + ie.initCause(e); + throw ie; + } finally { + t.close(); + } + } + + /** * Adds a META row for the specified new region. * @param regionInfo region information * @throws IOException if problem connecting or updating meta @@ -272,11 +308,57 @@ HRegionInfo regionInfo) throws IOException { Delete delete = new Delete(regionInfo.getRegionName()); - deleteMetaTable(catalogTracker, delete); + deleteFromMetaTable(catalogTracker, delete); LOG.info("Deleted region " + regionInfo.getRegionNameAsString() + " from META"); } /** + * Deletes the specified regions from META. + * @param catalogTracker + * @param regionsInfo list of regions to be deleted from META + * @throws IOException + */ + public static void deleteRegions(CatalogTracker catalogTracker, + List regionsInfo) throws IOException { + List deletes = new ArrayList(regionsInfo.size()); + for (HRegionInfo hri: regionsInfo) { + deletes.add(new Delete(hri.getRegionName())); + } + deleteFromMetaTable(catalogTracker, deletes); + LOG.info("Deleted from META, regions: " + regionsInfo); + } + + /** + * Adds and Removes the specified regions from .META. + * @param catalogTracker + * @param regionsToRemove list of regions to be deleted from META + * @param regionsToAdd list of regions to be added to META + * @throws IOException + */ + public static void mutateRegions(CatalogTracker catalogTracker, + final List regionsToRemove, final List regionsToAdd) + throws IOException { + List mutation = new ArrayList(); + if (regionsToRemove != null) { + for (HRegionInfo hri: regionsToRemove) { + mutation.add(new Delete(hri.getRegionName())); + } + } + if (regionsToAdd != null) { + for (HRegionInfo hri: regionsToAdd) { + mutation.add(makePutFromRegionInfo(hri)); + } + } + mutateMetaTable(catalogTracker, mutation); + if (regionsToRemove != null && regionsToRemove.size() > 0) { + LOG.debug("Deleted from META, regions: " + regionsToRemove); + } + if (regionsToAdd != null && regionsToAdd.size() > 0) { + LOG.debug("Add to META, regions: " + regionsToAdd); + } + } + + /** * Deletes daughters references in offlined split parent. 
* @param catalogTracker * @param parent Parent row we're to remove daughter reference from @@ -289,7 +371,7 @@ Delete delete = new Delete(parent.getRegionName()); delete.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER); delete.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER); - deleteMetaTable(catalogTracker, delete); + deleteFromMetaTable(catalogTracker, delete); LOG.info("Deleted daughters references, qualifier=" + Bytes.toStringBinary(HConstants.SPLITA_QUALIFIER) + " and qualifier=" + Bytes.toStringBinary(HConstants.SPLITB_QUALIFIER) + ", from parent " + parent.getRegionNameAsString()); Index: src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (revision 1451296) +++ src/main/java/org/apache/hadoop/hbase/ipc/HMasterInterface.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.client.coprocessor.ExecResult; import org.apache.hadoop.hbase.security.TokenInfo; import org.apache.hadoop.hbase.security.KerberosInfo; +import org.apache.hadoop.hbase.snapshot.HSnapshotDescription; import org.apache.hadoop.hbase.util.Pair; /** @@ -286,4 +287,22 @@ */ public ExecResult execCoprocessor(Exec call) throws IOException; + + public long snapshot(final HSnapshotDescription snapshot) + throws IOException; + + public List getCompletedSnapshots() + throws IOException; + + public void deleteSnapshot(final HSnapshotDescription snapshot) + throws IOException; + + public boolean isSnapshotDone(final HSnapshotDescription snapshot) + throws IOException; + + public void restoreSnapshot(final HSnapshotDescription request) + throws IOException; + + public boolean isRestoreSnapshotDone(final HSnapshotDescription request) + throws IOException; } Index: src/main/ruby/hbase/admin.rb =================================================================== --- src/main/ruby/hbase/admin.rb (revision 1451296) +++ src/main/ruby/hbase/admin.rb (working copy) @@ -586,5 +586,35 @@ put.add(org.apache.hadoop.hbase.HConstants::CATALOG_FAMILY, org.apache.hadoop.hbase.HConstants::REGIONINFO_QUALIFIER, org.apache.hadoop.hbase.util.Writables.getBytes(hri)) meta.put(put) end + + #---------------------------------------------------------------------------------------------- + # Take a snapshot of specified table + def snapshot(table, snapshot_name) + @admin.snapshot(snapshot_name.to_java_bytes, table.to_java_bytes) + end + + #---------------------------------------------------------------------------------------------- + # Restore specified snapshot + def restore_snapshot(snapshot_name) + @admin.restoreSnapshot(snapshot_name.to_java_bytes) + end + + #---------------------------------------------------------------------------------------------- + # Create a new table by cloning the snapshot content + def clone_snapshot(snapshot_name, table) + @admin.cloneSnapshot(snapshot_name.to_java_bytes, table.to_java_bytes) + end + + #---------------------------------------------------------------------------------------------- + # Delete specified snapshot + def delete_snapshot(snapshot_name) + @admin.deleteSnapshot(snapshot_name.to_java_bytes) + end + + #---------------------------------------------------------------------------------------------- + # Returns a list of snapshots + def list_snapshot + @admin.listSnapshots + end end end Index: src/main/ruby/shell/commands/list_snapshots.rb 
=================================================================== --- src/main/ruby/shell/commands/list_snapshots.rb (revision 0) +++ src/main/ruby/shell/commands/list_snapshots.rb (revision 0) @@ -0,0 +1,52 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +require 'time' + +module Shell + module Commands + class ListSnapshots < Command + def help + return <<-EOF +List all snapshots taken (by printing the names and relative information). +Optional regular expression parameter could be used to filter the output +by snapshot name. + +Examples: + hbase> list_snapshots + hbase> list_snapshots 'abc.*' +EOF + end + + def command(regex = ".*") + now = Time.now + formatter.header([ "SNAPSHOT", "TABLE + CREATION TIME"]) + + regex = /#{regex}/ unless regex.is_a?(Regexp) + list = admin.list_snapshot.select {|s| regex.match(s.getName)} + list.each do |snapshot| + creation_time = Time.at(snapshot.getCreationTime() / 1000).to_s + formatter.row([ snapshot.getName, snapshot.getTable + " (" + creation_time + ")" ]) + end + + formatter.footer(now, list.size) + return list.map { |s| s.getName() } + end + end + end +end Index: src/main/ruby/shell/commands/clone_snapshot.rb =================================================================== --- src/main/ruby/shell/commands/clone_snapshot.rb (revision 0) +++ src/main/ruby/shell/commands/clone_snapshot.rb (revision 0) @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +module Shell + module Commands + class CloneSnapshot < Command + def help + return <<-EOF +Create a new table by cloning the snapshot content. +There're no copies of data involved. +And writing on the newly created table will not influence the snapshot data. 
+ +Examples: + hbase> clone_snapshot 'snapshotName', 'tableName' +EOF + end + + def command(snapshot_name, table) + format_simple_command do + admin.clone_snapshot(snapshot_name, table) + end + end + end + end +end Index: src/main/ruby/shell/commands/restore_snapshot.rb =================================================================== --- src/main/ruby/shell/commands/restore_snapshot.rb (revision 0) +++ src/main/ruby/shell/commands/restore_snapshot.rb (revision 0) @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +module Shell + module Commands + class RestoreSnapshot < Command + def help + return <<-EOF +Restore a specified snapshot. +The restore will replace the content of the original table, +bringing back the content to the snapshot state. +The table must be disabled. + +Examples: + hbase> restore_snapshot 'snapshotName' +EOF + end + + def command(snapshot_name) + format_simple_command do + admin.restore_snapshot(snapshot_name) + end + end + end + end +end Index: src/main/ruby/shell/commands/delete_snapshot.rb =================================================================== --- src/main/ruby/shell/commands/delete_snapshot.rb (revision 0) +++ src/main/ruby/shell/commands/delete_snapshot.rb (revision 0) @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +module Shell + module Commands + class DeleteSnapshot < Command + def help + return <<-EOF +Delete a specified snapshot. Examples: + + hbase> delete_snapshot 'snapshotName', +EOF + end + + def command(snapshot_name) + format_simple_command do + admin.delete_snapshot(snapshot_name) + end + end + end + end +end Index: src/main/ruby/shell/commands/snapshot.rb =================================================================== --- src/main/ruby/shell/commands/snapshot.rb (revision 0) +++ src/main/ruby/shell/commands/snapshot.rb (revision 0) @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +module Shell + module Commands + class Snapshot < Command + def help + return <<-EOF +Take a snapshot of specified table. Examples: + + hbase> snapshot 'sourceTable', 'snapshotName' +EOF + end + + def command(table, snapshot_name) + format_simple_command do + admin.snapshot(table, snapshot_name) + end + end + end + end +end Index: src/main/ruby/shell.rb =================================================================== --- src/main/ruby/shell.rb (revision 1451296) +++ src/main/ruby/shell.rb (working copy) @@ -290,6 +290,18 @@ ) Shell.load_command_group( + 'snapshot', + :full_name => 'CLUSTER SNAPSHOT TOOLS', + :commands => %w[ + snapshot + clone_snapshot + restore_snapshot + delete_snapshot + list_snapshots + ] +) + +Shell.load_command_group( 'security', :full_name => 'SECURITY TOOLS', :comment => "NOTE: Above commands are only applicable if running with the AccessController coprocessor", Index: src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon =================================================================== --- src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon (revision 1451296) +++ src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon (working copy) @@ -43,6 +43,7 @@ org.apache.hadoop.hbase.client.HConnectionManager; org.apache.hadoop.hbase.HTableDescriptor; org.apache.hadoop.hbase.HBaseConfiguration; +org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; <%if format.equals("json") %> <& ../common/TaskMonitorTmpl; filter = filter; format = "json" &> @@ -123,6 +124,7 @@ <%if (metaLocation != null) %> <& userTables &> +<& userSnapshots &> <%if (servers != null) %> <& regionServers &> @@ -169,7 +171,6 @@ <%def userTables> <%java> HTableDescriptor[] tables = admin.listTables(); - HConnectionManager.deleteConnection(admin.getConfiguration()); <%if (tables != null && tables.length > 0)%>

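As an illustration (not from the patch itself), the shell commands above are thin wrappers over the HBaseAdmin client API, as the admin.rb additions suggest. A hedged sketch of the equivalent Java calls, with placeholder table and snapshot names and no error handling; restore_snapshot requires the table to be disabled first, per its help text:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.util.Bytes;

public class SnapshotAdminExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseAdmin admin = new HBaseAdmin(conf);
    try {
      // snapshot 'myTable', 'mySnapshot'
      admin.snapshot(Bytes.toBytes("mySnapshot"), Bytes.toBytes("myTable"));
      // clone_snapshot 'mySnapshot', 'myClone'
      admin.cloneSnapshot(Bytes.toBytes("mySnapshot"), Bytes.toBytes("myClone"));
      // restore_snapshot 'mySnapshot' (the table must be disabled)
      admin.disableTable("myTable");
      admin.restoreSnapshot(Bytes.toBytes("mySnapshot"));
      admin.enableTable("myTable");
      // delete_snapshot 'mySnapshot' and list_snapshots
      admin.deleteSnapshot(Bytes.toBytes("mySnapshot"));
      System.out.println(admin.listSnapshots());
    } finally {
      admin.close();
    }
  }
}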
@@ -197,6 +198,32 @@
+<%def userSnapshots>
+<%java>
+   List<SnapshotDescription> snapshots = admin.listSnapshots();
+</%java>
+<%if (snapshots != null && snapshots.size() > 0)%>
+<table>
+<tr>
+    <th>Snapshot</th>
+    <th>Table</th>
+    <th>Creation Time</th>
+    <th>Type</th>
+</tr>
+<%for SnapshotDescription snapshotDesc : snapshots%>
+<tr>
+    <td><% snapshotDesc.getName() %></td>
+    <td><% snapshotDesc.getTable() %></td>
+    <td><% new Date(snapshotDesc.getCreationTime()) %></td>
+    <td><% snapshotDesc.getType() %></td>
+</tr>
+</%for>
+<tr><td><% snapshots.size() %> snapshot(s) in set.</td></tr>
+</table>
+</%if>
+</%def>
+
 <%def regionServers>

    Region Servers

<%if (servers != null && servers.size() > 0)%>
@@ -254,3 +281,7 @@
+
+<%java>
+   HConnectionManager.deleteConnection(admin.getConfiguration(), false);
+</%java>
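As a final illustration (not part of the patch), the userSnapshots template renders one row per SnapshotDescription using the same accessors as the list_snapshots shell command. A small Java sketch printing the same columns outside the web UI, assuming a locally reachable cluster configuration:

import java.util.Date;
import java.util.List;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;

public class ListSnapshotsExample {
  public static void main(String[] args) throws Exception {
    HBaseAdmin admin = new HBaseAdmin(HBaseConfiguration.create());
    try {
      List<SnapshotDescription> snapshots = admin.listSnapshots();
      for (SnapshotDescription s : snapshots) {
        // Same columns as the master status page: name, table, creation time, type
        System.out.println(s.getName() + "\t" + s.getTable() + "\t"
            + new Date(s.getCreationTime()) + "\t" + s.getType());
      }
      System.out.println(snapshots.size() + " snapshot(s) in set.");
    } finally {
      admin.close();
    }
  }
}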