From 079995a1596304169e13be998a3be504b9961e0e Mon Sep 17 00:00:00 2001 From: Rajesh Nishtala Date: Mon, 20 Apr 2015 17:34:25 -0700 Subject: [PATCH] HBASE-13471 Fix a possible infinite loop in doMiniBatchMutation Summary: in doMiniBatchMutation it is possible to get into an infinite loop when a query has a row that is not in the region. If the batch had an invalid row, the row lock further down the function would fail to acquire because it was an invalid row. However we'd catch the exception and improperly treat it as if we had not acquired the lock and then try acquiring the lock again. Thus once we got into this state we'd be stuck in an infinite loop. Worse yet, this infiite loop would occur with the readLock held. So any other opertaions such as doClose() would be locked out and stuck. The patch is to check whether the row is valid and short circuit the failure when it doesn't work. Test Plan: IntegrationTestReplication would consistently fail when trying to disable large tables before the fix. After the test the tests pass consistently. Reviewers: eclark Subscribers: asameet Differential Revision: https://reviews.facebook.net/D37437 --- .../main/java/org/apache/hadoop/hbase/regionserver/HRegion.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 7ba0988..2133206 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -2878,6 +2878,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } else { prepareDelete((Delete) mutation); } + checkRow(mutation.getRow(), "doMiniBatchMutation"); } catch (NoSuchColumnFamilyException nscf) { LOG.warn("No such column family in batch mutation", nscf); batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus( @@ -2890,6 +2891,12 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi OperationStatusCode.SANITY_CHECK_FAILURE, fsce.getMessage()); lastIndexExclusive++; continue; + } catch (WrongRegionException we) { + LOG.warn("Batch mutation had a row that does not belong to this region", we); + batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus( + OperationStatusCode.SANITY_CHECK_FAILURE, we.getMessage()); + lastIndexExclusive++; + continue; } // If we haven't got any rows in our batch, we should block to @@ -4938,7 +4945,6 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * started (the calling thread has already acquired the region-close-guard lock). */ protected RowLock getRowLockInternal(byte[] row, boolean waitForLock) throws IOException { - checkRow(row, "row lock"); HashedBytes rowKey = new HashedBytes(row); RowLockContext rowLockContext = new RowLockContext(rowKey); -- 2.2.2