Index: src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java (revision 1160314) +++ src/main/java/org/apache/hadoop/hbase/client/RetriesExhaustedWithDetailsException.java (working copy) @@ -22,6 +22,7 @@ import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException; import java.util.Collection; import java.util.HashMap; @@ -115,7 +116,12 @@ Map cls = new HashMap(); for (Throwable t : ths) { if (t == null) continue; - String name = t.getClass().getSimpleName(); + String name = ""; + if (t instanceof NoSuchColumnFamilyException) { + name = t.getMessage(); + } else { + name = t.getClass().getSimpleName(); + } Integer i = cls.get(name); if (i == null) { i = 0; Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 1160314) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -1345,13 +1345,14 @@ */ private static class BatchOperationInProgress { T[] operations; - OperationStatusCode[] retCodes; int nextIndexToProcess = 0; + Pair[] retCodeDetails; public BatchOperationInProgress(T[] operations) { this.operations = operations; - retCodes = new OperationStatusCode[operations.length]; - Arrays.fill(retCodes, OperationStatusCode.NOT_RUN); + this.retCodeDetails = new Pair[operations.length]; + Arrays.fill(this.retCodeDetails, new Pair( + OperationStatusCode.NOT_RUN, "")); } public boolean isDone() { @@ -1363,7 +1364,7 @@ * Perform a batch put with no pre-specified locks * @see HRegion#put(Pair[]) */ - public OperationStatusCode[] put(Put[] puts) throws IOException { + public Pair[] put(Put[] puts) throws IOException { @SuppressWarnings("unchecked") Pair putsAndLocks[] = new Pair[puts.length]; @@ -1376,9 +1377,12 @@ /** * Perform a batch of puts. * @param putsAndLocks the list of puts paired with their requested lock IDs. + * @return Returns a Pair object containing the operation status code along with the exception + * message. * @throws IOException */ - public OperationStatusCode[] put(Pair[] putsAndLocks) throws IOException { + public Pair[] put( + Pair[] putsAndLocks) throws IOException { BatchOperationInProgress> batchOp = new BatchOperationInProgress>(putsAndLocks); @@ -1398,10 +1402,11 @@ requestFlush(); } } - return batchOp.retCodes; + return batchOp.retCodeDetails; } - private long doMiniBatchPut(BatchOperationInProgress> batchOp) throws IOException { + private long doMiniBatchPut( + BatchOperationInProgress> batchOp) throws IOException { long now = EnvironmentEdgeManager.currentTimeMillis(); byte[] byteNow = Bytes.toBytes(now); boolean locked = false; @@ -1428,7 +1433,10 @@ checkFamilies(put.getFamilyMap().keySet()); } catch (NoSuchColumnFamilyException nscf) { LOG.warn("No such column family in batch put", nscf); - batchOp.retCodes[lastIndexExclusive] = OperationStatusCode.BAD_FAMILY; + //the exception message is populated to know which was the bad family. + batchOp.retCodeDetails[lastIndexExclusive] = + new Pair( + OperationStatusCode.BAD_FAMILY, nscf.getMessage()); lastIndexExclusive++; continue; } @@ -1472,7 +1480,9 @@ WALEdit walEdit = new WALEdit(); for (int i = firstIndex; i < lastIndexExclusive; i++) { // Skip puts that were determined to be invalid during preprocessing - if (batchOp.retCodes[i] != OperationStatusCode.NOT_RUN) continue; + if (batchOp.retCodeDetails[i].getFirst() != OperationStatusCode.NOT_RUN) { + continue; + } Put p = batchOp.operations[i].getFirst(); if (!p.getWriteToWAL()) continue; @@ -1488,11 +1498,14 @@ // ---------------------------------- long addedSize = 0; for (int i = firstIndex; i < lastIndexExclusive; i++) { - if (batchOp.retCodes[i] != OperationStatusCode.NOT_RUN) continue; + if (batchOp.retCodeDetails[i].getFirst() != OperationStatusCode.NOT_RUN) { + continue; + } Put p = batchOp.operations[i].getFirst(); addedSize += applyFamilyMapToMemstore(p.getFamilyMap()); - batchOp.retCodes[i] = OperationStatusCode.SUCCESS; + batchOp.retCodeDetails[i] = new Pair( + OperationStatusCode.SUCCESS, ""); } success = true; return addedSize; @@ -1505,8 +1518,9 @@ } if (!success) { for (int i = firstIndex; i < lastIndexExclusive; i++) { - if (batchOp.retCodes[i] == OperationStatusCode.NOT_RUN) { - batchOp.retCodes[i] = OperationStatusCode.FAILURE; + if (batchOp.retCodeDetails[i].getFirst() == OperationStatusCode.NOT_RUN) { + batchOp.retCodeDetails[i] = new Pair( + OperationStatusCode.FAILURE, ""); } } } Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1160314) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -1715,9 +1715,9 @@ } this.requestCount.addAndGet(puts.size()); - OperationStatusCode[] codes = region.put(putsWithLocks); + Pair codes[] = region.put(putsWithLocks); for (i = 0; i < codes.length; i++) { - if (codes[i] != OperationStatusCode.SUCCESS) { + if (codes[i].getFirst() != OperationStatusCode.SUCCESS) { return i; } } @@ -2590,19 +2590,19 @@ this.requestCount.addAndGet(puts.size()); - OperationStatusCode[] codes = + Pair[] codes = region.put(putsWithLocks.toArray(new Pair[]{})); for( int i = 0 ; i < codes.length ; i++) { - OperationStatusCode code = codes[i]; + Pair code = codes[i]; Action theAction = puts.get(i); Object result = null; - if (code == OperationStatusCode.SUCCESS) { + if (code.getFirst() == OperationStatusCode.SUCCESS) { result = new Result(); - } else if (code == OperationStatusCode.BAD_FAMILY) { - result = new NoSuchColumnFamilyException(); + } else if (code.getFirst() == OperationStatusCode.BAD_FAMILY) { + result = new NoSuchColumnFamilyException(code.getSecond()); } // FAILURE && NOT_RUN becomes null, aka: need to run again. Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java (revision 1160314) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java (working copy) @@ -362,10 +362,10 @@ puts[i].add(cf, qual, val); } - OperationStatusCode[] codes = this.region.put(puts); + Pair[] codes = this.region.put(puts); assertEquals(10, codes.length); for (int i = 0; i < 10; i++) { - assertEquals(OperationStatusCode.SUCCESS, codes[i]); + assertEquals(OperationStatusCode.SUCCESS, codes[i].getFirst()); } assertEquals(1, HLog.getSyncOps()); @@ -375,7 +375,7 @@ assertEquals(10, codes.length); for (int i = 0; i < 10; i++) { assertEquals((i == 5) ? OperationStatusCode.BAD_FAMILY : - OperationStatusCode.SUCCESS, codes[i]); + OperationStatusCode.SUCCESS, codes[i].getFirst()); } assertEquals(1, HLog.getSyncOps()); @@ -384,8 +384,8 @@ MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(HBaseConfiguration.create()); - final AtomicReference retFromThread = - new AtomicReference(); + final AtomicReference[]> retFromThread = + new AtomicReference[]>(); TestThread putter = new TestThread(ctx) { @Override public void doWork() throws IOException { @@ -413,7 +413,7 @@ codes = retFromThread.get(); for (int i = 0; i < 10; i++) { assertEquals((i == 5) ? OperationStatusCode.BAD_FAMILY : - OperationStatusCode.SUCCESS, codes[i]); + OperationStatusCode.SUCCESS, codes[i].getFirst()); } LOG.info("Nexta, a batch put which uses an already-held lock"); @@ -430,7 +430,7 @@ LOG.info("...performed put"); for (int i = 0; i < 10; i++) { assertEquals((i == 5) ? OperationStatusCode.BAD_FAMILY : - OperationStatusCode.SUCCESS, codes[i]); + OperationStatusCode.SUCCESS, codes[i].getFirst()); } // Make sure we didn't do an extra batch assertEquals(1, HLog.getSyncOps());