diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java index d760aa2..cbb7ff3 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java @@ -178,11 +178,19 @@ public class CellComparator implements Comparator, Serializable { return compareWithoutRow(left, right); } + /** + * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash) + * so can't be treated as plain byte arrays as this method does. + */ public static int compareRows(final Cell left, final Cell right) { return Bytes.compareTo(left.getRowArray(), left.getRowOffset(), left.getRowLength(), right.getRowArray(), right.getRowOffset(), right.getRowLength()); } + /** + * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash) + * so can't be treated as plain byte arrays as this method does. + */ public static int compareRows(byte[] left, int loffset, int llength, byte[] right, int roffset, int rlength) { return Bytes.compareTo(left, loffset, llength, right, roffset, rlength); @@ -375,14 +383,16 @@ public class CellComparator implements Comparator, Serializable { /** * Try to return a Cell that falls between left and right but that is - * shorter; i.e. takes up less space. This is trick is used building HFile block index. + * shorter; i.e. takes up less space. This trick is used building HFile block index. * Its an optimization. It does not always work. In this case we'll just return the * right cell. + * @param comparator Comparator to use. * @param left * @param right * @return A cell that sorts between left and right. */ - public static Cell getMidpoint(final Cell left, final Cell right) { + public static Cell getMidpoint(final KeyValue.KVComparator comparator, final Cell left, + final Cell right) { // TODO: Redo so only a single pass over the arrays rather than one to compare and then a // second composing midpoint. if (right == null) { @@ -391,6 +401,12 @@ public class CellComparator implements Comparator, Serializable { if (left == null) { return right; } + // If Cells from meta table, don't mess around. meta table Cells have schema + // (table,startrow,hash) so can't be treated as plain byte arrays. Just skip out without + // trying to do this optimization. + if (comparator != null && comparator instanceof KeyValue.MetaComparator) { + return right; + } int diff = compareRows(left, right); if (diff > 0) { throw new IllegalArgumentException("Left row sorts after right row; left=" + diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellComparator.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellComparator.java index d6a2f72..007f826 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellComparator.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellComparator.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.KeyValue.Type; import org.apache.hadoop.hbase.testclassification.MiscTests; import org.apache.hadoop.hbase.testclassification.SmallTests; @@ -79,56 +80,65 @@ public class TestCellComparator { @Test public void testGetShortMidpoint() { + KeyValue.KVComparator comparator = new KeyValue.KVComparator(); + Cell left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a")); Cell right = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a")); - Cell mid = CellComparator.getMidpoint(left, right); + Cell mid = CellComparator.getMidpoint(comparator, left, right); assertTrue(CellComparator.compare(left, mid, true) <= 0); assertTrue(CellComparator.compare(mid, right, true) <= 0); left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a")); right = CellUtil.createCell(Bytes.toBytes("b"), Bytes.toBytes("a"), Bytes.toBytes("a")); - mid = CellComparator.getMidpoint(left, right); + mid = CellComparator.getMidpoint(comparator, left, right); assertTrue(CellComparator.compare(left, mid, true) < 0); assertTrue(CellComparator.compare(mid, right, true) <= 0); left = CellUtil.createCell(Bytes.toBytes("g"), Bytes.toBytes("a"), Bytes.toBytes("a")); right = CellUtil.createCell(Bytes.toBytes("i"), Bytes.toBytes("a"), Bytes.toBytes("a")); - mid = CellComparator.getMidpoint(left, right); + mid = CellComparator.getMidpoint(comparator, left, right); assertTrue(CellComparator.compare(left, mid, true) < 0); assertTrue(CellComparator.compare(mid, right, true) <= 0); left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a")); right = CellUtil.createCell(Bytes.toBytes("bbbbbbb"), Bytes.toBytes("a"), Bytes.toBytes("a")); - mid = CellComparator.getMidpoint(left, right); + mid = CellComparator.getMidpoint(comparator, left, right); assertTrue(CellComparator.compare(left, mid, true) < 0); assertTrue(CellComparator.compare(mid, right, true) < 0); assertEquals(1, (int)mid.getRowLength()); left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a")); right = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("a")); - mid = CellComparator.getMidpoint(left, right); + mid = CellComparator.getMidpoint(comparator, left, right); assertTrue(CellComparator.compare(left, mid, true) < 0); assertTrue(CellComparator.compare(mid, right, true) <= 0); left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a")); right = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("aaaaaaaa"), Bytes.toBytes("b")); - mid = CellComparator.getMidpoint(left, right); + mid = CellComparator.getMidpoint(comparator, left, right); assertTrue(CellComparator.compare(left, mid, true) < 0); assertTrue(CellComparator.compare(mid, right, true) < 0); assertEquals(2, (int)mid.getFamilyLength()); left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a")); right = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("aaaaaaaaa")); - mid = CellComparator.getMidpoint(left, right); + mid = CellComparator.getMidpoint(comparator, left, right); assertTrue(CellComparator.compare(left, mid, true) < 0); assertTrue(CellComparator.compare(mid, right, true) < 0); assertEquals(2, (int)mid.getQualifierLength()); left = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("a")); right = CellUtil.createCell(Bytes.toBytes("a"), Bytes.toBytes("a"), Bytes.toBytes("b")); - mid = CellComparator.getMidpoint(left, right); + mid = CellComparator.getMidpoint(comparator, left, right); assertTrue(CellComparator.compare(left, mid, true) < 0); assertTrue(CellComparator.compare(mid, right, true) <= 0); assertEquals(1, (int)mid.getQualifierLength()); + + // Assert that if meta comparator, it returns the right cell -- i.e. no optimization done. + left = CellUtil.createCell(Bytes.toBytes("g"), Bytes.toBytes("a"), Bytes.toBytes("a")); + right = CellUtil.createCell(Bytes.toBytes("i"), Bytes.toBytes("a"), Bytes.toBytes("a")); + mid = CellComparator.getMidpoint(new KeyValue.MetaComparator(), left, right); + assertTrue(CellComparator.compare(left, mid, true) < 0); + assertTrue(CellComparator.compare(mid, right, true) == 0); } } \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java index 1df8bc2..28c4655 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java @@ -162,9 +162,8 @@ public class HFileWriterV2 extends AbstractHFileWriter { fsBlockWriter.writeHeaderAndData(outputStream); int onDiskSize = fsBlockWriter.getOnDiskSizeWithHeader(); - // Rather than CellComparator, we should be making use of an Interface here with the - // implementation class serialized out to the HFile metadata. TODO. - Cell indexEntry = CellComparator.getMidpoint(lastCellOfPreviousBlock, firstCellInBlock); + Cell indexEntry = + CellComparator.getMidpoint(this.comparator, lastCellOfPreviousBlock, firstCellInBlock); dataBlockIndexWriter.addEntry(CellUtil.getCellKeySerializedAsKeyValueKey(indexEntry), lastDataBlockOffset, onDiskSize); totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader(); @@ -264,8 +263,9 @@ public class HFileWriterV2 extends AbstractHFileWriter { checkBlockBoundary(); } - if (!fsBlockWriter.isWriting()) + if (!fsBlockWriter.isWriting()) { newBlock(); + } fsBlockWriter.write(cell);