0 || !reuseFirst) {
+ final int offset = reuseFirst ? 1 : 0;
+ // Recycle all buffers (all but the first when reuseFirst is set)
+ allocator.recycleByteBlocks(buffers, offset, 1+bufferUpto);
+ Arrays.fill(buffers, offset, 1+bufferUpto, null);
+ }
+ if (reuseFirst) {
+ // Re-use the first buffer
+ bufferUpto = 0;
+ byteUpto = 0;
+ byteOffset = 0;
+ buffer = buffers[0];
+ } else {
+ bufferUpto = -1;
+ byteUpto = BYTE_BLOCK_SIZE;
+ byteOffset = -BYTE_BLOCK_SIZE;
+ buffer = null;
+ }
}
}
-
+ /**
+ * Advances the pool to its next buffer. This method should be called once
+ * after the constructor to initialize the pool. In contrast to the
+ * constructor, a {@link ByteBlockPool#reset()} call advances the pool to
+ * its first buffer immediately.
+ */
public void nextBuffer() {
if (1+bufferUpto == buffers.length) {
byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1,
@@ -181,7 +209,11 @@ public final class ByteBlockPool {
byteUpto = 0;
byteOffset += BYTE_BLOCK_SIZE;
}
-
+
+ /**
+ * Allocates a new slice with the given size.
+ * @see ByteBlockPool#FIRST_LEVEL_SIZE
+ */
public int newSlice(final int size) {
if (byteUpto > BYTE_BLOCK_SIZE-size)
nextBuffer();
@@ -197,15 +229,32 @@ public final class ByteBlockPool {
// array is the length of each slice, ie first slice is 5
// bytes, next slice is 14 bytes, etc.
- public final static int[] nextLevelArray = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
- public final static int[] levelSizeArray = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};
- public final static int FIRST_LEVEL_SIZE = levelSizeArray[0];
+ /**
+ * An array holding offsets into {@link ByteBlockPool#LEVEL_SIZE_ARRAY},
+ * used to quickly navigate to the next slice level.
+ */
+ public final static int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
+
+ /**
+ * An array holding the level sizes for byte slices.
+ */
+ public final static int[] LEVEL_SIZE_ARRAY = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};
+
+ /**
+ * The first level size for new slices.
+ * @see ByteBlockPool#newSlice(int)
+ */
+ public final static int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
+ /**
+ * Creates a new byte slice with the given starting size and
+ * returns the slice's offset in the pool.
+ */
public int allocSlice(final byte[] slice, final int upto) {
final int level = slice[upto] & 15;
- final int newLevel = nextLevelArray[level];
- final int newSize = levelSizeArray[newLevel];
+ final int newLevel = NEXT_LEVEL_ARRAY[level];
+ final int newSize = LEVEL_SIZE_ARRAY[newLevel];
// Maybe allocate another block
if (byteUpto > BYTE_BLOCK_SIZE-newSize)
@@ -288,13 +337,14 @@ public final class ByteBlockPool {
}
/**
- *
+ * Copies bytes from the pool starting at the given offset with the given
+ * length into the given {@link BytesRef} at offset 0 and returns it.
+ * Note: this method allows copying across block boundaries.
*/
- public final BytesRef copyFrom(final BytesRef bytes) {
- final int length = bytes.length;
- final int offset = bytes.offset;
+ public final BytesRef copyFrom(final BytesRef bytes, final int offset, final int length) {
bytes.offset = 0;
bytes.grow(length);
+ bytes.length = length;
int bufferIndex = offset >> BYTE_BLOCK_SHIFT;
byte[] buffer = buffers[bufferIndex];
int pos = offset & BYTE_BLOCK_MASK;
diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
index 6805fda..e306484 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
@@ -228,7 +228,7 @@ public final class BytesRefHash {
lastCount = count;
count = 0;
if (resetPool) {
- pool.dropBuffersAndReset();
+ pool.reset(false, false); // we don't need to 0-fill the buffers
}
bytesStart = bytesStartArray.clear();
if (lastCount != -1 && shrink(lastCount)) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/RecyclingByteBlockAllocator.java b/lucene/core/src/java/org/apache/lucene/util/RecyclingByteBlockAllocator.java
index 6fd2b79..b31ce2d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/RecyclingByteBlockAllocator.java
+++ b/lucene/core/src/java/org/apache/lucene/util/RecyclingByteBlockAllocator.java
@@ -1,7 +1,5 @@
package org.apache.lucene.util;
-import java.util.concurrent.atomic.AtomicLong;
-
import org.apache.lucene.util.ByteBlockPool.Allocator;
/*
@@ -22,17 +20,19 @@ import org.apache.lucene.util.ByteBlockPool.Allocator;
*/
/**
- * A threadsafe {@link ByteBlockPool.Allocator} implementation that recycles unused byte
+ * A {@link ByteBlockPool.Allocator} implementation that recycles unused byte
* blocks in a buffer and reuses them in subsequent calls to
* {@link #getByteBlock()}.
- *
+ *
+ * Note: This class is not thread-safe.
+ *
* @lucene.internal
*/
public final class RecyclingByteBlockAllocator extends ByteBlockPool.Allocator {
private byte[][] freeByteBlocks;
private final int maxBufferedBlocks;
private int freeBlocks = 0;
- private final AtomicLong bytesUsed;
+ private final Counter bytesUsed;
public static final int DEFAULT_BUFFERED_BLOCKS = 64;
/**
@@ -43,12 +43,12 @@ public final class RecyclingByteBlockAllocator extends ByteBlockPool.Allocator {
* @param maxBufferedBlocks
* maximum number of buffered byte blocks
* @param bytesUsed
- * {@link AtomicLong} reference counting internally allocated bytes
+ * {@link Counter} reference counting internally allocated bytes
*/
public RecyclingByteBlockAllocator(int blockSize, int maxBufferedBlocks,
- AtomicLong bytesUsed) {
+ Counter bytesUsed) {
super(blockSize);
- freeByteBlocks = new byte[Math.min(10, maxBufferedBlocks)][];
+ freeByteBlocks = new byte[maxBufferedBlocks][];
this.maxBufferedBlocks = maxBufferedBlocks;
this.bytesUsed = bytesUsed;
}
@@ -62,7 +62,7 @@ public final class RecyclingByteBlockAllocator extends ByteBlockPool.Allocator {
* maximum number of buffered byte block
*/
public RecyclingByteBlockAllocator(int blockSize, int maxBufferedBlocks) {
- this(blockSize, maxBufferedBlocks, new AtomicLong());
+ this(blockSize, maxBufferedBlocks, Counter.newCounter(false));
}
/**
@@ -72,11 +72,11 @@ public final class RecyclingByteBlockAllocator extends ByteBlockPool.Allocator {
*
*/
public RecyclingByteBlockAllocator() {
- this(ByteBlockPool.BYTE_BLOCK_SIZE, 64, new AtomicLong());
+ this(ByteBlockPool.BYTE_BLOCK_SIZE, 64, Counter.newCounter(false));
}
@Override
- public synchronized byte[] getByteBlock() {
+ public byte[] getByteBlock() {
if (freeBlocks == 0) {
bytesUsed.addAndGet(blockSize);
return new byte[blockSize];
@@ -87,7 +87,7 @@ public final class RecyclingByteBlockAllocator extends ByteBlockPool.Allocator {
}
@Override
- public synchronized void recycleByteBlocks(byte[][] blocks, int start, int end) {
+ public void recycleByteBlocks(byte[][] blocks, int start, int end) {
final int numBlocks = Math.min(maxBufferedBlocks - freeBlocks, end - start);
final int size = freeBlocks + numBlocks;
if (size >= freeByteBlocks.length) {
@@ -111,14 +111,14 @@ public final class RecyclingByteBlockAllocator extends ByteBlockPool.Allocator {
/**
* @return the number of currently buffered blocks
*/
- public synchronized int numBufferedBlocks() {
+ public int numBufferedBlocks() {
return freeBlocks;
}
/**
* @return the number of bytes currently allocated by this {@link Allocator}
*/
- public synchronized long bytesUsed() {
+ public long bytesUsed() {
return bytesUsed.get();
}
@@ -136,8 +136,8 @@ public final class RecyclingByteBlockAllocator extends ByteBlockPool.Allocator {
* the number of byte blocks to remove
* @return the number of actually removed buffers
*/
- public synchronized int freeBlocks(int num) {
- assert num >= 0;
+ public int freeBlocks(int num) {
+ assert num >= 0 : "free blocks must be >= 0 but was: "+ num;
final int stop;
final int count;
if (num > freeBlocks) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/RecyclingIntBlockAllocator.java b/lucene/core/src/java/org/apache/lucene/util/RecyclingIntBlockAllocator.java
new file mode 100644
index 0000000..652f9ba
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/RecyclingIntBlockAllocator.java
@@ -0,0 +1,158 @@
+package org.apache.lucene.util;
+
+import org.apache.lucene.index.IntBlockPool;
+import org.apache.lucene.index.IntBlockPool.Allocator;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A {@link Allocator} implementation that recycles unused int
+ * blocks in a buffer and reuses them in subsequent calls to
+ * {@link #getIntBlock()}.
+ *
+ * Note: This class is not thread-safe.
+ *
+ * @lucene.internal
+ */
+public final class RecyclingIntBlockAllocator extends Allocator {
+ private int[][] freeByteBlocks;
+ private final int maxBufferedBlocks;
+ private int freeBlocks = 0;
+ private final Counter bytesUsed;
+ public static final int DEFAULT_BUFFERED_BLOCKS = 64;
+
+ /**
+ * Creates a new {@link RecyclingIntBlockAllocator}
+ *
+ * @param blockSize
+ * the block size in bytes
+ * @param maxBufferedBlocks
+ * maximum number of buffered int blocks
+ * @param bytesUsed
+ * {@link Counter} reference counting internally allocated bytes
+ */
+ public RecyclingIntBlockAllocator(int blockSize, int maxBufferedBlocks,
+ Counter bytesUsed) {
+ super(blockSize);
+ freeByteBlocks = new int[maxBufferedBlocks][];
+ this.maxBufferedBlocks = maxBufferedBlocks;
+ this.bytesUsed = bytesUsed;
+ }
+
+ /**
+ * Creates a new {@link RecyclingIntBlockAllocator}.
+ *
+ * @param blockSize
+ * the size of each block returned by this allocator
+ * @param maxBufferedBlocks
+ * maximum number of buffered int blocks
+ */
+ public RecyclingIntBlockAllocator(int blockSize, int maxBufferedBlocks) {
+ this(blockSize, maxBufferedBlocks, Counter.newCounter(false));
+ }
+
+ /**
+ * Creates a new {@link RecyclingIntBlockAllocator} with a block size of
+ * {@link IntBlockPool#INT_BLOCK_SIZE} and an upper buffered-blocks limit of
+ * {@link #DEFAULT_BUFFERED_BLOCKS} ({@value #DEFAULT_BUFFERED_BLOCKS}).
+ *
+ */
+ public RecyclingIntBlockAllocator() {
+ this(IntBlockPool.INT_BLOCK_SIZE, 64, Counter.newCounter(false));
+ }
+
+ @Override
+ public int[] getIntBlock() {
+ if (freeBlocks == 0) {
+ bytesUsed.addAndGet(blockSize*RamUsageEstimator.NUM_BYTES_INT);
+ return new int[blockSize];
+ }
+ final int[] b = freeByteBlocks[--freeBlocks];
+ freeByteBlocks[freeBlocks] = null;
+ return b;
+ }
+
+ @Override
+ public void recycleIntBlocks(int[][] blocks, int start, int end) {
+ final int numBlocks = Math.min(maxBufferedBlocks - freeBlocks, end - start);
+ final int size = freeBlocks + numBlocks;
+ if (size >= freeByteBlocks.length) {
+ final int[][] newBlocks = new int[ArrayUtil.oversize(size,
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF)][];
+ System.arraycopy(freeByteBlocks, 0, newBlocks, 0, freeBlocks);
+ freeByteBlocks = newBlocks;
+ }
+ final int stop = start + numBlocks;
+ for (int i = start; i < stop; i++) {
+ freeByteBlocks[freeBlocks++] = blocks[i];
+ blocks[i] = null;
+ }
+ for (int i = stop; i < end; i++) {
+ blocks[i] = null;
+ }
+ bytesUsed.addAndGet(-(end - stop) * (blockSize * RamUsageEstimator.NUM_BYTES_INT));
+ assert bytesUsed.get() >= 0;
+ }
+
+ /**
+ * @return the number of currently buffered blocks
+ */
+ public int numBufferedBlocks() {
+ return freeBlocks;
+ }
+
+ /**
+ * @return the number of bytes currently allocated by this {@link Allocator}
+ */
+ public long bytesUsed() {
+ return bytesUsed.get();
+ }
+
+ /**
+ * @return the maximum number of buffered byte blocks
+ */
+ public int maxBufferedBlocks() {
+ return maxBufferedBlocks;
+ }
+
+ /**
+ * Removes the given number of int blocks from the buffer if possible.
+ *
+ * @param num
+ * the number of int blocks to remove
+ * @return the number of actually removed buffers
+ */
+ public int freeBlocks(int num) {
+ assert num >= 0 : "free blocks must be >= 0 but was: "+ num;
+ final int stop;
+ final int count;
+ if (num > freeBlocks) {
+ stop = 0;
+ count = freeBlocks;
+ } else {
+ stop = freeBlocks - num;
+ count = num;
+ }
+ while (freeBlocks > stop) {
+ freeByteBlocks[--freeBlocks] = null;
+ }
+ bytesUsed.addAndGet(-count*blockSize* RamUsageEstimator.NUM_BYTES_INT);
+ assert bytesUsed.get() >= 0;
+ return count;
+ }
+}
\ No newline at end of file
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java b/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java
new file mode 100644
index 0000000..5a42335
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java
@@ -0,0 +1,156 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * tests basic {@link IntBlockPool} functionality
+ */
+public class TestIntBlockPool extends LuceneTestCase {
+
+ public void testSingleWriterReader() {
+ Counter bytesUsed = Counter.newCounter();
+ IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));
+
+ for (int j = 0; j < 2; j++) {
+ IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
+ int start = writer.startNewSlice();
+ int num = atLeast(100);
+ for (int i = 0; i < num; i++) {
+ writer.writeInt(i);
+ }
+
+ int upto = writer.getCurrentOffset();
+ IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
+ reader.reset(start, upto);
+ for (int i = 0; i < num; i++) {
+ assertEquals(i, reader.readInt());
+ }
+ assertTrue(reader.endOfSlice());
+ if (random().nextBoolean()) {
+ pool.reset(true, false);
+ assertEquals(0, bytesUsed.get());
+ } else {
+ pool.reset(true, true);
+ assertEquals(IntBlockPool.INT_BLOCK_SIZE
+ * RamUsageEstimator.NUM_BYTES_INT, bytesUsed.get());
+ }
+ }
+ }
+
+ public void testMultipleWriterReader() {
+ Counter bytesUsed = Counter.newCounter();
+ IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));
+ for (int j = 0; j < 2; j++) {
+ List<StartEndAndValues> holders = new ArrayList<StartEndAndValues>();
+ int num = atLeast(4);
+ for (int i = 0; i < num; i++) {
+ holders.add(new StartEndAndValues(random().nextInt(1000)));
+ }
+ IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
+ IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
+
+ int numValuesToWrite = atLeast(10000);
+ for (int i = 0; i < numValuesToWrite; i++) {
+ StartEndAndValues values = holders
+ .get(random().nextInt(holders.size()));
+ if (values.valueCount == 0) {
+ values.start = writer.startNewSlice();
+ } else {
+ writer.reset(values.end);
+ }
+ writer.writeInt(values.nextValue());
+ values.end = writer.getCurrentOffset();
+ if (random().nextInt(5) == 0) {
+ // pick one and read its ints back
+ assertReader(reader, holders.get(random().nextInt(holders.size())));
+ }
+ }
+
+ while (!holders.isEmpty()) {
+ StartEndAndValues values = holders.remove(random().nextInt(
+ holders.size()));
+ assertReader(reader, values);
+ }
+ if (random().nextBoolean()) {
+ pool.reset(true, false);
+ assertEquals(0, bytesUsed.get());
+ } else {
+ pool.reset(true, true);
+ assertEquals(IntBlockPool.INT_BLOCK_SIZE
+ * RamUsageEstimator.NUM_BYTES_INT, bytesUsed.get());
+ }
+ }
+ }
+
+ private static class ByteTrackingAllocator extends IntBlockPool.Allocator {
+ private final Counter bytesUsed;
+
+ public ByteTrackingAllocator(Counter bytesUsed) {
+ this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed);
+ }
+
+ public ByteTrackingAllocator(int blockSize, Counter bytesUsed) {
+ super(blockSize);
+ this.bytesUsed = bytesUsed;
+ }
+
+ @Override
+ public int[] getIntBlock() {
+ bytesUsed.addAndGet(blockSize * RamUsageEstimator.NUM_BYTES_INT);
+ return new int[blockSize];
+ }
+
+ @Override
+ public void recycleIntBlocks(int[][] blocks, int start, int end) {
+ bytesUsed
+ .addAndGet(-((end - start) * blockSize * RamUsageEstimator.NUM_BYTES_INT));
+ }
+
+ }
+
+ private void assertReader(IntBlockPool.SliceReader reader,
+ StartEndAndValues values) {
+ reader.reset(values.start, values.end);
+ for (int i = 0; i < values.valueCount; i++) {
+ assertEquals(values.valueOffset + i, reader.readInt());
+ }
+ assertTrue(reader.endOfSlice());
+ }
+
+ private static class StartEndAndValues {
+ int valueOffset;
+ int valueCount;
+ int start;
+ int end;
+
+ public StartEndAndValues(int valueOffset) {
+ this.valueOffset = valueOffset;
+ }
+
+ public int nextValue() {
+ return valueOffset + valueCount++;
+ }
+
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java
index 2888b29..dd8486b 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java
@@ -28,41 +28,53 @@ import org.apache.lucene.store.RAMDirectory;
public class TestByteBlockPool extends LuceneTestCase {
public void testCopyRefAndWrite() throws IOException {
- List<String> list = new ArrayList<String>();
- int maxLength = atLeast(500);
- ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectAllocator());
+ Counter bytesUsed = Counter.newCounter();
+ ByteBlockPool pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
pool.nextBuffer();
- final int numValues = atLeast(100);
- BytesRef ref = new BytesRef();
- for (int i = 0; i < numValues; i++) {
- final String value = _TestUtil.randomRealisticUnicodeString(random(),
- maxLength);
- list.add(value);
- ref.copyChars(value);
- pool.copy(ref);
+ boolean reuseFirst = random().nextBoolean();
+ for (int j = 0; j < 2; j++) {
+
+ List<String> list = new ArrayList<String>();
+ int maxLength = atLeast(500);
+ final int numValues = atLeast(100);
+ BytesRef ref = new BytesRef();
+ for (int i = 0; i < numValues; i++) {
+ final String value = _TestUtil.randomRealisticUnicodeString(random(),
+ maxLength);
+ list.add(value);
+ ref.copyChars(value);
+ pool.copy(ref);
+ }
+ RAMDirectory dir = new RAMDirectory();
+ IndexOutput stream = dir.createOutput("foo.txt", newIOContext(random()));
+ pool.writePool(stream);
+ stream.flush();
+ stream.close();
+ IndexInput input = dir.openInput("foo.txt", newIOContext(random()));
+ assertEquals(pool.byteOffset + pool.byteUpto, stream.length());
+ BytesRef expected = new BytesRef();
+ BytesRef actual = new BytesRef();
+ for (String string : list) {
+ expected.copyChars(string);
+ actual.grow(expected.length);
+ actual.length = expected.length;
+ input.readBytes(actual.bytes, 0, actual.length);
+ assertEquals(expected, actual);
+ }
+ try {
+ input.readByte();
+ fail("must be EOF");
+ } catch (EOFException e) {
+ // expected - read past EOF
+ }
+ pool.reset(random().nextBoolean(), reuseFirst);
+ if (reuseFirst) {
+ assertEquals(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed.get());
+ } else {
+ assertEquals(0, bytesUsed.get());
+ pool.nextBuffer(); // prepare for next iter
+ }
+ dir.close();
}
- RAMDirectory dir = new RAMDirectory();
- IndexOutput stream = dir.createOutput("foo.txt", newIOContext(random()));
- pool.writePool(stream);
- stream.flush();
- stream.close();
- IndexInput input = dir.openInput("foo.txt", newIOContext(random()));
- assertEquals(pool.byteOffset + pool.byteUpto, stream.length());
- BytesRef expected = new BytesRef();
- BytesRef actual = new BytesRef();
- for (String string : list) {
- expected.copyChars(string);
- actual.grow(expected.length);
- actual.length = expected.length;
- input.readBytes(actual.bytes, 0, actual.length);
- assertEquals(expected, actual);
- }
- try {
- input.readByte();
- fail("must be EOF");
- } catch (EOFException e) {
- // expected - read past EOF
- }
- dir.close();
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java b/lucene/core/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java
index 504cba6..8d81f65 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java
@@ -39,7 +39,7 @@ public class TestRecyclingByteBlockAllocator extends LuceneTestCase {
private RecyclingByteBlockAllocator newAllocator() {
return new RecyclingByteBlockAllocator(1 << (2 + random().nextInt(15)),
- random().nextInt(97), new AtomicLong());
+ random().nextInt(97), Counter.newCounter());
}
@Test
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestRecyclingIntBlockAllocator.java b/lucene/core/src/test/org/apache/lucene/util/TestRecyclingIntBlockAllocator.java
new file mode 100644
index 0000000..46ad8cd
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/TestRecyclingIntBlockAllocator.java
@@ -0,0 +1,143 @@
+package org.apache.lucene.util;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import org.junit.Before;
+import org.junit.Test;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Testcase for {@link RecyclingIntBlockAllocator}
+ */
+public class TestRecyclingIntBlockAllocator extends LuceneTestCase {
+
+ /**
+ */
+ @Override
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ }
+
+ private RecyclingIntBlockAllocator newAllocator() {
+ return new RecyclingIntBlockAllocator(1 << (2 + random().nextInt(15)),
+ random().nextInt(97), Counter.newCounter());
+ }
+
+ @Test
+ public void testAllocate() {
+ RecyclingIntBlockAllocator allocator = newAllocator();
+ HashSet<int[]> set = new HashSet<int[]>();
+ int[] block = allocator.getIntBlock();
+ set.add(block);
+ assertNotNull(block);
+ final int size = block.length;
+
+ int num = atLeast(97);
+ for (int i = 0; i < num; i++) {
+ block = allocator.getIntBlock();
+ assertNotNull(block);
+ assertEquals(size, block.length);
+ assertTrue("block is returned twice", set.add(block));
+ assertEquals(4 * size * (i + 2), allocator.bytesUsed()); // i is zero-based and one block was allocated before the loop
+ assertEquals(0, allocator.numBufferedBlocks());
+ }
+ }
+
+ @Test
+ public void testAllocateAndRecycle() {
+ RecyclingIntBlockAllocator allocator = newAllocator();
+ HashSet<int[]> allocated = new HashSet<int[]>();
+
+ int[] block = allocator.getIntBlock();
+ allocated.add(block);
+ assertNotNull(block);
+ final int size = block.length;
+
+ int numIters = atLeast(97);
+ for (int i = 0; i < numIters; i++) {
+ int num = 1 + random().nextInt(39);
+ for (int j = 0; j < num; j++) {
+ block = allocator.getIntBlock();
+ assertNotNull(block);
+ assertEquals(size, block.length);
+ assertTrue("block is returned twice", allocated.add(block));
+ assertEquals(4 * size * (allocated.size() + allocator.numBufferedBlocks()), allocator
+ .bytesUsed());
+ }
+ int[][] array = allocated.toArray(new int[0][]);
+ int begin = random().nextInt(array.length);
+ int end = begin + random().nextInt(array.length - begin);
+ List<int[]> selected = new ArrayList<int[]>();
+ for (int j = begin; j < end; j++) {
+ selected.add(array[j]);
+ }
+ allocator.recycleIntBlocks(array, begin, end);
+ for (int j = begin; j < end; j++) {
+ assertNull(array[j]);
+ int[] b = selected.remove(0);
+ assertTrue(allocated.remove(b));
+ }
+ }
+ }
+
+ @Test
+ public void testAllocateAndFree() {
+ RecyclingIntBlockAllocator allocator = newAllocator();
+ HashSet<int[]> allocated = new HashSet<int[]>();
+ int freeButAllocated = 0;
+ int[] block = allocator.getIntBlock();
+ allocated.add(block);
+ assertNotNull(block);
+ final int size = block.length;
+
+ int numIters = atLeast(97);
+ for (int i = 0; i < numIters; i++) {
+ int num = 1 + random().nextInt(39);
+ for (int j = 0; j < num; j++) {
+ block = allocator.getIntBlock();
+ freeButAllocated = Math.max(0, freeButAllocated - 1);
+ assertNotNull(block);
+ assertEquals(size, block.length);
+ assertTrue("block is returned twice", allocated.add(block));
+ assertEquals("" + (4 * size * (allocated.size() + allocator.numBufferedBlocks()) - allocator.bytesUsed()), 4 * size * (allocated.size() + allocator.numBufferedBlocks()),
+ allocator.bytesUsed());
+ }
+
+ int[][] array = allocated.toArray(new int[0][]);
+ int begin = random().nextInt(array.length);
+ int end = begin + random().nextInt(array.length - begin);
+ for (int j = begin; j < end; j++) {
+ int[] b = array[j];
+ assertTrue(allocated.remove(b));
+ }
+ allocator.recycleIntBlocks(array, begin, end);
+ for (int j = begin; j < end; j++) {
+ assertNull(array[j]);
+ }
+ // randomly free blocks
+ int numFreeBlocks = allocator.numBufferedBlocks();
+ int freeBlocks = allocator.freeBlocks(random().nextInt(7 + allocator
+ .maxBufferedBlocks()));
+ assertEquals(allocator.numBufferedBlocks(), numFreeBlocks - freeBlocks);
+ }
+ }
+}
\ No newline at end of file
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
index 4412738..6282fa7 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@@ -44,6 +44,8 @@ import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.RecyclingByteBlockAllocator;
/**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
@@ -53,7 +55,7 @@ public class WeightedSpanTermExtractor {
private String fieldName;
private TokenStream tokenStream;
- private Map readers = new HashMap(10);
+ private Map readers = new HashMap(10);
private String defaultField;
private boolean expandMultiTermQuery;
private boolean cachedTokenStream;
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index de10a04..9a2a170 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -36,6 +36,9 @@ import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IntBlockPool;
+import org.apache.lucene.index.IntBlockPool.SliceReader;
+import org.apache.lucene.index.IntBlockPool.SliceWriter;
import org.apache.lucene.index.Norm;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -58,9 +61,16 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.RAMDirectory; // for javadocs
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.Constants; // for javadocs
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.RecyclingByteBlockAllocator;
+import org.apache.lucene.util.RecyclingIntBlockAllocator;
+
/**
* High-performance single-document main memory Apache Lucene fulltext search index.
@@ -191,15 +201,18 @@ public class MemoryIndex {
/** fields sorted ascending by fieldName; lazily computed on demand */
private transient Map.Entry<String,Info>[] sortedFields;
- /** pos: positions[3*i], startOffset: positions[3*i +1], endOffset: positions[3*i +2] */
- private final int stride;
+ private final boolean storeOffsets;
- /** Could be made configurable; */
- private static final float docBoost = 1.0f;
-
private static final boolean DEBUG = false;
+ private final ByteBlockPool byteBlockPool;
+ private final IntBlockPool intBlockPool;
+ private final IntBlockPool.SliceWriter postingsWriter;
+
private HashMap<String,FieldInfo> fieldInfos = new HashMap<String,FieldInfo>();
+
+ private Counter bytesUsed;
/**
* Sorts term entries into ascending order; also works for
@@ -233,8 +246,26 @@ public class MemoryIndex {
* whether or not to store the start and end character offset of
* each token term in the text
*/
- protected MemoryIndex(boolean storeOffsets) {
- this.stride = storeOffsets ? 3 : 1;
+ public MemoryIndex(boolean storeOffsets) {
+ this(storeOffsets, 5*1024*1024 /*~5mb*/);
+
+ }
+
+ /**
+ * Expert: This constructor accepts an upper limit for the number of bytes that should
+ * remain buffered in the internal int and byte block pools after a {@link #reset()} call.
+ * @param storeOffsets true if offsets should be stored
+ * @param maxBufferedBytes the number of bytes that should remain in the internal memory pools after {@link #reset()} is called
+ */
+ MemoryIndex(boolean storeOffsets, long maxBufferedBytes) {
+ this.storeOffsets = storeOffsets;
+ this.bytesUsed = Counter.newCounter();
+ final int maxBufferedByteBlocks = (int)((maxBufferedBytes/2) / ByteBlockPool.BYTE_BLOCK_SIZE );
+ final int maxBufferedIntBlocks = (int) ((maxBufferedBytes - (maxBufferedByteBlocks*ByteBlockPool.BYTE_BLOCK_SIZE))/(IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT));
+ assert (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE) + (maxBufferedIntBlocks * IntBlockPool.INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT) <= maxBufferedBytes;
+ byteBlockPool = new ByteBlockPool(new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, maxBufferedByteBlocks, bytesUsed));
+ intBlockPool = new IntBlockPool(new RecyclingIntBlockAllocator(IntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
+ postingsWriter = new SliceWriter(intBlockPool);
}
/**
@@ -265,7 +296,7 @@ public class MemoryIndex {
throw new RuntimeException(ex);
}
- addField(fieldName, stream);
+ addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName));
}
/**
@@ -319,7 +350,7 @@ public class MemoryIndex {
public void addField(String fieldName, TokenStream stream) {
addField(fieldName, stream, 1.0f);
}
-
+
/**
* Iterates over the given token stream and adds the resulting terms to the index;
* Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
@@ -333,9 +364,14 @@ public class MemoryIndex {
* the token stream to retrieve tokens from.
* @param boost
* the boost factor for hits for this field
+ *
* @see org.apache.lucene.document.Field#setBoost(float)
*/
+
public void addField(String fieldName, TokenStream stream, float boost) {
+ addField(fieldName, stream, boost, 0);
+ }
+ public void addField(String fieldName, TokenStream stream, float boost, int positionIncrementGap) {
try {
if (fieldName == null)
throw new IllegalArgumentException("fieldName must not be null");
@@ -343,24 +379,36 @@ public class MemoryIndex {
throw new IllegalArgumentException("token stream must not be null");
if (boost <= 0.0f)
throw new IllegalArgumentException("boost factor must be greater than 0.0");
- if (fields.get(fieldName) != null)
- throw new IllegalArgumentException("field must not be added more than once");
-
- HashMap<BytesRef,ArrayIntList> terms = new HashMap<BytesRef,ArrayIntList>();
int numTokens = 0;
int numOverlapTokens = 0;
int pos = -1;
+ final BytesRefHash terms;
+ final SliceByteStartArray sliceArray;
+ Info info = null;
+ long sumTotalTermFreq = 0;
+ if ((info = fields.get(fieldName)) != null) {
+ numTokens = info.numTokens;
+ numOverlapTokens = info.numOverlapTokens;
+ pos = info.lastPosition + positionIncrementGap;
+ terms = info.terms;
+ boost *= info.boost;
+ sliceArray = info.sliceArray;
+ sumTotalTermFreq = info.sumTotalTermFreq;
+ } else {
+ sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
+ terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
+ }
if (!fieldInfos.containsKey(fieldName)) {
fieldInfos.put(fieldName,
new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null, null, null));
}
-
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
BytesRef ref = termAtt.getBytesRef();
stream.reset();
+
while (stream.incrementToken()) {
termAtt.fillBytesRef();
if (ref.length == 0) continue; // nothing to do
@@ -370,27 +418,33 @@ public class MemoryIndex {
if (posIncr == 0)
numOverlapTokens++;
pos += posIncr;
-
- ArrayIntList positions = terms.get(ref);
- if (positions == null) { // term not seen before
- positions = new ArrayIntList(stride);
- terms.put(BytesRef.deepCopyOf(ref), positions);
+ int ord = terms.add(ref);
+ if (ord < 0) {
+ ord = (-ord) - 1;
+ postingsWriter.reset(sliceArray.end[ord]);
+ } else {
+ sliceArray.start[ord] = postingsWriter.startNewSlice();
}
- if (stride == 1) {
- positions.add(pos);
+ sliceArray.freq[ord]++;
+ sumTotalTermFreq++;
+ if (!storeOffsets) {
+ postingsWriter.writeInt(pos);
} else {
- positions.add(pos, offsetAtt.startOffset(), offsetAtt.endOffset());
+ postingsWriter.writeInt(pos);
+ postingsWriter.writeInt(offsetAtt.startOffset());
+ postingsWriter.writeInt(offsetAtt.endOffset());
}
+ sliceArray.end[ord] = postingsWriter.getCurrentOffset();
}
stream.end();
// ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
if (numTokens > 0) {
- boost = boost * docBoost; // see DocumentWriter.addDocument(...)
- fields.put(fieldName, new Info(terms, numTokens, numOverlapTokens, boost));
+ fields.put(fieldName, new Info(terms, sliceArray, numTokens, numOverlapTokens, boost, pos, sumTotalTermFreq));
sortedFields = null; // invalidate sorted view, if any
}
- } catch (IOException e) { // can never happen
+ } catch (Exception e) { // can never happen
throw new RuntimeException(e);
} finally {
try {
@@ -484,10 +538,6 @@ public class MemoryIndex {
return RamUsageEstimator.sizeOf(this);
}
- private int numPositions(ArrayIntList positions) {
- return positions.size() / stride;
- }
-
/** sorts into ascending order (on demand), reusing memory along the way */
private void sortFields() {
if (sortedFields == null) sortedFields = sort(fields);
@@ -519,31 +569,50 @@ public class MemoryIndex {
sortFields();
int sumPositions = 0;
int sumTerms = 0;
-
+ final BytesRef spare = new BytesRef();
for (int i=0; i < sortedFields.length; i++) {
Map.Entry<String,Info> entry = sortedFields[i];
String fieldName = entry.getKey();
Info info = entry.getValue();
info.sortTerms();
result.append(fieldName + ":\n");
-
+ SliceByteStartArray sliceArray = info.sliceArray;
int numPositions = 0;
- for (int j=0; j < info.sortedTerms.length; j++) {
- Map.Entry<BytesRef,ArrayIntList> e = info.sortedTerms[j];
- BytesRef term = e.getKey();
- ArrayIntList positions = e.getValue();
- result.append("\t'" + term + "':" + numPositions(positions) + ":");
- result.append(positions.toString(stride)); // ignore offsets
+ SliceReader postingsReader = new SliceReader(intBlockPool);
+ for (int j=0; j < info.terms.size(); j++) {
+ int ord = info.sortedTerms[j];
+ info.terms.get(ord, spare);
+ int freq = sliceArray.freq[ord];
+ result.append("\t'" + spare + "':" + freq + ":");
+ postingsReader.reset(sliceArray.start[ord], sliceArray.end[ord]);
+ result.append(" [");
+ final int iters = storeOffsets ? 3 : 1;
+ while(!postingsReader.endOfSlice()) {
+ result.append("(");
+
+ for (int k = 0; k < iters; k++) {
+ result.append(postingsReader.readInt());
+ if (k < iters-1) {
+ result.append(", ");
+ }
+ }
+ result.append(")");
+ if (!postingsReader.endOfSlice()) {
+ result.append(",");
+ }
+
+ }
+ result.append("]");
result.append("\n");
- numPositions += numPositions(positions);
+ numPositions += freq;
}
- result.append("\tterms=" + info.sortedTerms.length);
+ result.append("\tterms=" + info.terms.size());
result.append(", positions=" + numPositions);
result.append(", memory=" + RamUsageEstimator.humanReadableUnits(RamUsageEstimator.sizeOf(info)));
result.append("\n");
sumPositions += numPositions;
- sumTerms += info.sortedTerms.length;
+ sumTerms += info.terms.size();
}
result.append("\nfields=" + sortedFields.length);
@@ -563,10 +632,12 @@ public class MemoryIndex {
* Term strings and their positions for this field: Map <String termText, ArrayIntList positions>
*/
- private final HashMap terms;
+ private final BytesRefHash terms;
+
+ private final SliceByteStartArray sliceArray;
/** Terms sorted ascending by term text; computed on demand */
- private transient Map.Entry[] sortedTerms;
+ private transient int[] sortedTerms;
/** Number of added tokens for this field */
private final int numTokens;
@@ -579,16 +650,17 @@ public class MemoryIndex {
private final long sumTotalTermFreq;
- public Info(HashMap<BytesRef,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
+ /** the last position encountered in this field for multi field support*/
+ private int lastPosition;
+
+ public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, long sumTotalTermFreq) {
this.terms = terms;
+ this.sliceArray = sliceArray;
this.numTokens = numTokens;
this.numOverlapTokens = numOverlapTokens;
this.boost = boost;
- long sum = 0;
- for(Map.Entry<BytesRef,ArrayIntList> ent : terms.entrySet()) {
- sum += ent.getValue().size();
- }
- sumTotalTermFreq = sum;
+ this.sumTotalTermFreq = sumTotalTermFreq;
+ this.lastPosition = lastPosition;
}
public long getSumTotalTermFreq() {
@@ -604,83 +676,15 @@ public class MemoryIndex {
* apart from more sophisticated Tries / prefix trees).
*/
public void sortTerms() {
- if (sortedTerms == null) sortedTerms = sort(terms);
+ if (sortedTerms == null)
+ sortedTerms = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
}
public float getBoost() {
return boost;
}
-
}
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- /**
- * Efficient resizable auto-expanding list holding int elements;
- * implemented with arrays.
- */
- private static final class ArrayIntList {
-
- private int[] elements;
- private int size = 0;
-
- public ArrayIntList(int initialCapacity) {
- elements = new int[initialCapacity];
- }
-
- public void add(int elem) {
- if (size == elements.length) ensureCapacity(size + 1);
- elements[size++] = elem;
- }
-
- public void add(int pos, int start, int end) {
- if (size + 3 > elements.length) ensureCapacity(size + 3);
- elements[size] = pos;
- elements[size+1] = start;
- elements[size+2] = end;
- size += 3;
- }
-
- public int get(int index) {
- if (index >= size) throwIndex(index);
- return elements[index];
- }
-
- public int size() {
- return size;
- }
-
- private void ensureCapacity(int minCapacity) {
- int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1);
- int[] newElements = new int[newCapacity];
- System.arraycopy(elements, 0, newElements, 0, size);
- elements = newElements;
- }
-
- private void throwIndex(int index) {
- throw new IndexOutOfBoundsException("index: " + index
- + ", size: " + size);
- }
-
- /** returns the first few positions (without offsets); debug only */
- public String toString(int stride) {
- int s = size() / stride;
- int len = Math.min(10, s); // avoid printing huge lists
- StringBuilder buf = new StringBuilder(4*len);
- buf.append("[");
- for (int i = 0; i < len; i++) {
- buf.append(get(i*stride));
- if (i < len-1) buf.append(", ");
- }
- if (len != s) buf.append(", ..."); // and some more...
- buf.append("]");
- return buf.toString();
- }
- }
-
-
///////////////////////////////////////////////////////////////////////////////
// Nested classes:
///////////////////////////////////////////////////////////////////////////////
@@ -764,7 +768,7 @@ public class MemoryIndex {
@Override
public long size() {
- return info.sortedTerms.length;
+ return info.terms.size();
}
@Override
@@ -775,17 +779,17 @@ public class MemoryIndex {
@Override
public long getSumDocFreq() {
// each term has df=1
- return info.sortedTerms.length;
+ return info.terms.size();
}
@Override
public int getDocCount() {
- return info.sortedTerms.length > 0 ? 1 : 0;
+ return info.terms.size() > 0 ? 1 : 0;
}
@Override
public boolean hasOffsets() {
- return stride == 3;
+ return storeOffsets;
}
@Override
@@ -822,48 +826,62 @@ public class MemoryIndex {
this.info = info;
info.sortTerms();
}
+
+ private final int binarySearch(BytesRef b, BytesRef bytesRef, int low,
+ int high, BytesRefHash hash, int[] ords, Comparator<BytesRef> comparator) {
+ int mid = 0;
+ while (low <= high) {
+ mid = (low + high) >>> 1;
+ hash.get(ords[mid], bytesRef);
+ final int cmp = comparator.compare(bytesRef, b);
+ if (cmp < 0) {
+ low = mid + 1;
+ } else if (cmp > 0) {
+ high = mid - 1;
+ } else {
+ return mid;
+ }
+ }
+ assert comparator.compare(bytesRef, b) != 0;
+ return -(low + 1);
+ }
+
@Override
public boolean seekExact(BytesRef text, boolean useCache) {
- termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
- if (termUpto >= 0) {
- br.copyBytes(info.sortedTerms[termUpto].getKey());
- return true;
- } else {
- return false;
- }
+ termUpto = binarySearch(text, br, 0, info.terms.size()-1, info.terms, info.sortedTerms, BytesRef.getUTF8SortedAsUnicodeComparator());
+ return termUpto >= 0;
}
@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache) {
- termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
+ termUpto = binarySearch(text, br, 0, info.terms.size()-1, info.terms, info.sortedTerms, BytesRef.getUTF8SortedAsUnicodeComparator());
if (termUpto < 0) { // not found; choose successor
- termUpto = -termUpto -1;
- if (termUpto >= info.sortedTerms.length) {
+ termUpto = -termUpto-1;
+ if (termUpto >= info.terms.size()) {
return SeekStatus.END;
} else {
- br.copyBytes(info.sortedTerms[termUpto].getKey());
+ info.terms.get(info.sortedTerms[termUpto], br);
return SeekStatus.NOT_FOUND;
}
} else {
- br.copyBytes(info.sortedTerms[termUpto].getKey());
return SeekStatus.FOUND;
}
}
@Override
public void seekExact(long ord) {
- assert ord < info.sortedTerms.length;
+ assert ord < info.terms.size();
termUpto = (int) ord;
}
@Override
public BytesRef next() {
termUpto++;
- if (termUpto >= info.sortedTerms.length) {
+ if (termUpto >= info.terms.size()) {
return null;
} else {
- br.copyBytes(info.sortedTerms[termUpto].getKey());
+ info.terms.get(info.sortedTerms[termUpto], br);
return br;
}
}
@@ -885,7 +903,7 @@ public class MemoryIndex {
@Override
public long totalTermFreq() {
- return info.sortedTerms[termUpto].getValue().size();
+ return info.sliceArray.freq[info.sortedTerms[termUpto]];
}
@Override
@@ -893,7 +911,7 @@ public class MemoryIndex {
if (reuse == null || !(reuse instanceof MemoryDocsEnum)) {
reuse = new MemoryDocsEnum();
}
- return ((MemoryDocsEnum) reuse).reset(liveDocs, info.sortedTerms[termUpto].getValue());
+ return ((MemoryDocsEnum) reuse).reset(liveDocs, info.sliceArray.freq[info.sortedTerms[termUpto]]);
}
@Override
@@ -901,7 +919,8 @@ public class MemoryIndex {
if (reuse == null || !(reuse instanceof MemoryDocsAndPositionsEnum)) {
reuse = new MemoryDocsAndPositionsEnum();
}
- return ((MemoryDocsAndPositionsEnum) reuse).reset(liveDocs, info.sortedTerms[termUpto].getValue());
+ final int ord = info.sortedTerms[termUpto];
+ return ((MemoryDocsAndPositionsEnum) reuse).reset(liveDocs, info.sliceArray.start[ord], info.sliceArray.end[ord], info.sliceArray.freq[ord]);
}
@Override
@@ -924,16 +943,16 @@ public class MemoryIndex {
}
private class MemoryDocsEnum extends DocsEnum {
- private ArrayIntList positions;
private boolean hasNext;
private Bits liveDocs;
private int doc = -1;
+ private int freq;
- public DocsEnum reset(Bits liveDocs, ArrayIntList positions) {
+ public DocsEnum reset(Bits liveDocs, int freq) {
this.liveDocs = liveDocs;
- this.positions = positions;
hasNext = true;
doc = -1;
+ this.freq = freq;
return this;
}
@@ -959,26 +978,35 @@ public class MemoryIndex {
@Override
public int freq() throws IOException {
- return positions.size();
+ return freq;
}
}
private class MemoryDocsAndPositionsEnum extends DocsAndPositionsEnum {
- private ArrayIntList positions;
- private int posUpto;
+ private int posUpto; // for assert
private boolean hasNext;
private Bits liveDocs;
private int doc = -1;
+ private SliceReader sliceReader;
+ private int freq;
+ private int startOffset;
+ private int endOffset;
+
+ public MemoryDocsAndPositionsEnum() {
+ this.sliceReader = new SliceReader(intBlockPool);
+ }
- public DocsAndPositionsEnum reset(Bits liveDocs, ArrayIntList positions) {
+ public DocsAndPositionsEnum reset(Bits liveDocs, int start, int end, int freq) {
this.liveDocs = liveDocs;
- this.positions = positions;
- posUpto = 0;
+ this.sliceReader.reset(start, end);
+ posUpto = 0; // for assert
hasNext = true;
doc = -1;
+ this.freq = freq;
return this;
}
+
@Override
public int docID() {
return doc;
@@ -1001,22 +1029,31 @@ public class MemoryIndex {
@Override
public int freq() throws IOException {
- return positions.size() / stride;
+ return freq;
}
@Override
public int nextPosition() {
- return positions.get(posUpto++ * stride);
+ assert posUpto++ < freq;
+ assert !sliceReader.endOfSlice() : " stores offsets : " + startOffset;
+ if (storeOffsets) {
+ int pos = sliceReader.readInt();
+ startOffset = sliceReader.readInt();
+ endOffset = sliceReader.readInt();
+ return pos;
+ } else {
+ return sliceReader.readInt();
+ }
}
@Override
public int startOffset() {
- return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 1);
+ return startOffset;
}
@Override
public int endOffset() {
- return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 2);
+ return endOffset;
}
@Override
@@ -1105,4 +1142,58 @@ public class MemoryIndex {
return norms;
}
}
+
+ /**
+ * Resets the {@link MemoryIndex} to its initial state and recycles all internal buffers.
+ */
+ public void reset() {
+ this.fieldInfos.clear();
+ this.fields.clear();
+ this.sortedFields = null;
+ byteBlockPool.reset(false, true); // no need to 0-fill the buffers
+ intBlockPool.reset(true, true); // here we must 0-fill since we use slices
+ }
+
+ private static final class SliceByteStartArray extends DirectBytesStartArray {
+ int[] start; // the start offset in the IntBlockPool per term
+ int[] end; // the end pointer in the IntBlockPool for the postings slice per term
+ int[] freq; // the term frequency
+
+ public SliceByteStartArray(int initSize) {
+ super(initSize);
+ }
+
+ @Override
+ public int[] init() {
+ final int[] ord = super.init();
+ start = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
+ end = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
+ freq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
+ assert start.length >= ord.length;
+ assert end.length >= ord.length;
+ assert freq.length >= ord.length;
+ return ord;
+ }
+
+ @Override
+ public int[] grow() {
+ final int[] ord = super.grow();
+ if (start.length < ord.length) {
+ start = ArrayUtil.grow(start, ord.length);
+ end = ArrayUtil.grow(end, ord.length);
+ freq = ArrayUtil.grow(freq, ord.length);
+ }
+ assert start.length >= ord.length;
+ assert end.length >= ord.length;
+ assert freq.length >= ord.length;
+ return ord;
+ }
+
+ @Override
+ public int[] clear() {
+ start = end = freq = null;
+ return super.clear();
+ }
+
+ }
}
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
index 77dcedf..ee4b556 100644
--- a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
@@ -37,14 +37,19 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
@@ -52,8 +57,13 @@ import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util._TestUtil;
/**
@@ -94,15 +104,18 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
* runs random tests, up to ITERATIONS times.
*/
public void testRandomQueries() throws Exception {
- for (int i = 0; i < ITERATIONS; i++)
- assertAgainstRAMDirectory();
+ MemoryIndex index = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
+ for (int i = 0; i < ITERATIONS; i++) {
+ assertAgainstRAMDirectory(index);
+ }
}
-
+
/**
* Build a randomish document for both RAMDirectory and MemoryIndex,
* and run all the queries against it.
*/
- public void assertAgainstRAMDirectory() throws Exception {
+ public void assertAgainstRAMDirectory(MemoryIndex memory) throws Exception {
+ memory.reset();
StringBuilder fooField = new StringBuilder();
StringBuilder termField = new StringBuilder();
@@ -132,7 +145,6 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
writer.addDocument(doc);
writer.close();
- MemoryIndex memory = new MemoryIndex();
memory.addField("foo", fooField.toString(), analyzer);
memory.addField("term", termField.toString(), analyzer);
@@ -144,10 +156,66 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
} else {
assertTrue(memory.getMemorySize() > 0L);
}
-
+ AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
+ IndexReader competitor = DirectoryReader.open(ramdir);
+ duellReaders(competitor, reader);
+ IOUtils.close(reader, competitor);
assertAllQueries(memory, ramdir, analyzer);
ramdir.close();
}
+
+ private void duellReaders(IndexReader competitor, AtomicReader memIndexReader)
+ throws IOException {
+ Fields memFields = memIndexReader.fields();
+ for (String field : MultiFields.getFields(competitor)) {
+ Terms memTerms = memFields.terms(field);
+ Terms iwTerms = MultiFields.getTerms(memIndexReader, field);
+ if (iwTerms == null) {
+ assertNull(memTerms);
+ } else {
+ assertNotNull(memTerms);
+ assertEquals(iwTerms.getDocCount(), memTerms.getDocCount());
+ assertEquals(iwTerms.getSumDocFreq(), memTerms.getSumDocFreq());
+ assertEquals(iwTerms.getSumTotalTermFreq(), memTerms.getSumTotalTermFreq());
+ TermsEnum iwTermsIter = iwTerms.iterator(null);
+ TermsEnum memTermsIter = memTerms.iterator(null);
+ if (iwTerms.hasPositions()) {
+ final boolean offsets = iwTerms.hasOffsets() && memTerms.hasOffsets();
+
+ while(iwTermsIter.next() != null) {
+ assertNotNull(memTermsIter.next());
+ assertEquals(iwTermsIter.term(), memTermsIter.term());
+ DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.docsAndPositions(null, null);
+ DocsAndPositionsEnum memDocsAndPos = memTermsIter.docsAndPositions(null, null);
+ while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
+ assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
+ assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
+ for (int i = 0; i < iwDocsAndPos.freq(); i++) {
+ assertEquals("term: " + iwTermsIter.term().utf8ToString(), iwDocsAndPos.nextPosition(), memDocsAndPos.nextPosition());
+ if (offsets) {
+ assertEquals(iwDocsAndPos.startOffset(), memDocsAndPos.startOffset());
+ assertEquals(iwDocsAndPos.endOffset(), memDocsAndPos.endOffset());
+ }
+ }
+
+ }
+
+ }
+ } else {
+ while(iwTermsIter.next() != null) {
+ assertEquals(iwTermsIter.term(), memTermsIter.term());
+ DocsEnum iwDocsAndPos = iwTermsIter.docs(null, null);
+ DocsEnum memDocsAndPos = memTermsIter.docs(null, null);
+ while(iwDocsAndPos.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
+ assertEquals(iwDocsAndPos.docID(), memDocsAndPos.nextDoc());
+ assertEquals(iwDocsAndPos.freq(), memDocsAndPos.freq());
+ }
+ }
+ }
+ }
+
+ }
+ }
/**
* Run all queries against both the RAMDirectory and MemoryIndex, ensuring they are the same.
@@ -160,7 +228,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
for (String query : queries) {
TopDocs ramDocs = ram.search(qp.parse(query), 1);
TopDocs memDocs = mem.search(qp.parse(query), 1);
- assertEquals(ramDocs.totalHits, memDocs.totalHits);
+ assertEquals(query, ramDocs.totalHits, memDocs.totalHits);
}
reader.close();
}
@@ -202,7 +270,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
public void testDocsEnumStart() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
- MemoryIndex memory = new MemoryIndex();
+ MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
memory.addField("foo", "bar", analyzer);
AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
DocsEnum disi = _TestUtil.docs(random(), reader, "foo", new BytesRef("bar"), null, null, 0);
@@ -220,27 +288,40 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
reader.close();
}
+ private Allocator randomByteBlockAllocator() {
+ if (random().nextBoolean()) {
+ return new RecyclingByteBlockAllocator();
+ } else {
+ return new ByteBlockPool.DirectAllocator();
+ }
+ }
+
public void testDocsAndPositionsEnumStart() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
- MemoryIndex memory = new MemoryIndex(true);
- memory.addField("foo", "bar", analyzer);
- AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
- DocsAndPositionsEnum disi = reader.termPositionsEnum(new Term("foo", "bar"));
- int docid = disi.docID();
- assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
- assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
- assertEquals(0, disi.nextPosition());
- assertEquals(0, disi.startOffset());
- assertEquals(3, disi.endOffset());
-
- // now reuse and check again
- TermsEnum te = reader.terms("foo").iterator(null);
- assertTrue(te.seekExact(new BytesRef("bar"), true));
- disi = te.docsAndPositions(null, disi);
- docid = disi.docID();
- assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
- assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
- reader.close();
+ int numIters = atLeast(3);
+ MemoryIndex memory = new MemoryIndex(true, random().nextInt(50) * 1024 * 1024);
+ for (int i = 0; i < numIters; i++) { // check reuse
+ memory.addField("foo", "bar", analyzer);
+ AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader();
+ assertEquals(1, reader.terms("foo").getSumTotalTermFreq());
+ DocsAndPositionsEnum disi = reader.termPositionsEnum(new Term("foo", "bar"));
+ int docid = disi.docID();
+ assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
+ assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ assertEquals(0, disi.nextPosition());
+ assertEquals(0, disi.startOffset());
+ assertEquals(3, disi.endOffset());
+
+ // now reuse and check again
+ TermsEnum te = reader.terms("foo").iterator(null);
+ assertTrue(te.seekExact(new BytesRef("bar"), true));
+ disi = te.docsAndPositions(null, disi);
+ docid = disi.docID();
+ assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
+ assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ reader.close();
+ memory.reset();
+ }
}
// LUCENE-3831
@@ -248,7 +329,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<RegexpQuery>(regex);
- MemoryIndex mindex = new MemoryIndex();
+ MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", new StringReader("hello there")));
// This throws an NPE
@@ -260,10 +341,65 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
RegexpQuery regex = new RegexpQuery(new Term("field", "worl."));
SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper<RegexpQuery>(regex));
- MemoryIndex mindex = new MemoryIndex();
+ MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", new StringReader("hello there")));
// This passes though
assertEquals(0, mindex.search(wrappedquery), 0.00001f);
}
+
+ public void testSameFieldAddedMultipleTimes() throws IOException {
+ MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
+ MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
+ mindex.addField("field", "the quick brown fox", mockAnalyzer);
+ mindex.addField("field", "jumps over the", mockAnalyzer);
+ AtomicReader reader = (AtomicReader) mindex.createSearcher().getIndexReader();
+ assertEquals(7, reader.terms("field").getSumTotalTermFreq());
+ PhraseQuery query = new PhraseQuery();
+ query.add(new Term("field", "fox"));
+ query.add(new Term("field", "jumps"));
+ assertTrue(mindex.search(query) > 0.1);
+ mindex.reset();
+ mockAnalyzer.setPositionIncrementGap(1 + random().nextInt(10));
+ mindex.addField("field", "the quick brown fox", mockAnalyzer);
+ mindex.addField("field", "jumps over the", mockAnalyzer);
+ assertEquals(0, mindex.search(query), 0.00001f);
+ query.setSlop(10);
+ assertTrue("posGap" + mockAnalyzer.getPositionIncrementGap("field") , mindex.search(query) > 0.0001);
+ }
+
+
+ public void testDuellMemIndex() throws IOException {
+ LineFileDocs lineFileDocs = new LineFileDocs(random());
+ int numDocs = atLeast(10);
+ MemoryIndex memory = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024);
+ for (int i = 0; i < numDocs; i++) {
+ Directory dir = newDirectory();
+ MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
+ Document nextDoc = lineFileDocs.nextDoc();
+ Document doc = new Document();
+ for (Field field : nextDoc.getFields()) {
+ if (field.fieldType().indexed()) {
+ doc.add(field);
+ if (random().nextInt(3) == 0) {
+ doc.add(field); // randomly add the same field twice
+ }
+ }
+ }
+
+ writer.addDocument(doc);
+ writer.close();
+ for (IndexableField field : doc.indexableFields()) {
+ memory.addField(field.name(), ((Field)field).stringValue(), mockAnalyzer);
+ }
+ IndexReader competitor = DirectoryReader.open(dir);
+ AtomicReader memIndexReader= (AtomicReader) memory.createSearcher().getIndexReader();
+ duellReaders(competitor, memIndexReader);
+ IOUtils.close(competitor, memIndexReader);
+ memory.reset();
+ dir.close();
+ }
+ lineFileDocs.close();
+ }
}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
index ff8c400..c0d7d1a 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
@@ -64,7 +64,7 @@ public final class BytesRefList {
lastElement = 0;
currentOffset = 0;
Arrays.fill(offsets, 0);
- pool.reset();
+ pool.reset(false, true); // no need to 0-fill the buffers; we control the allocator
}
/**
@@ -101,10 +101,10 @@ public final class BytesRefList {
*/
public BytesRef get(BytesRef spare, int ord) {
if (lastElement > ord) {
- spare.offset = offsets[ord];
- spare.length = ord == lastElement - 1 ? currentOffset - spare.offset
- : offsets[ord + 1] - spare.offset;
- pool.copyFrom(spare);
+ int offset = offsets[ord];
+ int length = ord == lastElement - 1 ? currentOffset - offset
+ : offsets[ord + 1] - offset;
+ pool.copyFrom(spare, offset, length);
return spare;
}
throw new IndexOutOfBoundsException("index " + ord