Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 651026)
+++ CHANGES.txt (working copy)
@@ -94,6 +94,9 @@
hitting an exception in readInternal, the buffer is incorrectly
filled with stale bytes such that subsequent calls to readByte()
return incorrect results. (Trejkaz via Mike McCandless)
+
+ 8. LUCENE-1267: Added numDocs() and maxDoc() to IndexWriter;
+ deprecated docCount(). (Mike McCandless)
New features
Index: src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 651026)
+++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -104,8 +104,11 @@
// optimize the index and check that the new doc count is correct
writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
+ assertEquals(100, writer.maxDoc());
+ assertEquals(60, writer.numDocs());
writer.optimize();
- assertEquals(60, writer.docCount());
+ assertEquals(60, writer.maxDoc());
+ assertEquals(60, writer.numDocs());
writer.close();
// check that the index reader gives the same numbers.
@@ -117,7 +120,8 @@
// make sure opening a new index for create over
// this existing one works correctly:
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
- assertEquals(0, writer.docCount());
+ assertEquals(0, writer.maxDoc());
+ assertEquals(0, writer.numDocs());
writer.close();
}
@@ -3030,7 +3034,10 @@
writer = new IndexWriter(dir,
false, new StandardAnalyzer(),
IndexWriter.MaxFieldLength.LIMITED);
+ assertEquals(8, writer.numDocs());
+ assertEquals(10, writer.maxDoc());
writer.expungeDeletes();
+ assertEquals(8, writer.numDocs());
writer.close();
ir = IndexReader.open(dir);
assertEquals(8, ir.maxDoc());
@@ -3075,6 +3082,7 @@
false, new StandardAnalyzer(),
IndexWriter.MaxFieldLength.LIMITED);
writer.setMergeFactor(3);
+ assertEquals(49, writer.numDocs());
writer.expungeDeletes();
writer.close();
ir = IndexReader.open(dir);
Index: src/test/org/apache/lucene/index/TestCheckIndex.java
===================================================================
--- src/test/org/apache/lucene/index/TestCheckIndex.java (revision 651026)
+++ src/test/org/apache/lucene/index/TestCheckIndex.java (working copy)
@@ -49,7 +49,11 @@
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex.out = new PrintStream(bos);
- assertTrue(CheckIndex.check(dir, false, null));
+ if (!CheckIndex.check(dir, false, null)) {
+ System.out.println("CheckIndex failed");
+ System.out.println(bos.toString());
+ fail();
+ }
final List onlySegments = new ArrayList();
onlySegments.add("_0");
assertTrue(CheckIndex.check(dir, false, onlySegments));
Index: src/java/org/apache/lucene/index/SegmentInfo.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfo.java (revision 651026)
+++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy)
@@ -20,6 +20,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BitVector;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
@@ -73,6 +74,9 @@
// other segments
private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
+ private int delCount; // How many deleted docs in this segment, or -1 if not yet known
+ // (if it's an older index)
+
public SegmentInfo(String name, int docCount, Directory dir) {
this.name = name;
this.docCount = docCount;
@@ -84,6 +88,7 @@
docStoreOffset = -1;
docStoreSegment = name;
docStoreIsCompoundFile = false;
+ delCount = 0;
}
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) {
@@ -99,6 +104,7 @@
this.docStoreOffset = docStoreOffset;
this.docStoreSegment = docStoreSegment;
this.docStoreIsCompoundFile = docStoreIsCompoundFile;
+ delCount = 0;
assert docStoreOffset == -1 || docStoreSegment != null;
}
@@ -122,6 +128,7 @@
}
isCompoundFile = src.isCompoundFile;
hasSingleNormFile = src.hasSingleNormFile;
+ delCount = src.delCount;
}
/**
@@ -168,6 +175,11 @@
}
isCompoundFile = input.readByte();
preLockless = (isCompoundFile == CHECK_DIR);
+ if (format <= SegmentInfos.FORMAT_DEL_COUNT) {
+ delCount = input.readInt();
+ assert delCount <= docCount;
+ } else
+ delCount = -1;
} else {
delGen = CHECK_DIR;
normGen = null;
@@ -177,6 +189,7 @@
docStoreOffset = -1;
docStoreIsCompoundFile = false;
docStoreSegment = null;
+ delCount = -1;
}
}
@@ -263,6 +276,7 @@
SegmentInfo si = new SegmentInfo(name, docCount, dir);
si.isCompoundFile = isCompoundFile;
si.delGen = delGen;
+ si.delCount = delCount;
si.preLockless = preLockless;
si.hasSingleNormFile = hasSingleNormFile;
if (normGen != null) {
@@ -429,6 +443,23 @@
}
}
+ int getDelCount() throws IOException {
+ if (delCount == -1) {
+ if (hasDeletions()) {
+ final String delFileName = getDelFileName();
+ delCount = new BitVector(dir, delFileName).count();
+ } else
+ delCount = 0;
+ }
+ assert delCount <= docCount;
+ return delCount;
+ }
+
+ void setDelCount(int delCount) {
+ this.delCount = delCount;
+ assert delCount <= docCount;
+ }
+
int getDocStoreOffset() {
return docStoreOffset;
}
@@ -475,6 +506,7 @@
}
}
output.writeByte(isCompoundFile);
+ output.writeInt(delCount);
}
private void addIfExists(List files, String fileName) throws IOException {
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (revision 651026)
+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -55,10 +55,12 @@
private boolean deletedDocsDirty = false;
private boolean normsDirty = false;
private boolean undeleteAll = false;
+ private int pendingDeleteCount;
private boolean rollbackDeletedDocsDirty = false;
private boolean rollbackNormsDirty = false;
private boolean rollbackUndeleteAll = false;
+ private int rollbackPendingDeleteCount;
IndexInput freqStream;
IndexInput proxStream;
@@ -351,11 +353,16 @@
if (hasDeletions(si)) {
deletedDocs = new BitVector(directory(), si.getDelFileName());
- // Verify # deletes does not exceed maxDoc for this segment:
- if (deletedDocs.count() > maxDoc()) {
- throw new CorruptIndexException("number of deletes (" + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name);
- }
- }
+ assert si.getDelCount() == deletedDocs.count() :
+ "delete count mismatch: info=" + si.getDelCount() + " vs BitVector=" + deletedDocs.count();
+
+ // Verify # deletes does not exceed maxDoc for this
+ // segment:
+ assert si.getDelCount() <= maxDoc() :
+ "delete count mismatch: " + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name;
+
+ } else
+ assert si.getDelCount() == 0;
}
protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos) throws CorruptIndexException, IOException {
@@ -525,9 +532,12 @@
// .tmp & renaming it) because the file is not live
// until segments file is written:
deletedDocs.write(directory(), si.getDelFileName());
+
+ si.setDelCount(si.getDelCount()+pendingDeleteCount);
}
if (undeleteAll && si.hasDeletions()) {
si.clearDelGen();
+ si.setDelCount(0);
}
if (normsDirty) { // re-write norms
si.setNumFields(fieldInfos.size());
@@ -620,7 +630,8 @@
deletedDocs = new BitVector(maxDoc());
deletedDocsDirty = true;
undeleteAll = false;
- deletedDocs.set(docNum);
+ if (!deletedDocs.getAndSet(docNum))
+ pendingDeleteCount++;
}
protected void doUndeleteAll() {
@@ -1009,6 +1020,7 @@
rollbackDeletedDocsDirty = deletedDocsDirty;
rollbackNormsDirty = normsDirty;
rollbackUndeleteAll = undeleteAll;
+ rollbackPendingDeleteCount = pendingDeleteCount;
Iterator it = norms.values().iterator();
while (it.hasNext()) {
Norm norm = (Norm) it.next();
@@ -1021,6 +1033,7 @@
deletedDocsDirty = rollbackDeletedDocsDirty;
normsDirty = rollbackNormsDirty;
undeleteAll = rollbackUndeleteAll;
+ pendingDeleteCount = rollbackPendingDeleteCount;
Iterator it = norms.values().iterator();
while (it.hasNext()) {
Norm norm = (Norm) it.next();
Index: src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 651026)
+++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy)
@@ -1187,7 +1187,8 @@
int docStart = 0;
boolean any = false;
for (int i = 0; i < infosEnd; i++) {
- IndexReader reader = SegmentReader.get(infos.info(i), false);
+ final SegmentInfo info = infos.info(i);
+ SegmentReader reader = SegmentReader.get(info, false);
boolean success = false;
try {
any |= applyDeletes(reader, docStart);
@@ -1212,7 +1213,7 @@
// Apply buffered delete terms, queries and docIDs to the
// provided reader
- private final synchronized boolean applyDeletes(IndexReader reader, int docIDStart)
+ private final synchronized boolean applyDeletes(SegmentReader reader, int docIDStart)
throws CorruptIndexException, IOException {
final int docEnd = docIDStart + reader.maxDoc();
Index: src/java/org/apache/lucene/index/SegmentInfos.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfos.java (revision 651026)
+++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy)
@@ -61,8 +61,12 @@
* ensure all bytes were successfully written. */
public static final int FORMAT_CHECKSUM = -5;
+ /** This format adds the deletion count for each segment.
+ * This way IndexWriter can efficiently report numDocs(). */
+ public static final int FORMAT_DEL_COUNT = -6;
+
/* This must always point to the most recent file format. */
- private static final int CURRENT_FORMAT = FORMAT_CHECKSUM;
+ static final int CURRENT_FORMAT = FORMAT_DEL_COUNT;
public int counter = 0; // used to name new segments
/**
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 651026)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -1798,17 +1798,48 @@
return analyzer;
}
- /** Returns the number of documents currently in this index. */
+ /** Returns the number of documents currently in this
+ * index; deleted documents are still counted.
+ * @deprecated Please use {@link #maxDoc()} (same as this
+ * method) or {@link #numDocs()} (which subtracts deleted
+ * documents), instead. */
public synchronized int docCount() {
ensureOpen();
+ return maxDoc();
+ }
+
+ /** Returns total number of docs in this index, including
+ * docs not yet flushed (still in the RAM buffer),
+ * without subtracting deleted docs.
+ * @see #numDocs */
+ public synchronized int maxDoc() {
int count;
if (docWriter != null)
count = docWriter.getNumDocsInRAM();
else
count = 0;
+
+ for (int i = 0; i < segmentInfos.size(); i++)
+ count += segmentInfos.info(i).docCount;
+ return count;
+ }
+
+ /** Returns total number of docs in this index, including
+ * docs not yet flushed (still in the RAM buffer), minus
+ * the docs already deleted. NOTE: buffered deletions
+ * are not yet subtracted. If you really need these to be
+ * subtracted you should call {@link #commit()} first.
+ * @see #maxDoc */
+ public synchronized int numDocs() throws IOException {
+ int count;
+ if (docWriter != null)
+ count = docWriter.getNumDocsInRAM();
+ else
+ count = 0;
+
for (int i = 0; i < segmentInfos.size(); i++) {
- SegmentInfo si = segmentInfos.info(i);
- count += si.docCount;
+ final SegmentInfo info = segmentInfos.info(i);
+ count += info.docCount - info.getDelCount();
}
return count;
}
@@ -3354,6 +3385,7 @@
BitVector deletes = null;
int docUpto = 0;
+ int delCount = 0;
final int numSegmentsToMerge = sourceSegments.size();
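for(int i=0;i<numSegmentsToMerge;i++) {
[... text lost in extraction: the remainder of this IndexWriter.java hunk through the start of the hunk that adds BitVector.getAndSet ...]
+ /** Sets the value of <code>bit</code> to true, and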
+ * returns true if bit was already set */
+ public final boolean getAndSet(int bit) {
+ if (bit >= size) {
+ throw new ArrayIndexOutOfBoundsException(bit);
+ }
+ final int pos = bit >> 3;
+ final int v = bits[pos];
+ final int flag = 1 << (bit & 7);
+ if ((flag & v) != 0)
+ return true;
+ else {
+ bits[pos] = (byte) ((v | flag)&0xff);
+ if (count != -1)
+ count++;
+ return false;
+ }
+ }
+
/** Sets the value of <code>bit</code> to zero. */
public final void clear(int bit) {
if (bit >= size) {