Index: lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
--- lucene/src/java/org/apache/lucene/index/DocumentsWriter.java	Sat Jan 29 09:26:17 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/DocumentsWriter.java	Sat Jan 29 11:15:04 2011 -0500
@@ -35,9 +35,11 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMFile;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.RecyclingByteBlockAllocator;
 import org.apache.lucene.util.ThreadInterruptedException;
-import org.apache.lucene.util.RamUsageEstimator;
+
 import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
 import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 
@@ -577,7 +579,29 @@
 
       final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos,
                                                                  numDocs, writer.getConfig().getTermIndexInterval(),
-                                                                 SegmentCodecs.build(fieldInfos, writer.codecs));
+                                                                 SegmentCodecs.build(fieldInfos, writer.codecs),
+                                                                 pendingDeletes);
+      // Apply delete-by-docID now (delete-byDocID only
+      // happens when an exception is hit processing that
+      // doc, eg if analyzer has some problem w/ the text):
+      if (pendingDeletes.docIDs.size() > 0) {
+        // nocommit -- must remove where we apply docIDs
+        // now! it should never happen; put assert
+        flushState.deletedDocs = new BitVector(numDocs);
+        for(int delDocID : pendingDeletes.docIDs) {
+          flushState.deletedDocs.set(delDocID);
+        }
+        pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * SegmentDeletes.BYTES_PER_DEL_DOCID);
+        pendingDeletes.docIDs.clear();
+      }
+
+      // nocommit -- if nothing is left in the
+      // pendingDeletes (ie there are no buffered
+      // del-by-Query) then don't push it!
+
+      // nocommit -- on push, cutover to more efficient
+      // structure; eg just sort the terms into an array?
+      // (after landing delGen) -- it's frozen at that point
 
       newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false);
 
@@ -589,10 +613,14 @@
       double startMBUsed = bytesUsed()/1024./1024.;
 
       consumer.flush(threads, flushState);
+      newSegment.setHasVectors(flushState.hasVectors);
 
       if (infoStream != null) {
         message("new segment has " + (flushState.hasVectors ?
                                       "vectors" : "no vectors"));
+        if (flushState.deletedDocs != null) {
+          message("new segment has " + flushState.deletedDocs.count() + " deleted docs");
+        }
         message("flushedFiles=" + newSegment.files());
         message("flushed codecs=" + newSegment.getSegmentCodecs());
       }
@@ -613,6 +641,30 @@
         newSegment.setUseCompoundFile(true);
       }
 
+      // Must write deleted docs after the CFS so we don't
+      // slurp the del file into CFS:
+      if (flushState.deletedDocs != null) {
+        final int delCount = flushState.deletedDocs.count();
+        assert delCount > 0;
+        newSegment.setDelCount(delCount);
+        newSegment.advanceDelGen();
+        final String delFileName = newSegment.getDelFileName();
+        boolean success2 = false;
+        try {
+          flushState.deletedDocs.write(directory, delFileName);
+          success2 = true;
+        } finally {
+          if (!success2) {
+            try {
+              directory.deleteFile(delFileName);
+            } catch (Throwable t) {
+              // suppress this so we keep throwing the
+              // original exception
+            }
+          }
+        }
+      }
+
       if (infoStream != null) {
         message("flush: segment=" + newSegment);
         final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.;
Index: lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
--- lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java	Sat Jan 29 09:26:17 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java	Sat Jan 29 11:15:04 2011 -0500
@@ -26,8 +26,9 @@
 
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.PostingsConsumer;
+import org.apache.lucene.index.codecs.TermStats;
 import org.apache.lucene.index.codecs.TermsConsumer;
-import org.apache.lucene.index.codecs.TermStats;
+import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
 
@@ -108,7 +109,7 @@
 
       // If this field has postings then add them to the
       // segment
-      appendPostings(fields, consumer);
+      appendPostings(fieldName, state, fields, consumer);
 
       for(int i=0;i