Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/realtime_search:r953476-1097796 Property changes on: lucene ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/realtime_search/lucene:r953476-1097796 Merged /lucene/solr/branches/newtrunk/lucene:r924462-924482 Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1097796) +++ lucene/CHANGES.txt (working copy) @@ -173,7 +173,71 @@ globally across IndexWriter sessions and persisted into a X.fnx file on successful commit. The corresponding file format changes are backwards- compatible. (Michael Busch, Simon Willnauer) + +* LUCENE-2956, LUCENE-2573, LUCENE-2324, LUCENE-2555: Changes from + DocumentsWriterPerThread: + - IndexWriter now uses a DocumentsWriter per thread when indexing documents. + Each DocumentsWriterPerThread indexes documents in its own private segment, + and the in-memory segments are no longer merged on flush. Instead, each + segment is separately flushed to disk and subsequently merged with normal + segment merging. + + - DocumentsWriterPerThread (DWPT) is now flushed concurrently based on a + FlushPolicy. When a DWPT is flushed, a fresh DWPT is swapped in so that + indexing may continue concurrently with flushing. The selected + DWPT flushes all its RAM resident documents to disk. Note: Segment flushes + don't flush all RAM resident documents but only the documents private to + the DWPT selected for flushing. + + - Flushing is now controlled by a FlushPolicy that is called for every add, + update or delete on IndexWriter. By default DWPTs are flushed either on + maxBufferedDocs per DWPT or the global active memory used. Once the active + memory exceeds ramBufferSizeMB only the largest DWPT is selected for + flushing and the memory used by this DWPT is subtracted from the active + memory and added to a flushing memory pool, which can lead to temporarily + higher memory usage due to ongoing indexing. + + - IndexWriter can now utilize a ramBufferSize > 2048 MB. Each DWPT can address + up to 2048 MB of memory, so the ramBufferSize is now bounded by the max + number of DWPTs available in the used DocumentsWriterPerThreadPool. + IndexWriter's net memory consumption can grow far beyond the 2048 MB limit if + the application can use all available DWPTs. To prevent a DWPT from + exhausting its address space, IndexWriter will forcefully flush a DWPT if its + hard memory limit is exceeded. The RAMPerThreadHardLimitMB can be controlled + via IndexWriterConfig and defaults to 1945 MB. + Since IndexWriter flushes DWPTs concurrently, not all memory is released + immediately. Applications should still use a ramBufferSize significantly + lower than the JVM's available heap memory, since under high load multiple + flushing DWPTs can consume substantial transient memory when I/O performance + is slow relative to the indexing rate. + + - IndexWriter#commit no longer blocks concurrent indexing while flushing all + 'currently' RAM resident documents to disk. Yet, flushes that occur while + a full flush is running are queued and will happen after all DWPTs involved + in the full flush are done flushing. Applications using multiple threads + during indexing that trigger a full flush (e.g. call commit() or open a new + NRT reader) can use significantly more transient memory.
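Editorial aside (not part of this patch): the notes above introduce several flush-related settings on IndexWriterConfig (ramBufferSizeMB, maxBufferedDocs, maxBufferedDelTerms, and the new RAMPerThreadHardLimitMB). Below is a minimal sketch of how an application might set them; the class name, index path, analyzer choice and Version constant are placeholders, and the setter names are assumed to match the IndexWriterConfig API described in these notes.

// Sketch only: configuring the flush behaviour described above.
// Assumptions/placeholders: class name, index path, analyzer, Version constant.
import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class DwptConfigSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/tmp/index"));   // placeholder path
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
        new StandardAnalyzer(Version.LUCENE_40));
    conf.setRAMBufferSizeMB(256.0);        // once active memory exceeds this, the largest DWPT is selected for flushing
    conf.setMaxBufferedDeleteTerms(1000);  // applies/flushes deletes only; never triggers a segment flush
    conf.setRAMPerThreadHardLimitMB(1945); // per-DWPT hard limit (the default) that forces a flush of that DWPT
    IndexWriter writer = new IndexWriter(dir, conf);
    // ... index documents from multiple threads; commit() or opening an NRT reader triggers a full flush ...
    writer.close();
  }
}

Because only the largest DWPT is flushed when the global budget is exceeded, and its memory is released only once that flush completes, a ramBufferSizeMB well below the JVM heap leaves headroom for the transient memory of concurrent flushes.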
+ + - IndexWriter#addDocument and IndexWriter#updateDocument can block indexing + threads if the number of active + number of flushing DWPTs exceeds a + safety limit. By default this happens if 2 * the max number of available thread + states (DWPTPool) is exceeded. This safety limit prevents applications from + exhausting their available memory if flushing can't keep up with + concurrently indexing threads. + + - IndexWriter only applies and flushes deletes if the maxBufferedDelTerms + limit is reached during indexing. No segment flushes will be triggered + due to this setting. + + - IndexWriter#flush(boolean, boolean) no longer synchronizes on IndexWriter. + A dedicated flushLock has been introduced to prevent multiple full- + flushes from happening concurrently. + + - DocumentsWriter no longer writes shared doc stores. + + (Mike McCandless, Michael Busch, Simon Willnauer) + API Changes * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer Index: lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (working copy) @@ -81,6 +81,6 @@ } public int getAddress() { - return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); + return upto + (offset0 & DocumentsWriterPerThread.BYTE_BLOCK_NOT_MASK); } } \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/DocConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocConsumer.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocConsumer.java (working copy) @@ -18,11 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; abstract class DocConsumer { - abstract DocConsumerPerThread addThread(DocumentsWriterThreadState perThread) throws IOException; - abstract void flush(final Collection<DocConsumerPerThread> threads, final SegmentWriteState state) throws IOException; + abstract void processDocument(FieldInfos fieldInfos) throws IOException; + abstract void finishDocument() throws IOException; + abstract void flush(final SegmentWriteState state) throws IOException; abstract void abort(); abstract boolean freeRAM(); + abstract void doAfterFlush(); } Index: lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java (working copy) @@ -1,34 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -import java.io.IOException; - -abstract class DocConsumerPerThread { - - /** Process the document. If there is - * something for this document to be done in docID order, - * you should encapsulate that as a - * DocumentsWriter.DocWriter and return it. - * DocumentsWriter then calls finish() on this object - * when it's its turn. */ - abstract DocumentsWriter.DocWriter processDocument(FieldInfos fieldInfos) throws IOException; - - abstract void doAfterFlush(); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (working copy) @@ -18,22 +18,25 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.Map; abstract class DocFieldConsumer { - /** Called when DocumentsWriter decides to create a new + /** Called when DocumentsWriterPerThread decides to create a new * segment */ - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; /** Called when an aborting exception is hit */ abstract void abort(); - /** Add a new thread */ - abstract DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException; - - /** Called when DocumentsWriter is using too much RAM. + /** Called when DocumentsWriterPerThread is using too much RAM. * The consumer should free RAM, if possible, returning * true if any RAM was in fact freed. */ abstract boolean freeRAM(); - } + + abstract void startDocument() throws IOException; + + abstract DocFieldConsumerPerField addField(FieldInfo fi); + + abstract void finishDocument() throws IOException; + +} Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (working copy) @@ -24,4 +24,5 @@ /** Processes all occurrences of a single field */ abstract void processFields(Fieldable[] fields, int count) throws IOException; abstract void abort(); + abstract FieldInfo getFieldInfo(); } Index: lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -abstract class DocFieldConsumerPerThread { - abstract void startDocument() throws IOException; - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract DocFieldConsumerPerField addField(FieldInfo fi); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (working copy) @@ -19,10 +19,15 @@ import java.io.IOException; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; import java.util.Map; -import java.util.HashMap; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; + /** * This is a DocConsumer that gathers all fields under the * same name, and calls per-field consumers to process field @@ -33,26 +38,39 @@ final class DocFieldProcessor extends DocConsumer { - final DocumentsWriter docWriter; final DocFieldConsumer consumer; final StoredFieldsWriter fieldsWriter; - public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) { - this.docWriter = docWriter; + // Holds all fields seen in current doc + DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; + int fieldCount; + + // Hash table for all fields ever seen + DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; + int hashMask = 1; + int totalFieldCount; + + float docBoost; + int fieldGen; + final DocumentsWriterPerThread.DocState docState; + + public DocFieldProcessor(DocumentsWriterPerThread docWriter, DocFieldConsumer consumer) { + this.docState = docWriter.docState; this.consumer = consumer; fieldsWriter = new StoredFieldsWriter(docWriter); } @Override - public void flush(Collection threads, SegmentWriteState state) throws IOException { + public void flush(SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - for ( DocConsumerPerThread thread : threads) { - DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread; - childThreadsAndFields.put(perThread.consumer, perThread.fields()); + Map childFields = new HashMap(); + Collection fields = fields(); + for (DocFieldConsumerPerField f : fields) { + childFields.put(f.getFieldInfo(), f); } + fieldsWriter.flush(state); - consumer.flush(childThreadsAndFields, state); + consumer.flush(childFields, state); // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, @@ -64,8 +82,20 @@ @Override public void abort() { - fieldsWriter.abort(); - consumer.abort(); + for(int i=0;i fields() { + Collection fields = new HashSet(); + for(int i=0;i fieldHash.length; + + final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize]; + + // Rehash + int newHashMask = newHashSize-1; + for(int j=0;j docFields = doc.getFields(); + final int numDocFields = docFields.size(); + + // Absorb any new fields first seen in this document. 
+ // Also absorb any changes to fields we had already + // seen before (eg suddenly turning on norms or + // vectors, etc.): + + for(int i=0;i= fieldHash.length/2) + rehash(); + } else { + fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), + field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), + field.getOmitNorms(), false, field.getOmitTermFreqAndPositions()); + } + + if (thisFieldGen != fp.lastGen) { + + // First time we're seeing this field for this doc + fp.fieldCount = 0; + + if (fieldCount == fields.length) { + final int newSize = fields.length*2; + DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize]; + System.arraycopy(fields, 0, newArray, 0, fieldCount); + fields = newArray; + } + + fields[fieldCount++] = fp; + fp.lastGen = thisFieldGen; + } + + fp.addField(field); + + if (field.isStored()) { + fieldsWriter.addField(field, fp.fieldInfo); + } + } + + // If we are writing vectors then we must visit + // fields in sorted order so they are written in + // sorted order. TODO: we actually only need to + // sort the subset of fields that have vectors + // enabled; we could save [small amount of] CPU + // here. + quickSort(fields, 0, fieldCount-1); + + for(int i=0;i= hi) + return; + else if (hi == 1+lo) { + if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { + final DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[hi]; + array[hi] = tmp; + } + return; + } + + int mid = (lo + hi) >>> 1; + + if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp; + } + + if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp = array[mid]; + array[mid] = array[hi]; + array[hi] = tmp; + + if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) { + DocFieldProcessorPerField tmp2 = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp2; + } + } + + int left = lo + 1; + int right = hi - 1; + + if (left >= right) + return; + + DocFieldProcessorPerField partition = array[mid]; + + for (; ;) { + while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0) + --right; + + while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0) + ++left; + + if (left < right) { + DocFieldProcessorPerField tmp = array[left]; + array[left] = array[right]; + array[right] = tmp; + --right; + } else { + break; + } + } + + quickSort(array, lo, left); + quickSort(array, left + 1, hi); + } } Index: lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java (working copy) @@ -18,6 +18,8 @@ */ import org.apache.lucene.document.Fieldable; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; /** * Holds all per thread, per field state. 
@@ -34,11 +36,22 @@ int fieldCount; Fieldable[] fields = new Fieldable[1]; - public DocFieldProcessorPerField(final DocFieldProcessorPerThread perThread, final FieldInfo fieldInfo) { - this.consumer = perThread.consumer.addField(fieldInfo); + public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) { + this.consumer = docFieldProcessor.consumer.addField(fieldInfo); this.fieldInfo = fieldInfo; } + public void addField(Fieldable field) { + if (fieldCount == fields.length) { + int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + Fieldable[] newArray = new Fieldable[newSize]; + System.arraycopy(fields, 0, newArray, 0, fieldCount); + fields = newArray; + } + + fields[fieldCount++] = field; + } + public void abort() { consumer.abort(); } Index: lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (working copy) @@ -1,307 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Comparator; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.io.IOException; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -/** - * Gathers all Fieldables for a document under the same - * name, updates FieldInfos, and calls per-field consumers - * to process field by field. - * - * Currently, only a single thread visits the fields, - * sequentially, for processing. 
- */ - -final class DocFieldProcessorPerThread extends DocConsumerPerThread { - - float docBoost; - int fieldGen; - final DocFieldProcessor docFieldProcessor; - final DocFieldConsumerPerThread consumer; - - // Holds all fields seen in current doc - DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; - int fieldCount; - - // Hash table for all fields seen in current segment - DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; - int hashMask = 1; - int totalFieldCount; - - final StoredFieldsWriterPerThread fieldsWriter; - - final DocumentsWriter.DocState docState; - - public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException { - this.docState = threadState.docState; - this.docFieldProcessor = docFieldProcessor; - this.consumer = docFieldProcessor.consumer.addThread(this); - fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState); - } - - @Override - public void abort() { - for(int i=0;i fields() { - Collection fields = new HashSet(); - for(int i=0;i fieldHash.length; - - final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize]; - - // Rehash - int newHashMask = newHashSize-1; - for(int j=0;j docFields = doc.getFields(); - final int numDocFields = docFields.size(); - - // Absorb any new fields first seen in this document. - // Also absorb any changes to fields we had already - // seen before (eg suddenly turning on norms or - // vectors, etc.): - - for(int i=0;i= fieldHash.length/2) - rehash(); - } else { - fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), - field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false, field.getOmitTermFreqAndPositions()); - } - if (thisFieldGen != fp.lastGen) { - - // First time we're seeing this field for this doc - fp.fieldCount = 0; - - if (fieldCount == fields.length) { - final int newSize = fields.length*2; - DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize]; - System.arraycopy(fields, 0, newArray, 0, fieldCount); - fields = newArray; - } - - fields[fieldCount++] = fp; - fp.lastGen = thisFieldGen; - } - - if (fp.fieldCount == fp.fields.length) { - Fieldable[] newArray = new Fieldable[fp.fields.length*2]; - System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount); - fp.fields = newArray; - } - - fp.fields[fp.fieldCount++] = field; - if (field.isStored()) { - fieldsWriter.addField(field, fp.fieldInfo); - } - } - - // If we are writing vectors then we must visit - // fields in sorted order so they are written in - // sorted order. TODO: we actually only need to - // sort the subset of fields that have vectors - // enabled; we could save [small amount of] CPU - // here. 
- ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp); - - for(int i=0;i fieldsComp = new Comparator() { - public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) { - return o1.fieldInfo.name.compareTo(o2.fieldInfo.name); - } - }; - - PerDoc[] docFreeList = new PerDoc[1]; - int freeCount; - int allocCount; - - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else - return docFreeList[--freeCount]; - } - - synchronized void freePerDoc(PerDoc perDoc) { - assert freeCount < docFreeList.length; - docFreeList[freeCount++] = perDoc; - } - - class PerDoc extends DocumentsWriter.DocWriter { - - DocumentsWriter.DocWriter one; - DocumentsWriter.DocWriter two; - - @Override - public long sizeInBytes() { - return one.sizeInBytes() + two.sizeInBytes(); - } - - @Override - public void finish() throws IOException { - try { - try { - one.finish(); - } finally { - two.finish(); - } - } finally { - freePerDoc(this); - } - } - - @Override - public void abort() { - try { - try { - one.abort(); - } finally { - two.abort(); - } - } finally { - freePerDoc(this); - } - } - } -} \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/DocInverter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocInverter.java (working copy) @@ -18,13 +18,14 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; - import java.util.Map; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.util.AttributeSource; + /** This is a DocFieldConsumer that inverts each field, * separately, from a Document, and accepts a * InvertedTermsConsumer to process those terms. 
*/ @@ -34,42 +35,72 @@ final InvertedDocConsumer consumer; final InvertedDocEndConsumer endConsumer; - public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) { + final DocumentsWriterPerThread.DocState docState; + + final FieldInvertState fieldState = new FieldInvertState(); + + final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource(); + + static class SingleTokenAttributeSource extends AttributeSource { + final CharTermAttribute termAttribute; + final OffsetAttribute offsetAttribute; + + private SingleTokenAttributeSource() { + termAttribute = addAttribute(CharTermAttribute.class); + offsetAttribute = addAttribute(OffsetAttribute.class); + } + + public void reinit(String stringValue, int startOffset, int endOffset) { + termAttribute.setEmpty().append(stringValue); + offsetAttribute.setOffset(startOffset, endOffset); + } + } + + // Used to read a string value for a field + final ReusableStringReader stringReader = new ReusableStringReader(); + + public DocInverter(DocumentsWriterPerThread.DocState docState, InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) { + this.docState = docState; this.consumer = consumer; this.endConsumer = endConsumer; } @Override - void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException { + void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - Map> endChildThreadsAndFields = new HashMap>(); + Map childFieldsToFlush = new HashMap(); + Map endChildFieldsToFlush = new HashMap(); - for (Map.Entry> entry : threadsAndFields.entrySet() ) { + for (Map.Entry fieldToFlush : fieldsToFlush.entrySet()) { + DocInverterPerField perField = (DocInverterPerField) fieldToFlush.getValue(); + childFieldsToFlush.put(fieldToFlush.getKey(), perField.consumer); + endChildFieldsToFlush.put(fieldToFlush.getKey(), perField.endConsumer); + } + consumer.flush(childFieldsToFlush, state); + endConsumer.flush(endChildFieldsToFlush, state); + } - DocInverterPerThread perThread = (DocInverterPerThread) entry.getKey(); + @Override + public void startDocument() throws IOException { + consumer.startDocument(); + endConsumer.startDocument(); + } - Collection childFields = new HashSet(); - Collection endChildFields = new HashSet(); - for (final DocFieldConsumerPerField field: entry.getValue() ) { - DocInverterPerField perField = (DocInverterPerField) field; - childFields.add(perField.consumer); - endChildFields.add(perField.endConsumer); - } - - childThreadsAndFields.put(perThread.consumer, childFields); - endChildThreadsAndFields.put(perThread.endConsumer, endChildFields); - } - - consumer.flush(childThreadsAndFields, state); - endConsumer.flush(endChildThreadsAndFields, state); + public void finishDocument() throws IOException { + // TODO: allow endConsumer.finishDocument to also return + // a DocWriter + endConsumer.finishDocument(); + consumer.finishDocument(); } @Override void abort() { - consumer.abort(); - endConsumer.abort(); + try { + consumer.abort(); + } finally { + endConsumer.abort(); + } } @Override @@ -78,7 +109,8 @@ } @Override - public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) { - return new DocInverterPerThread(docFieldProcessorPerThread, this); + public DocFieldConsumerPerField addField(FieldInfo fi) { + return new DocInverterPerField(this, fi); } + } Index: lucene/src/java/org/apache/lucene/index/DocInverterPerField.java 
=================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (working copy) @@ -35,20 +35,20 @@ final class DocInverterPerField extends DocFieldConsumerPerField { - final private DocInverterPerThread perThread; - final private FieldInfo fieldInfo; + final private DocInverter parent; + final FieldInfo fieldInfo; final InvertedDocConsumerPerField consumer; final InvertedDocEndConsumerPerField endConsumer; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; - public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) { - this.perThread = perThread; + public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) { + this.parent = parent; this.fieldInfo = fieldInfo; - docState = perThread.docState; - fieldState = perThread.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); - this.endConsumer = perThread.endConsumer.addField(this, fieldInfo); + docState = parent.docState; + fieldState = parent.fieldState; + this.consumer = parent.consumer.addField(this, fieldInfo); + this.endConsumer = parent.endConsumer.addField(this, fieldInfo); } @Override @@ -80,8 +80,8 @@ if (!field.isTokenized()) { // un-tokenized field String stringValue = field.stringValue(); final int valueLength = stringValue.length(); - perThread.singleToken.reinit(stringValue, 0, valueLength); - fieldState.attributeSource = perThread.singleToken; + parent.singleToken.reinit(stringValue, 0, valueLength); + fieldState.attributeSource = parent.singleToken; consumer.start(field); boolean success = false; @@ -89,8 +89,9 @@ consumer.add(); success = true; } finally { - if (!success) + if (!success) { docState.docWriter.setAborting(); + } } fieldState.offset += valueLength; fieldState.length++; @@ -114,8 +115,8 @@ if (stringValue == null) { throw new IllegalArgumentException("field must have either TokenStream, String or Reader value"); } - perThread.stringReader.init(stringValue); - reader = perThread.stringReader; + parent.stringReader.init(stringValue); + reader = parent.stringReader; } // Tokenize field and add to postingTable @@ -166,8 +167,9 @@ consumer.add(); success = true; } finally { - if (!success) + if (!success) { docState.docWriter.setAborting(); + } } fieldState.length++; fieldState.position++; @@ -195,4 +197,9 @@ consumer.finish(); endConsumer.finish(); } + + @Override + FieldInfo getFieldInfo() { + return fieldInfo; + } } Index: lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java (working copy) @@ -1,92 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - -/** This is a DocFieldConsumer that inverts each field, - * separately, from a Document, and accepts a - * InvertedTermsConsumer to process those terms. */ - -final class DocInverterPerThread extends DocFieldConsumerPerThread { - final DocInverter docInverter; - final InvertedDocConsumerPerThread consumer; - final InvertedDocEndConsumerPerThread endConsumer; - final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource(); - - static class SingleTokenAttributeSource extends AttributeSource { - final CharTermAttribute termAttribute; - final OffsetAttribute offsetAttribute; - - private SingleTokenAttributeSource() { - termAttribute = addAttribute(CharTermAttribute.class); - offsetAttribute = addAttribute(OffsetAttribute.class); - } - - public void reinit(String stringValue, int startOffset, int endOffset) { - termAttribute.setEmpty().append(stringValue); - offsetAttribute.setOffset(startOffset, endOffset); - } - } - - final DocumentsWriter.DocState docState; - - final FieldInvertState fieldState = new FieldInvertState(); - - // Used to read a string value for a field - final ReusableStringReader stringReader = new ReusableStringReader(); - - public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter) { - this.docInverter = docInverter; - docState = docFieldProcessorPerThread.docState; - consumer = docInverter.consumer.addThread(this); - endConsumer = docInverter.endConsumer.addThread(this); - } - - @Override - public void startDocument() throws IOException { - consumer.startDocument(); - endConsumer.startDocument(); - } - - @Override - public DocumentsWriter.DocWriter finishDocument() throws IOException { - // TODO: allow endConsumer.finishDocument to also return - // a DocWriter - endConsumer.finishDocument(); - return consumer.finishDocument(); - } - - @Override - void abort() { - try { - consumer.abort(); - } finally { - endConsumer.abort(); - } - } - - @Override - public DocFieldConsumerPerField addField(FieldInfo fi) { - return new DocInverterPerField(this, fi); - } -} Index: lucene/src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -19,36 +19,27 @@ import java.io.IOException; import java.io.PrintStream; -import java.text.NumberFormat; -import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; import java.util.List; -import java.util.concurrent.atomic.AtomicLong; +import java.util.Queue; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import 
org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; +import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; +import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.search.Query; import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMFile; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BitVector; -import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.RecyclingByteBlockAllocator; -import org.apache.lucene.util.ThreadInterruptedException; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; - /** * This class accepts multiple added documents and directly - * writes a single segment file. It does this more - * efficiently than creating a single segment per document - * (with DocumentWriter) and doing standard merges on those - * segments. + * writes segment files. * * Each added document is passed to the {@link DocConsumer}, * which in turn processes the document and interacts with @@ -111,266 +102,117 @@ */ final class DocumentsWriter { - final AtomicLong bytesUsed = new AtomicLong(0); - IndexWriter writer; Directory directory; - String segment; // Current segment we are working on + private volatile boolean closed; - private int nextDocID; // Next docID to be added - private int numDocs; // # of docs added, but not yet flushed - - // Max # ThreadState instances; if there are more threads - // than this they share ThreadStates - private DocumentsWriterThreadState[] threadStates = new DocumentsWriterThreadState[0]; - private final HashMap threadBindings = new HashMap(); - - boolean bufferIsFull; // True when it's time to write segment - private boolean aborting; // True if an abort is pending - PrintStream infoStream; SimilarityProvider similarityProvider; - // max # simultaneous threads; if there are more than - // this, they wait for others to finish first - private final int maxThreadStates; + List newFiles; - // TODO: cutover to BytesRefHash - // Deletes for our still-in-RAM (to be flushed next) segment - private BufferedDeletes pendingDeletes = new BufferedDeletes(false); - - static class DocState { - DocumentsWriter docWriter; - Analyzer analyzer; - PrintStream infoStream; - SimilarityProvider similarityProvider; - int docID; - Document doc; - String maxTermPrefix; + final IndexWriter indexWriter; - // Only called by asserts - public boolean testPoint(String name) { - return docWriter.writer.testPoint(name); - } + private AtomicInteger numDocsInRAM = new AtomicInteger(0); - public void clear() { - // don't hold onto doc nor analyzer, in case it is - // largish: - doc = null; - analyzer = null; - } - } + // TODO: cut over to BytesRefHash in BufferedDeletes + volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue(); + private final Queue ticketQueue = new LinkedList(); - /** Consumer returns this on each doc. This holds any - * state that must be flushed synchronized "in docID - * order". We gather these and flush them in order. 
*/ - abstract static class DocWriter { - DocWriter next; - int docID; - abstract void finish() throws IOException; - abstract void abort(); - abstract long sizeInBytes(); + private Collection abortedFiles; // List of files that were written before last abort() - void setNext(DocWriter next) { - this.next = next; - } - } + final IndexingChain chain; - /** - * Create and return a new DocWriterBuffer. - */ - PerDocBuffer newPerDocBuffer() { - return new PerDocBuffer(); - } - - /** - * RAMFile buffer for DocWriters. - */ - class PerDocBuffer extends RAMFile { - - /** - * Allocate bytes used from shared pool. - */ - @Override - protected byte[] newBuffer(int size) { - assert size == PER_DOC_BLOCK_SIZE; - return perDocAllocator.getByteBlock(); + final DocumentsWriterPerThreadPool perThreadPool; + final FlushPolicy flushPolicy; + final DocumentsWriterFlushControl flushControl; + final Healthiness healthiness; + DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers, + BufferedDeletesStream bufferedDeletesStream) throws IOException { + this.directory = directory; + this.indexWriter = writer; + this.similarityProvider = config.getSimilarityProvider(); + this.perThreadPool = config.getIndexerThreadPool(); + this.chain = config.getIndexingChain(); + this.perThreadPool.initialize(this, globalFieldNumbers, config); + final FlushPolicy configuredPolicy = config.getFlushPolicy(); + if (configuredPolicy == null) { + flushPolicy = new FlushByRamOrCountsPolicy(); + } else { + flushPolicy = configuredPolicy; } + flushPolicy.init(this); - /** - * Recycle the bytes used. - */ - synchronized void recycle() { - if (buffers.size() > 0) { - setLength(0); - - // Recycle the blocks - perDocAllocator.recycleByteBlocks(buffers); - buffers.clear(); - sizeInBytes = 0; - - assert numBuffers() == 0; - } - } + healthiness = new Healthiness(); + final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024; + flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT); } - - /** - * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method - * which returns the DocConsumer that the DocumentsWriter calls to process the - * documents. 
- */ - abstract static class IndexingChain { - abstract DocConsumer getChain(DocumentsWriter documentsWriter); - } - - static final IndexingChain defaultIndexingChain = new IndexingChain() { - @Override - DocConsumer getChain(DocumentsWriter documentsWriter) { - /* - This is the current indexing chain: - - DocConsumer / DocConsumerPerThread - --> code: DocFieldProcessor / DocFieldProcessorPerThread - --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField - --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField - --> code: DocInverter / DocInverterPerThread / DocInverterPerField - --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: TermsHash / TermsHashPerThread / TermsHashPerField - --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField - --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField - --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField - --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField - --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField - */ - - // Build up indexing chain: - - final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter); - final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); - /* - * nesting TermsHash instances here to allow the secondary (TermVectors) share the interned postings - * via a shared ByteBlockPool. See TermsHashPerField for details. - */ - final TermsHash termVectorsTermHash = new TermsHash(documentsWriter, false, termVectorsWriter, null); - final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, termVectorsTermHash); - final NormsWriter normsWriter = new NormsWriter(); - final DocInverter docInverter = new DocInverter(termsHash, normsWriter); - return new DocFieldProcessor(documentsWriter, docInverter); + synchronized void deleteQueries(final Query... queries) throws IOException { + deleteQueue.addDelete(queries); + flushControl.doOnDelete(); + if (flushControl.doApplyAllDeletes()) { + applyAllDeletes(deleteQueue); } - }; - - final DocConsumer consumer; - - // How much RAM we can use before flushing. This is 0 if - // we are flushing by doc count instead. - - private final IndexWriterConfig config; - - private boolean closed; - private FieldInfos fieldInfos; - - private final BufferedDeletesStream bufferedDeletesStream; - private final IndexWriter.FlushControl flushControl; - - DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, IndexingChain indexingChain, FieldInfos fieldInfos, - BufferedDeletesStream bufferedDeletesStream) throws IOException { - this.directory = directory; - this.writer = writer; - this.similarityProvider = config.getSimilarityProvider(); - this.maxThreadStates = config.getMaxThreadStates(); - this.fieldInfos = fieldInfos; - this.bufferedDeletesStream = bufferedDeletesStream; - flushControl = writer.flushControl; - consumer = config.getIndexingChain().getChain(this); - this.config = config; } - // Buffer a specific docID for deletion. Currently only - // used when we hit a exception when adding a document - synchronized void deleteDocID(int docIDUpto) { - pendingDeletes.addDocID(docIDUpto); - // NOTE: we do not trigger flush here. 
This is - // potentially a RAM leak, if you have an app that tries - // to add docs but every single doc always hits a - // non-aborting exception. Allowing a flush here gets - // very messy because we are only invoked when handling - // exceptions so to do this properly, while handling an - // exception we'd have to go off and flush new deletes - // which is risky (likely would hit some other - // confounding exception). - } - - boolean deleteQueries(Query... queries) { - final boolean doFlush = flushControl.waitUpdate(0, queries.length); - synchronized(this) { - for (Query query : queries) { - pendingDeletes.addQuery(query, numDocs); - } + // TODO: we could check w/ FreqProxTermsWriter: if the + // term doesn't exist, don't bother buffering into the + // per-DWPT map (but still must go into the global map) + synchronized void deleteTerms(final Term... terms) throws IOException { + final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue; + deleteQueue.addDelete(terms); + flushControl.doOnDelete(); + if (flushControl.doApplyAllDeletes()) { + applyAllDeletes(deleteQueue); } - return doFlush; } - - boolean deleteQuery(Query query) { - final boolean doFlush = flushControl.waitUpdate(0, 1); - synchronized(this) { - pendingDeletes.addQuery(query, numDocs); - } - return doFlush; + + DocumentsWriterDeleteQueue currentDeleteSession() { + return deleteQueue; } - boolean deleteTerms(Term... terms) { - final boolean doFlush = flushControl.waitUpdate(0, terms.length); - synchronized(this) { - for (Term term : terms) { - pendingDeletes.addTerm(term, numDocs); + private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException { + if (deleteQueue != null) { + synchronized (ticketQueue) { + // Freeze and insert the delete flush ticket in the queue + ticketQueue.add(new FlushTicket(deleteQueue.freezeGlobalBuffer(null), false)); + applyFlushTickets(); } } - return doFlush; + indexWriter.applyAllDeletes(); + indexWriter.flushCount.incrementAndGet(); } - // TODO: we could check w/ FreqProxTermsWriter: if the - // term doesn't exist, don't bother buffering into the - // per-DWPT map (but still must go into the global map) - boolean deleteTerm(Term term, boolean skipWait) { - final boolean doFlush = flushControl.waitUpdate(0, 1, skipWait); - synchronized(this) { - pendingDeletes.addTerm(term, numDocs); - } - return doFlush; - } - - /** If non-null, various details of indexing are printed - * here. */ synchronized void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; - for(int i=0;i it = perThreadPool.getAllPerThreadsIterator(); + while (it.hasNext()) { + it.next().perThread.docState.infoStream = infoStream; } } - /** Get current segment name we are writing. */ - synchronized String getSegment() { - return segment; + /** Returns how many docs are currently buffered in RAM. */ + int getNumDocs() { + return numDocsInRAM.get(); } - /** Returns how many docs are currently buffered in RAM. 
*/ - synchronized int getNumDocs() { - return numDocs; + Collection abortedFiles() { + return abortedFiles; } - void message(String message) { + // returns boolean for asserts + boolean message(String message) { if (infoStream != null) { - writer.message("DW: " + message); + indexWriter.message("DW: " + message); } + return true; } - synchronized void setAborting() { - if (infoStream != null) { - message("setAborting"); + private void ensureOpen() throws AlreadyClosedException { + if (closed) { + throw new AlreadyClosedException("this IndexWriter is closed"); } - aborting = true; } /** Called if we hit an exception at a bad time (when @@ -378,816 +220,335 @@ * currently buffered docs. This resets our state, * discarding any docs added since last flush. */ synchronized void abort() throws IOException { - if (infoStream != null) { - message("docWriter: abort"); - } - boolean success = false; - try { + synchronized (this) { + deleteQueue.clear(); + } - // Forcefully remove waiting ThreadStates from line - waitQueue.abort(); - - // Wait for all other threads to finish with - // DocumentsWriter: - waitIdle(); - + try { if (infoStream != null) { - message("docWriter: abort waitIdle done"); + message("docWriter: abort"); } - assert 0 == waitQueue.numWaiting: "waitQueue.numWaiting=" + waitQueue.numWaiting; + final Iterator threadsIterator = perThreadPool.getActivePerThreadsIterator(); - waitQueue.waitingBytes = 0; - - pendingDeletes.clear(); - - for (DocumentsWriterThreadState threadState : threadStates) + while (threadsIterator.hasNext()) { + ThreadState perThread = threadsIterator.next(); + perThread.lock(); try { - threadState.consumer.abort(); - } catch (Throwable t) { + if (perThread.isActive()) { // we might be closed + perThread.perThread.abort(); + perThread.perThread.checkAndResetHasAborted(); + } else { + assert closed; + } + } finally { + perThread.unlock(); } - - try { - consumer.abort(); - } catch (Throwable t) { } - // Reset all postings data - doAfterFlush(); success = true; } finally { - aborting = false; - notifyAll(); if (infoStream != null) { - message("docWriter: done abort; success=" + success); + message("docWriter: done abort; abortedFiles=" + abortedFiles + " success=" + success); } } } - /** Reset after a flush */ - private void doAfterFlush() throws IOException { - // All ThreadStates should be idle when we are called - assert allThreadsIdle(); - for (DocumentsWriterThreadState threadState : threadStates) { - threadState.consumer.doAfterFlush(); - } - - threadBindings.clear(); - waitQueue.reset(); - segment = null; - fieldInfos = new FieldInfos(fieldInfos); - numDocs = 0; - nextDocID = 0; - bufferIsFull = false; - for(int i=0;i BD - final long delGen = bufferedDeletesStream.getNextGen(); - if (pendingDeletes.any()) { - if (segmentInfos.size() > 0 || newSegment != null) { - final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(pendingDeletes, delGen); - if (infoStream != null) { - message("flush: push buffered deletes startSize=" + pendingDeletes.bytesUsed.get() + " frozenSize=" + packet.bytesUsed); - } - bufferedDeletesStream.push(packet); - if (infoStream != null) { - message("flush: delGen=" + packet.gen); - } - if (newSegment != null) { - newSegment.setBufferedDeletesGen(packet.gen); - } - } else { - if (infoStream != null) { - message("flush: drop buffered deletes: no segments"); - } - // We can safely discard these deletes: since - // there are no segments, the deletions cannot - // affect anything. 
- } - pendingDeletes.clear(); - } else if (newSegment != null) { - newSegment.setBufferedDeletesGen(delGen); - } + void close() { + closed = true; + flushControl.setClosed(); } - public boolean anyDeletions() { - return pendingDeletes.any(); - } + boolean updateDocument(final Document doc, final Analyzer analyzer, + final Term delTerm) throws CorruptIndexException, IOException { + ensureOpen(); + boolean maybeMerge = false; + final boolean isUpdate = delTerm != null; + if (healthiness.anyStalledThreads()) { - /** Flush all pending docs to a new segment */ - // Lock order: IW -> DW - synchronized SegmentInfo flush(IndexWriter writer, IndexFileDeleter deleter, MergePolicy mergePolicy, SegmentInfos segmentInfos) throws IOException { - - final long startTime = System.currentTimeMillis(); - - // We change writer's segmentInfos: - assert Thread.holdsLock(writer); - - waitIdle(); - - if (numDocs == 0) { - // nothing to do! + // Help out flushing any pending DWPTs so we can un-stall: if (infoStream != null) { - message("flush: no docs; skipping"); + message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)"); } - // Lock order: IW -> DW -> BD - pushDeletes(null, segmentInfos); - return null; - } - if (aborting) { - if (infoStream != null) { - message("flush: skip because aborting is set"); - } - return null; - } - - boolean success = false; - - SegmentInfo newSegment; - - try { - assert nextDocID == numDocs; - assert waitQueue.numWaiting == 0; - assert waitQueue.waitingBytes == 0; - - if (infoStream != null) { - message("flush postings as segment " + segment + " numDocs=" + numDocs); - } - - final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos, - numDocs, writer.getConfig().getTermIndexInterval(), - fieldInfos.buildSegmentCodecs(true), - pendingDeletes); - // Apply delete-by-docID now (delete-byDocID only - // happens when an exception is hit processing that - // doc, eg if analyzer has some problem w/ the text): - if (pendingDeletes.docIDs.size() > 0) { - flushState.deletedDocs = new BitVector(numDocs); - for(int delDocID : pendingDeletes.docIDs) { - flushState.deletedDocs.set(delDocID); + // Try pick up pending threads here if possible + DocumentsWriterPerThread flushingDWPT; + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + // Don't push the delete here since the update could fail! + maybeMerge = doFlush(flushingDWPT); + if (!healthiness.anyStalledThreads()) { + break; } - pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID); - pendingDeletes.docIDs.clear(); } - newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos); - - Collection threads = new HashSet(); - for (DocumentsWriterThreadState threadState : threadStates) { - threads.add(threadState.consumer); + if (infoStream != null && healthiness.anyStalledThreads()) { + message("WARNING DocumentsWriter still has stalled threads; waiting"); } - double startMBUsed = bytesUsed()/1024./1024.; + healthiness.waitIfStalled(); // block if stalled - consumer.flush(threads, flushState); - - newSegment.setHasVectors(flushState.hasVectors); - - if (infoStream != null) { - message("new segment has " + (flushState.hasVectors ? 
"vectors" : "no vectors")); - if (flushState.deletedDocs != null) { - message("new segment has " + flushState.deletedDocs.count() + " deleted docs"); - } - message("flushedFiles=" + newSegment.files()); - message("flushed codecs=" + newSegment.getSegmentCodecs()); + if (infoStream != null && healthiness.anyStalledThreads()) { + message("WARNING DocumentsWriter done waiting"); } + } - if (mergePolicy.useCompoundFile(segmentInfos, newSegment)) { - final String cfsFileName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(), + this, doc); + final DocumentsWriterPerThread flushingDWPT; + + try { - if (infoStream != null) { - message("flush: create compound file \"" + cfsFileName + "\""); - } - - CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, cfsFileName); - for(String fileName : newSegment.files()) { - cfsWriter.addFile(fileName); - } - cfsWriter.close(); - deleter.deleteNewFiles(newSegment.files()); - newSegment.setUseCompoundFile(true); + if (!perThread.isActive()) { + ensureOpen(); + assert false: "perThread is not active but we are still open"; } - - // Must write deleted docs after the CFS so we don't - // slurp the del file into CFS: - if (flushState.deletedDocs != null) { - final int delCount = flushState.deletedDocs.count(); - assert delCount > 0; - newSegment.setDelCount(delCount); - newSegment.advanceDelGen(); - final String delFileName = newSegment.getDelFileName(); - if (infoStream != null) { - message("flush: write " + delCount + " deletes to " + delFileName); + + final DocumentsWriterPerThread dwpt = perThread.perThread; + try { + dwpt.updateDocument(doc, analyzer, delTerm); + numDocsInRAM.incrementAndGet(); + } finally { + if (dwpt.checkAndResetHasAborted()) { + flushControl.doOnAbort(perThread); } - boolean success2 = false; - try { - // TODO: in the NRT case it'd be better to hand - // this del vector over to the - // shortly-to-be-opened SegmentReader and let it - // carry the changes; there's no reason to use - // filesystem as intermediary here. 
- flushState.deletedDocs.write(directory, delFileName); - success2 = true; - } finally { - if (!success2) { - try { - directory.deleteFile(delFileName); - } catch (Throwable t) { - // suppress this so we keep throwing the - // original exception - } - } - } } - - if (infoStream != null) { - message("flush: segment=" + newSegment); - final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.; - final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.; - message(" ramUsed=" + nf.format(startMBUsed) + " MB" + - " newFlushedSize=" + nf.format(newSegmentSize) + " MB" + - " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" + - " docs/MB=" + nf.format(numDocs / newSegmentSize) + - " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%"); - } - - success = true; + flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); } finally { - notifyAll(); - if (!success) { - if (segment != null) { - deleter.refresh(segment); - } - abort(); - } + perThread.unlock(); } - - doAfterFlush(); - - // Lock order: IW -> DW -> BD - pushDeletes(newSegment, segmentInfos); - if (infoStream != null) { - message("flush time " + (System.currentTimeMillis()-startTime) + " msec"); - } - - return newSegment; - } - - synchronized void close() { - closed = true; - notifyAll(); - } - - /** Returns a free (idle) ThreadState that may be used for - * indexing this one document. This call also pauses if a - * flush is pending. If delTerm is non-null then we - * buffer this deleted term after the thread state has - * been acquired. */ - synchronized DocumentsWriterThreadState getThreadState(Document doc, Term delTerm) throws IOException { - - final Thread currentThread = Thread.currentThread(); - assert !Thread.holdsLock(writer); - - // First, find a thread state. If this thread already - // has affinity to a specific ThreadState, use that one - // again. - DocumentsWriterThreadState state = threadBindings.get(currentThread); - if (state == null) { - - // First time this thread has called us since last - // flush. 
Find the least loaded thread state: - DocumentsWriterThreadState minThreadState = null; - for(int i=0;i= maxThreadStates)) { - state = minThreadState; - state.numThreads++; - } else { - // Just create a new "private" thread state - DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1+threadStates.length]; - if (threadStates.length > 0) { - System.arraycopy(threadStates, 0, newArray, 0, threadStates.length); - } - state = newArray[threadStates.length] = new DocumentsWriterThreadState(this); - threadStates = newArray; - } - threadBindings.put(currentThread, state); } - - // Next, wait until my thread state is idle (in case - // it's shared with other threads), and no flush/abort - // pending - waitReady(state); - - // Allocate segment name if this is the first doc since - // last flush: - if (segment == null) { - segment = writer.newSegmentName(); - assert numDocs == 0; - } - - state.docState.docID = nextDocID++; - - if (delTerm != null) { - pendingDeletes.addTerm(delTerm, state.docState.docID); - } - - numDocs++; - state.isIdle = false; - return state; + return maybeMerge; } - - boolean addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { - return updateDocument(doc, analyzer, null); - } - - boolean updateDocument(Document doc, Analyzer analyzer, Term delTerm) - throws CorruptIndexException, IOException { - // Possibly trigger a flush, or wait until any running flush completes: - boolean doFlush = flushControl.waitUpdate(1, delTerm != null ? 1 : 0); - - // This call is synchronized but fast - final DocumentsWriterThreadState state = getThreadState(doc, delTerm); - - final DocState docState = state.docState; - docState.doc = doc; - docState.analyzer = analyzer; - - boolean success = false; - try { - // This call is not synchronized and does all the - // work - final DocWriter perDoc; + private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException { + boolean maybeMerge = false; + while (flushingDWPT != null) { + maybeMerge = true; + boolean success = false; + FlushTicket ticket = null; + try { - perDoc = state.consumer.processDocument(fieldInfos); - } finally { - docState.clear(); - } - - // This call is synchronized but fast - finishDocument(state, perDoc); - - success = true; - } finally { - if (!success) { - - // If this thread state had decided to flush, we - // must clear it so another thread can flush - if (doFlush) { - flushControl.clearFlushPending(); - } - - if (infoStream != null) { - message("exception in updateDocument aborting=" + aborting); - } - - synchronized(this) { - - state.isIdle = true; - notifyAll(); - - if (aborting) { - abort(); - } else { - skipDocWriter.docID = docState.docID; - boolean success2 = false; - try { - waitQueue.add(skipDocWriter); - success2 = true; - } finally { - if (!success2) { - abort(); - return false; - } + assert currentFullFlushDelQueue == null + || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: " + + currentFullFlushDelQueue + "but was: " + flushingDWPT.deleteQueue + + " " + flushControl.isFullFlush(); + /* + * Since with DWPT the flush process is concurrent and several DWPT + * could flush at the same time we must maintain the order of the + * flushes before we can apply the flushed segment and the frozen global + * deletes it is buffering. The reason for this is that the global + * deletes mark a certain point in time where we took a DWPT out of + * rotation and freeze the global deletes. 
+ * + * Example: A flush 'A' starts and freezes the global deletes, then + * flush 'B' starts and freezes all deletes that occurred since 'A' + * started. If 'B' finishes before 'A' we need to wait until 'A' is done, + * otherwise the deletes frozen by 'B' are not applied to 'A' and we + * might miss deleting documents in 'A'. + */ + try { + synchronized (ticketQueue) { + // Each flush is assigned a ticket in the order they acquire the ticketQueue lock + ticket = new FlushTicket(flushingDWPT.prepareFlush(), true); + ticketQueue.add(ticket); + } + + // flush concurrently without locking + final FlushedSegment newSegment = flushingDWPT.flush(); + synchronized (ticketQueue) { + ticket.segment = newSegment; + } + // flush was successful once we reached this point - new seg. has been assigned to the ticket! + success = true; + } finally { + if (!success && ticket != null) { + synchronized (ticketQueue) { + // In the case of a failure make sure we are making progress and + // apply all the deletes, since the flush ticket could hold global + // deletes even though the segment flush failed; see FlushTicket#canPublish() + ticket.isSegmentFlush = false; } - - // Immediately mark this document as deleted - // since likely it was partially added. This - // keeps indexing as "all or none" (atomic) when - // adding a document: - deleteDocID(state.docState.docID); } } + /* + * Now we are done and try to flush the ticket queue if the head of the + * queue has already finished the flush. + */ + applyFlushTickets(); + } finally { + flushControl.doAfterFlush(flushingDWPT); + flushingDWPT.checkAndResetHasAborted(); + indexWriter.flushCount.incrementAndGet(); } + + flushingDWPT = flushControl.nextPendingFlush(); } - - doFlush |= flushControl.flushByRAMUsage("new document"); - - return doFlush; + return maybeMerge; } - public synchronized void waitIdle() { - while (!allThreadsIdle()) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + private void applyFlushTickets() throws IOException { + synchronized (ticketQueue) { + while (true) { + // Keep publishing eligible flushed segments: + final FlushTicket head = ticketQueue.peek(); + if (head != null && head.canPublish()) { + ticketQueue.poll(); + finishFlush(head.segment, head.frozenDeletes); + } else { + break; + } } } } - synchronized void waitReady(DocumentsWriterThreadState state) { - while (!closed && (!state.isIdle || aborting)) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + private void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes) + throws IOException { + // Finish the flushed segment and publish it to IndexWriter + if (newSegment == null) { + assert bufferedDeletes != null; + if (bufferedDeletes != null && bufferedDeletes.any()) { + indexWriter.bufferedDeletesStream.push(bufferedDeletes); + if (infoStream != null) { + message("flush: push buffered deletes: " + bufferedDeletes); + } } + } else { + publishFlushedSegment(newSegment, bufferedDeletes); } - - if (closed) { - throw new AlreadyClosedException("this IndexWriter is closed"); - } } - /** Does the synchronized work to finish/flush the - * inverted document.
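[Editor's aside, not part of the patch] To make the ordering invariant described in the comment above concrete, here is a minimal, self-contained sketch: tickets are enqueued in the order their flushes start, and a finished flush is only published once every earlier ticket has also finished, so deletes frozen by a later flush can never be applied ahead of an earlier one. All names in the sketch (TicketQueueSketch, Ticket, enqueue, finish) are invented for illustration and do not exist in Lucene.

import java.util.ArrayDeque;
import java.util.Queue;

final class TicketQueueSketch {
  static final class Ticket {
    final String frozenDeletes;  // stands in for FrozenBufferedDeletes
    String segment;              // stands in for FlushedSegment; null until the flush finishes
    Ticket(String frozenDeletes) { this.frozenDeletes = frozenDeletes; }
    boolean canPublish() { return segment != null; }
  }

  private final Queue<Ticket> queue = new ArrayDeque<Ticket>();

  // Called in the order flushes start ('A' before 'B'), analogous to acquiring the ticketQueue lock.
  synchronized Ticket enqueue(String frozenDeletes) {
    final Ticket ticket = new Ticket(frozenDeletes);
    queue.add(ticket);
    return ticket;
  }

  // Called when a flush finishes, possibly out of order ('B' may finish before 'A').
  synchronized void finish(Ticket ticket, String segment) {
    ticket.segment = segment;
    // Only the head of the queue may be published; a finished 'B' simply waits here
    // until 'A' completes, so 'B's frozen deletes are never applied before 'A' is published.
    while (!queue.isEmpty() && queue.peek().canPublish()) {
      final Ticket head = queue.poll();
      System.out.println("publish segment=" + head.segment + " deletes=" + head.frozenDeletes);
    }
  }
}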
*/ - private void finishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter) throws IOException { - - // Must call this w/o holding synchronized(this) else - // we'll hit deadlock: - balanceRAM(); - - synchronized(this) { - - assert docWriter == null || docWriter.docID == perThread.docState.docID; - - if (aborting) { - - // We are currently aborting, and another thread is - // waiting for me to become idle. We just forcefully - // idle this threadState; it will be fully reset by - // abort() - if (docWriter != null) { - try { - docWriter.abort(); - } catch (Throwable t) { - } - } - - perThread.isIdle = true; - - // wakes up any threads waiting on the wait queue - notifyAll(); - - return; - } - - final boolean doPause; - - if (docWriter != null) { - doPause = waitQueue.add(docWriter); - } else { - skipDocWriter.docID = perThread.docState.docID; - doPause = waitQueue.add(skipDocWriter); - } - - if (doPause) { - waitForWaitQueue(); - } - - perThread.isIdle = true; - - // wakes up any threads waiting on the wait queue - notifyAll(); + final void subtractFlushedNumDocs(int numFlushed) { + int oldValue = numDocsInRAM.get(); + while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) { + oldValue = numDocsInRAM.get(); } } - - synchronized void waitForWaitQueue() { - do { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); + + /** + * Publishes the flushed segment, segment private deletes (if any) and its + * associated global delete (if present) to IndexWriter. The actual + * publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s + * delete generation is always GlobalPacket_deleteGeneration + 1 + */ + private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket) + throws IOException { + assert newSegment != null; + final SegmentInfo segInfo = indexWriter.prepareFlushedSegment(newSegment); + final BufferedDeletes deletes = newSegment.segmentDeletes; + FrozenBufferedDeletes packet = null; + if (deletes != null && deletes.any()) { + // Segment private delete + packet = new FrozenBufferedDeletes(deletes, true); + if (infoStream != null) { + message("flush: push buffered seg private deletes: " + packet); } - } while (!waitQueue.doResume()); - } - - private static class SkipDocWriter extends DocWriter { - @Override - void finish() { } - @Override - void abort() { - } - @Override - long sizeInBytes() { - return 0; - } - } - final SkipDocWriter skipDocWriter = new SkipDocWriter(); - NumberFormat nf = NumberFormat.getInstance(); - - /* Initial chunks size of the shared byte[] blocks used to - store postings data */ - final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; - - /* if you increase this, you must fix field cache impl for - * getTerms/getTermsIndex requires <= 32768. */ - final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2; - - /* Initial chunks size of the shared int[] blocks used to - store postings data */ - final static int INT_BLOCK_SHIFT = 13; - final static int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT; - final static int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1; - - private List freeIntBlocks = new ArrayList(); - - /* Allocate another int[] from the shared pool */ - synchronized int[] getIntBlock() { - final int size = freeIntBlocks.size(); - final int[] b; - if (0 == size) { - b = new int[INT_BLOCK_SIZE]; - bytesUsed.addAndGet(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT); - } else { - b = freeIntBlocks.remove(size-1); - } - return b; + // now publish! 
+ indexWriter.publishFlushedSegment(segInfo, packet, globalPacket); } - - long bytesUsed() { - return bytesUsed.get() + pendingDeletes.bytesUsed.get(); + + // for asserts + private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null; + // for asserts + private synchronized boolean setFlushingDeleteQueue(DocumentsWriterDeleteQueue session) { + currentFullFlushDelQueue = session; + return true; } + + /* + * FlushAllThreads is synced by IW fullFlushLock. Flushing all threads is a + * two stage operation; the caller must ensure (in try/finally) that finishFlush + * is called after this method, to release the flush lock in DWFlushControl + */ + final boolean flushAllThreads() + throws IOException { + final DocumentsWriterDeleteQueue flushingDeleteQueue; - /* Return int[]s to the pool */ - synchronized void recycleIntBlocks(int[][] blocks, int start, int end) { - for(int i=start;i= ramBufferSize; - } - - if (doBalance) { - - if (infoStream != null) { - message(" RAM: balance allocations: usedMB=" + toMB(bytesUsed()) + - " vs trigger=" + toMB(ramBufferSize) + - " deletesMB=" + toMB(deletesRAMUsed) + - " byteBlockFree=" + toMB(byteBlockAllocator.bytesUsed()) + - " perDocFree=" + toMB(perDocAllocator.bytesUsed())); + boolean anythingFlushed = false; + try { + DocumentsWriterPerThread flushingDWPT; + // Help out with flushing: + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + anythingFlushed |= doFlush(flushingDWPT); } - - final long startBytesUsed = bytesUsed() + deletesRAMUsed; - - int iter = 0; - - // We free equally from each pool in 32 KB - // chunks until we are below our threshold - // (freeLevel) - - boolean any = true; - - final long freeLevel = (long) (0.95 * ramBufferSize); - - while(bytesUsed()+deletesRAMUsed > freeLevel) { - - synchronized(this) { - if (0 == perDocAllocator.numBufferedBlocks() && - 0 == byteBlockAllocator.numBufferedBlocks() && - 0 == freeIntBlocks.size() && !any) { - // Nothing else to free -- must flush now. - bufferIsFull = bytesUsed()+deletesRAMUsed > ramBufferSize; - if (infoStream != null) { - if (bytesUsed()+deletesRAMUsed > ramBufferSize) { - message(" nothing to free; set bufferIsFull"); - } else { - message(" nothing to free"); - } - } - break; - } - - if ((0 == iter % 4) && byteBlockAllocator.numBufferedBlocks() > 0) { - byteBlockAllocator.freeBlocks(1); - } - if ((1 == iter % 4) && freeIntBlocks.size() > 0) { - freeIntBlocks.remove(freeIntBlocks.size()-1); - bytesUsed.addAndGet(-INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT); - } - if ((2 == iter % 4) && perDocAllocator.numBufferedBlocks() > 0) { - perDocAllocator.freeBlocks(32); // Remove upwards of 32 blocks (each block is 1K) - } + // If a concurrent flush is still in flight wait for it + while (flushControl.anyFlushing()) { + flushControl.waitForFlush(); + } + if (!anythingFlushed) { // apply deletes if we did not flush any document + synchronized (ticketQueue) { + ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false)); } - - if ((3 == iter % 4) && any) { - // Ask consumer to free any recycled state - any = consumer.freeRAM(); - } - - iter++; + applyFlushTickets(); } - - if (infoStream != null) { - message(" after free: freedMB=" + nf.format((startBytesUsed-bytesUsed()-deletesRAMUsed)/1024./1024.) 
+ " usedMB=" + nf.format((bytesUsed()+deletesRAMUsed)/1024./1024.)); - } + } finally { + assert flushingDeleteQueue == currentFullFlushDelQueue; } + return anythingFlushed; } - - final WaitQueue waitQueue = new WaitQueue(); - - private class WaitQueue { - DocWriter[] waiting; - int nextWriteDocID; - int nextWriteLoc; - int numWaiting; - long waitingBytes; - - public WaitQueue() { - waiting = new DocWriter[10]; + + final void finishFullFlush(boolean success) { + assert setFlushingDeleteQueue(null); + if (success) { + // Release the flush lock + flushControl.finishFullFlush(); + } else { + flushControl.abortFullFlushes(); } + } - synchronized void reset() { - // NOTE: nextWriteLoc doesn't need to be reset - assert numWaiting == 0; - assert waitingBytes == 0; - nextWriteDocID = 0; + static final class FlushTicket { + final FrozenBufferedDeletes frozenDeletes; + /* access to non-final members must be synchronized on DW#ticketQueue */ + FlushedSegment segment; + boolean isSegmentFlush; + + FlushTicket(FrozenBufferedDeletes frozenDeletes, boolean isSegmentFlush) { + this.frozenDeletes = frozenDeletes; + this.isSegmentFlush = isSegmentFlush; } - - synchronized boolean doResume() { - final double mb = config.getRAMBufferSizeMB(); - final long waitQueueResumeBytes; - if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) { - waitQueueResumeBytes = 2*1024*1024; - } else { - waitQueueResumeBytes = (long) (mb*1024*1024*0.05); - } - return waitingBytes <= waitQueueResumeBytes; + + boolean canPublish() { + return (!isSegmentFlush || segment != null); } - - synchronized boolean doPause() { - final double mb = config.getRAMBufferSizeMB(); - final long waitQueuePauseBytes; - if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) { - waitQueuePauseBytes = 4*1024*1024; - } else { - waitQueuePauseBytes = (long) (mb*1024*1024*0.1); - } - return waitingBytes > waitQueuePauseBytes; - } - - synchronized void abort() { - int count = 0; - for(int i=0;i= nextWriteDocID; - - if (doc.docID == nextWriteDocID) { - writeDocument(doc); - while(true) { - doc = waiting[nextWriteLoc]; - if (doc != null) { - numWaiting--; - waiting[nextWriteLoc] = null; - waitingBytes -= doc.sizeInBytes(); - writeDocument(doc); - } else { - break; - } - } - } else { - - // I finished before documents that were added - // before me. This can easily happen when I am a - // small doc and the docs before me were large, or, - // just due to luck in the thread scheduling. Just - // add myself to the queue and when that large doc - // finishes, it will flush me: - int gap = doc.docID - nextWriteDocID; - if (gap >= waiting.length) { - // Grow queue - DocWriter[] newArray = new DocWriter[ArrayUtil.oversize(gap, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - assert nextWriteLoc >= 0; - System.arraycopy(waiting, nextWriteLoc, newArray, 0, waiting.length-nextWriteLoc); - System.arraycopy(waiting, 0, newArray, waiting.length-nextWriteLoc, nextWriteLoc); - nextWriteLoc = 0; - waiting = newArray; - gap = doc.docID - nextWriteDocID; - } - - int loc = nextWriteLoc + gap; - if (loc >= waiting.length) { - loc -= waiting.length; - } - - // We should only wrap one time - assert loc < waiting.length; - - // Nobody should be in my spot! 
- assert waiting[loc] == null; - waiting[loc] = doc; - numWaiting++; - waitingBytes += doc.sizeInBytes(); - } - - return doPause(); - } } } Index: lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java (working copy) @@ -1,47 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -/** Used by DocumentsWriter to maintain per-thread state. - * We keep a separate Posting hash and other state for each - * thread and then merge postings hashes from all threads - * when writing the segment. */ -final class DocumentsWriterThreadState { - - boolean isIdle = true; // false if this is currently in use by a thread - int numThreads = 1; // Number of threads that share this instance - final DocConsumerPerThread consumer; - final DocumentsWriter.DocState docState; - - final DocumentsWriter docWriter; - - public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException { - this.docWriter = docWriter; - docState = new DocumentsWriter.DocState(); - docState.infoStream = docWriter.infoStream; - docState.similarityProvider = docWriter.similarityProvider; - docState.docWriter = docWriter; - consumer = docWriter.consumer.addThread(this); - } - - void doAfterFlush() { - numThreads = 0; - } -} Index: lucene/src/java/org/apache/lucene/index/FieldsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldsWriter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/FieldsWriter.java (working copy) @@ -2,13 +2,13 @@ /** * Copyright 2004 The Apache Software Foundation - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the @@ -22,15 +22,14 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMOutputStream; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.IOUtils; final class FieldsWriter { static final byte FIELD_IS_TOKENIZED = 0x1; static final byte FIELD_IS_BINARY = 0x2; - + // Lucene 3.0: Removal of compressed fields static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; @@ -38,7 +37,7 @@ // than the current one, and always change this if you // switch to a new format! static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; - + // when removing support for old versions, leave the last supported version here static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; @@ -83,10 +82,9 @@ // and adds a new entry for this document into the index // stream. This assumes the buffer was already written // in the correct fields format. - void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException { + void startDocument(int numStoredFields) throws IOException { indexStream.writeLong(fieldsStream.getFilePointer()); fieldsStream.writeVInt(numStoredFields); - buffer.writeTo(fieldsStream); } void skipDocument() throws IOException { @@ -121,8 +119,8 @@ } } - final void writeField(FieldInfo fi, Fieldable field) throws IOException { - fieldsStream.writeVInt(fi.number); + final void writeField(int fieldNumber, Fieldable field) throws IOException { + fieldsStream.writeVInt(fieldNumber); byte bits = 0; if (field.isTokenized()) bits |= FieldsWriter.FIELD_IS_TOKENIZED; @@ -175,10 +173,9 @@ fieldsStream.writeVInt(storedCount); - for (Fieldable field : fields) { if (field.isStored()) - writeField(fieldInfos.fieldInfo(field.name()), field); + writeField(fieldInfos.fieldNumber(field.name()), field); } } } Index: lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java (working copy) @@ -1,115 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import java.util.Comparator; - -import org.apache.lucene.util.ByteBlockPool; -import org.apache.lucene.util.BytesRef; - -import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray; - -// TODO FI: some of this is "generic" to TermsHash* so we -// should factor it out so other consumers don't have to -// duplicate this code - -/** Used by DocumentsWriter to merge the postings from - * multiple ThreadStates when creating a segment */ -final class FreqProxFieldMergeState { - - final FreqProxTermsWriterPerField field; - final int numPostings; - private final ByteBlockPool bytePool; - final int[] termIDs; - final FreqProxPostingsArray postings; - int currentTermID; - - final BytesRef text = new BytesRef(); - - private int postingUpto = -1; - - final ByteSliceReader freq = new ByteSliceReader(); - final ByteSliceReader prox = new ByteSliceReader(); - - int docID; - int termFreq; - - public FreqProxFieldMergeState(FreqProxTermsWriterPerField field, Comparator termComp) { - this.field = field; - this.numPostings = field.termsHashPerField.bytesHash.size(); - this.bytePool = field.perThread.termsHashPerThread.bytePool; - this.termIDs = field.termsHashPerField.sortPostings(termComp); - this.postings = (FreqProxPostingsArray) field.termsHashPerField.postingsArray; - } - - boolean nextTerm() throws IOException { - postingUpto++; - if (postingUpto == numPostings) { - return false; - } - - currentTermID = termIDs[postingUpto]; - docID = 0; - - // Get BytesRef - final int textStart = postings.textStarts[currentTermID]; - bytePool.setBytesRef(text, textStart); - - field.termsHashPerField.initReader(freq, currentTermID, 0); - if (!field.fieldInfo.omitTermFreqAndPositions) { - field.termsHashPerField.initReader(prox, currentTermID, 1); - } - - // Should always be true - boolean result = nextDoc(); - assert result; - - return true; - } - - public boolean nextDoc() throws IOException { - if (freq.eof()) { - if (postings.lastDocCodes[currentTermID] != -1) { - // Return last doc - docID = postings.lastDocIDs[currentTermID]; - if (!field.omitTermFreqAndPositions) - termFreq = postings.docFreqs[currentTermID]; - postings.lastDocCodes[currentTermID] = -1; - return true; - } else - // EOF - return false; - } - - final int code = freq.readVInt(); - if (field.omitTermFreqAndPositions) - docID += code; - else { - docID += code >>> 1; - if ((code & 1) != 0) - termFreq = 1; - else - termFreq = freq.readVInt(); - } - - assert docID != postings.lastDocIDs[currentTermID]; - - return true; - } -} Index: lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (working copy) @@ -19,55 +19,35 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; import java.util.List; import java.util.Map; import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.codecs.PostingsConsumer; -import org.apache.lucene.index.codecs.TermStats; -import org.apache.lucene.index.codecs.TermsConsumer; -import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; final class FreqProxTermsWriter extends TermsHashConsumer { @Override - public TermsHashConsumerPerThread addThread(TermsHashPerThread perThread) { - 
return new FreqProxTermsWriterPerThread(perThread); - } - - @Override void abort() {} - private int flushedDocCount; - // TODO: would be nice to factor out more of this, eg the // FreqProxFieldMergeState, and code to visit all Fields // under the same FieldInfo together, up into TermsHash*. // Other writers would presumably share alot of this... @Override - public void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { + public void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { // Gather all FieldData's that have postings, across all // ThreadStates List allFields = new ArrayList(); - - flushedDocCount = state.numDocs; - for (Map.Entry> entry : threadsAndFields.entrySet()) { - - Collection fields = entry.getValue(); - - - for (final TermsHashConsumerPerField i : fields) { - final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) i; - if (perField.termsHashPerField.bytesHash.size() > 0) + for (TermsHashConsumerPerField f : fieldsToFlush.values()) { + final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f; + if (perField.termsHashPerField.bytesHash.size() > 0) { allFields.add(perField); - } + } } final int numAllFields = allFields.size(); @@ -77,6 +57,8 @@ final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state); + TermsHash termsHash = null; + /* Current writer chain: FieldsConsumer @@ -89,257 +71,48 @@ -> IMPL: FormatPostingsPositionsWriter */ - int start = 0; - while(start < numAllFields) { - final FieldInfo fieldInfo = allFields.get(start).fieldInfo; - final String fieldName = fieldInfo.name; + for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) { + final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo; - int end = start+1; - while(end < numAllFields && allFields.get(end).fieldInfo.name.equals(fieldName)) - end++; - - FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end-start]; - for(int i=start;i> entry : threadsAndFields.entrySet()) { - FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.getKey(); - perThread.termsHashPerThread.reset(true); + if (termsHash != null) { + termsHash.reset(); } consumer.close(); } BytesRef payload; - /* Walk through all unique text tokens (Posting - * instances) found in this field and serialize them - * into a single RAM segment. 
*/ - void appendPostings(String fieldName, SegmentWriteState state, - FreqProxTermsWriterPerField[] fields, - FieldsConsumer consumer) - throws CorruptIndexException, IOException { + @Override + public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { + return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); + } - int numFields = fields.length; + @Override + void finishDocument(TermsHash termsHash) throws IOException { + } - final BytesRef text = new BytesRef(); - - final FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields]; - - final TermsConsumer termsConsumer = consumer.addField(fields[0].fieldInfo); - final Comparator termComp = termsConsumer.getComparator(); - - for(int i=0;i 0; if (omitTermFreqAndPositions) { @@ -169,7 +177,7 @@ } } } - + @Override ParallelPostingsArray createPostingsArray(int size) { return new FreqProxPostingsArray(size); @@ -212,7 +220,180 @@ return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT; } } - + public void abort() {} + + BytesRef payload; + + /* Walk through all unique text tokens (Posting + * instances) found in this field and serialize them + * into a single RAM segment. */ + void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state) + throws CorruptIndexException, IOException { + + final TermsConsumer termsConsumer = consumer.addField(fieldInfo); + final Comparator termComp = termsConsumer.getComparator(); + + final Term protoTerm = new Term(fieldName); + + final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + + final Map segDeletes; + if (state.segDeletes != null && state.segDeletes.terms.size() > 0) { + segDeletes = state.segDeletes.terms; + } else { + segDeletes = null; + } + + final int[] termIDs = termsHashPerField.sortPostings(termComp); + final int numTerms = termsHashPerField.bytesHash.size(); + final BytesRef text = new BytesRef(); + final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray; + final ByteSliceReader freq = new ByteSliceReader(); + final ByteSliceReader prox = new ByteSliceReader(); + + long sumTotalTermFreq = 0; + for (int i = 0; i < numTerms; i++) { + final int termID = termIDs[i]; + // Get BytesRef + final int textStart = postings.textStarts[termID]; + termsHashPerField.bytePool.setBytesRef(text, textStart); + + termsHashPerField.initReader(freq, termID, 0); + if (!fieldInfo.omitTermFreqAndPositions) { + termsHashPerField.initReader(prox, termID, 1); + } + + // TODO: really TermsHashPerField should take over most + // of this loop, including merge sort of terms from + // multiple threads and interacting with the + // TermsConsumer, only calling out to us (passing us the + // DocsConsumer) to handle delivery of docs/positions + + final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text); + + final int delDocLimit; + if (segDeletes != null) { + final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text)); + if (docIDUpto != null) { + delDocLimit = docIDUpto; + } else { + delDocLimit = 0; + } + } else { + delDocLimit = 0; + } + + // Now termStates has numToMerge FieldMergeStates + // which all share the same term. Now we must + // interleave the docID streams. 
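[Editor's aside, not part of the patch; the flush loop continues below] The freq stream decoded in this method stores each docID as a delta whose low bit marks an implicit term frequency of 1; when the frequency is not 1 it follows as a separate vInt, and when term freqs/positions are omitted the code is simply the raw delta. The tiny class below replays that convention on a plain int[] instead of a ByteSliceReader; its name is invented for the sketch.

final class DocDeltaDecodeSketch {
  static void decode(int[] codes) {
    int docID = 0;
    int i = 0;
    while (i < codes.length) {
      final int code = codes[i++];
      docID += code >>> 1;                                   // upper bits carry the gap to the previous docID
      final int termFreq = (code & 1) != 0 ? 1 : codes[i++]; // low bit set => freq == 1, else freq follows
      System.out.println("doc=" + docID + " freq=" + termFreq);
    }
  }

  public static void main(String[] args) {
    // doc 3 with freq 1, then doc 7 (gap 4) with freq 2: encoded as (3<<1)|1, (4<<1), 2
    decode(new int[] { 7, 8, 2 });
  }
}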
+ int numDocs = 0; + long totTF = 0; + int docID = 0; + int termFreq = 0; + + while(true) { + if (freq.eof()) { + if (postings.lastDocCodes[termID] != -1) { + // Return last doc + docID = postings.lastDocIDs[termID]; + if (!omitTermFreqAndPositions) { + termFreq = postings.docFreqs[termID]; + } + postings.lastDocCodes[termID] = -1; + } else { + // EOF + break; + } + } else { + final int code = freq.readVInt(); + if (omitTermFreqAndPositions) { + docID += code; + } else { + docID += code >>> 1; + if ((code & 1) != 0) { + termFreq = 1; + } else { + termFreq = freq.readVInt(); + } + } + + assert docID != postings.lastDocIDs[termID]; + } + + numDocs++; + assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs; + final int termDocFreq = termFreq; + + // NOTE: we could check here if the docID was + // deleted, and skip it. However, this is somewhat + // dangerous because it can yield non-deterministic + // behavior since we may see the docID before we see + // the term that caused it to be deleted. This + // would mean some (but not all) of its postings may + // make it into the index, which'd alter the docFreq + // for those terms. We could fix this by doing two + // passes, ie first sweep marks all del docs, and + // 2nd sweep does the real flush, but I suspect + // that'd add too much time to flush. + postingsConsumer.startDoc(docID, termDocFreq); + if (docID < delDocLimit) { + // Mark it deleted. TODO: we could also skip + // writing its postings; this would be + // deterministic (just for this Term's docs). + if (state.deletedDocs == null) { + state.deletedDocs = new BitVector(state.numDocs); + } + state.deletedDocs.set(docID); + } + + // Carefully copy over the prox + payload info, + // changing the format to match Lucene's segment + // format. + if (!currentFieldOmitTermFreqAndPositions) { + // omitTermFreqAndPositions == false so we do write positions & + // payload + int position = 0; + totTF += termDocFreq; + for(int j=0;j> 1; + + final int payloadLength; + final BytesRef thisPayload; + + if ((code & 1) != 0) { + // This position has a payload + payloadLength = prox.readVInt(); + + if (payload == null) { + payload = new BytesRef(); + payload.bytes = new byte[payloadLength]; + } else if (payload.bytes.length < payloadLength) { + payload.grow(payloadLength); + } + + prox.readBytes(payload.bytes, 0, payloadLength); + payload.length = payloadLength; + thisPayload = payload; + + } else { + payloadLength = 0; + thisPayload = null; + } + + postingsConsumer.addPosition(position, thisPayload); + } + + postingsConsumer.finishDoc(); + } + } + termsConsumer.finishTerm(text, new TermStats(numDocs, totTF)); + sumTotalTermFreq += totTF; + } + + termsConsumer.finish(sumTotalTermFreq); + } + } Index: lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java (working copy) @@ -1,45 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -final class FreqProxTermsWriterPerThread extends TermsHashConsumerPerThread { - final TermsHashPerThread termsHashPerThread; - final DocumentsWriter.DocState docState; - - public FreqProxTermsWriterPerThread(TermsHashPerThread perThread) { - docState = perThread.docState; - termsHashPerThread = perThread; - } - - @Override - public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) { - return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); - } - - @Override - void startDocument() { - } - - @Override - DocumentsWriter.DocWriter finishDocument() { - return null; - } - - @Override - public void abort() {} -} Index: lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -21,7 +21,13 @@ import java.io.FilenameFilter; import java.io.IOException; import java.io.PrintStream; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.Directory; @@ -49,12 +55,12 @@ * (IndexDeletionPolicy) is consulted on creation (onInit) * and once per commit (onCommit), to decide when a commit * should be removed. - * + * * It is the business of the IndexDeletionPolicy to choose * when to delete commit points. The actual mechanics of * file deletion, retrying, etc, derived from the deletion * of commit points is the business of the IndexFileDeleter. - * + * * The current default deletion policy is {@link * KeepOnlyLastCommitDeletionPolicy}, which removes all * prior commits when a new commit has completed. This @@ -72,7 +78,7 @@ * so we will retry them again later: */ private List deletable; - /* Reference count for all files in the index. + /* Reference count for all files in the index. * Counts how many existing commits reference a file. **/ private Map refCounts = new HashMap(); @@ -88,7 +94,7 @@ * non-commit checkpoint: */ private List> lastFiles = new ArrayList>(); - /* Commits that the IndexDeletionPolicy have decided to delete: */ + /* Commits that the IndexDeletionPolicy have decided to delete: */ private List commitsToDelete = new ArrayList(); private PrintStream infoStream; @@ -108,7 +114,7 @@ message("setInfoStream deletionPolicy=" + policy); } } - + private void message(String message) { infoStream.println("IFD [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); } @@ -139,12 +145,12 @@ // counts: long currentGen = segmentInfos.getGeneration(); indexFilenameFilter = new IndexFileNameFilter(codecs); - + CommitPoint currentCommitPoint = null; String[] files = null; try { files = directory.listAll(); - } catch (NoSuchDirectoryException e) { + } catch (NoSuchDirectoryException e) { // it means the directory is empty, so ignore it. 
files = new String[0]; } @@ -152,7 +158,7 @@ for (String fileName : files) { if ((indexFilenameFilter.accept(null, fileName)) && !fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) { - + // Add this file to refCounts with initial count 0: getRefCount(fileName); @@ -233,7 +239,7 @@ // Now delete anything with ref count at 0. These are // presumably abandoned files eg due to crash of // IndexWriter. - for(Map.Entry entry : refCounts.entrySet() ) { + for(Map.Entry entry : refCounts.entrySet() ) { RefCount rc = entry.getValue(); final String fileName = entry.getKey(); if (0 == rc.count) { @@ -253,7 +259,7 @@ // Always protect the incoming segmentInfos since // sometime it may not be the most recent commit checkpoint(segmentInfos, false); - + startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted(); deleteCommits(); @@ -327,7 +333,7 @@ segmentPrefix1 = null; segmentPrefix2 = null; } - + for(int i=0;i oldDeletable = deletable; @@ -397,7 +403,7 @@ /** * For definition of "check point" see IndexWriter comments: * "Clarification: Check Points (and commits)". - * + * * Writer calls this when it has made a "consistent * change" to the index, meaning new files are written to * the index and the in-memory SegmentInfos have been @@ -417,7 +423,7 @@ public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException { if (infoStream != null) { - message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); + message("now checkpoint \"" + segmentInfos + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -35,6 +35,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor; @@ -46,6 +47,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.Constants; import org.apache.lucene.util.ThreadInterruptedException; @@ -54,17 +56,16 @@ /** An IndexWriter creates and maintains an index. -

The create argument to the {@link - #IndexWriter(Directory, IndexWriterConfig) constructor} determines +

The {@link OpenMode} option on + {@link IndexWriterConfig#setOpenMode(OpenMode)} determines whether a new index is created, or whether an existing index is - opened. Note that you can open an index with create=true - even while readers are using the index. The old readers will + opened. Note that you can open an index with {@link OpenMode#CREATE} + even while readers are using the index. The old readers will continue to search the "point in time" snapshot they had opened, - and won't see the newly created index until they re-open. There are - also {@link #IndexWriter(Directory, IndexWriterConfig) constructors} - with no create argument which will create a new index - if there is not already an index at the provided path and otherwise - open the existing index.

+ and won't see the newly created index until they re-open. If + {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a + new index if there is not already an index at the provided path + and otherwise open the existing index.

In either case, documents are added with {@link #addDocument(Document) addDocument} and removed with {@link #deleteDocuments(Term)} or {@link @@ -76,15 +77,19 @@

These changes are buffered in memory and periodically flushed to the {@link Directory} (during the above method - calls). A flush is triggered when there are enough + calls). A flush is triggered when there are enough added documents + since the last flush. Flushing is triggered either by RAM usage of the + documents (see {@link IndexWriterConfig#setRAMBufferSizeMB}) or the + number of added documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}). + The default is to flush when RAM usage hits + {@value IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For + best indexing speed you should flush by RAM usage with a + large RAM buffer. Additionally, if IndexWriter reaches the configured number of buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) - or enough added documents since the last flush, whichever - is sooner. For the added documents, flushing is triggered - either by RAM usage of the documents (see {@link - IndexWriterConfig#setRAMBufferSizeMB}) or the number of added documents. - The default is to flush when RAM usage hits 16 MB. For - best indexing speed you should flush by RAM usage with a - large RAM buffer. Note that flushing just moves the + the deleted terms and queries are flushed and applied to existing segments. + In contrast to the other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and + {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms + won't trigger a segment flush. Note that flushing just moves the internal buffered state in IndexWriter into the index, but these changes are not visible to IndexReader until either {@link #commit()} or {@link #close} is called. A flush may @@ -165,21 +170,21 @@ /* * Clarification: Check Points (and commits) * IndexWriter writes new index files to the directory without writing a new segments_N - * file which references these new files. It also means that the state of + * file which references these new files. It also means that the state of * the in memory SegmentInfos object is different than the most recent * segments_N file written to the directory. - * - * Each time the SegmentInfos is changed, and matches the (possibly - * modified) directory files, we have a new "check point". - * If the modified/new SegmentInfos is written to disk - as a new - * (generation of) segments_N file - this check point is also an + * + * Each time the SegmentInfos is changed, and matches the (possibly + * modified) directory files, we have a new "check point". + * If the modified/new SegmentInfos is written to disk - as a new + * (generation of) segments_N file - this check point is also an * IndexCommit. - * - * A new checkpoint always replaces the previous checkpoint and - * becomes the new "front" of the index. This allows the IndexFileDeleter + * + * A new checkpoint always replaces the previous checkpoint and + * becomes the new "front" of the index. This allows the IndexFileDeleter * to delete files that are referenced only by stale checkpoints. * (files that were created since the last commit, but are no longer - * referenced by the "front" of the index). For this, IndexFileDeleter + * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint. */ public class IndexWriter implements Closeable { @@ -195,7 +200,7 @@ * printed to infoStream, if set (see {@link * #setInfoStream}). 
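[Editor's aside, not part of the patch] The javadoc change above describes the flush triggers in prose; the following is a hedged configuration sketch. The Version constant, the analyzer choice, and exact package locations are assumptions for the example and may differ on this branch.

import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class FlushConfigSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40,
        new WhitespaceAnalyzer(Version.LUCENE_40));
    // Flush segments by RAM usage -- the recommended trigger for indexing speed.
    conf.setRAMBufferSizeMB(64.0);
    // Disable the document-count trigger so only RAM usage decides when DWPTs flush.
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    // Buffered delete terms are applied once this many accumulate; per the javadoc above,
    // this applies deletes to existing segments but does not trigger a segment flush.
    conf.setMaxBufferedDeleteTerms(1000);
    IndexWriter writer = new IndexWriter(dir, conf);
    writer.close();
  }
}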
*/ - public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH_UTF8; + public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8; // The normal read buffer size defaults to 1024, but // increasing this during merging seems to yield @@ -225,7 +230,7 @@ final FieldNumberBiMap globalFieldNumberMap; private DocumentsWriter docWriter; - private IndexFileDeleter deleter; + final IndexFileDeleter deleter; private Set segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization private int optimizeMaxNumSegments; @@ -247,12 +252,12 @@ private long mergeGen; private boolean stopMerges; - private final AtomicInteger flushCount = new AtomicInteger(); - private final AtomicInteger flushDeletesCount = new AtomicInteger(); + final AtomicInteger flushCount = new AtomicInteger(); + final AtomicInteger flushDeletesCount = new AtomicInteger(); final ReaderPool readerPool = new ReaderPool(); final BufferedDeletesStream bufferedDeletesStream; - + // This is a "write once" variable (like the organic dye // on a DVD-R that may or may not be heated by a laser and // then cooled to permanently record the event): it's @@ -339,31 +344,58 @@ */ IndexReader getReader(boolean applyAllDeletes) throws IOException { ensureOpen(); - + final long tStart = System.currentTimeMillis(); if (infoStream != null) { message("flush at getReader"); } - // Do this up front before flushing so that the readers // obtained during this flush are pooled, the first time // this method is called: poolReaders = true; - - // Prevent segmentInfos from changing while opening the - // reader; in theory we could do similar retry logic, - // just like we do when loading segments_N - IndexReader r; - synchronized(this) { - flush(false, applyAllDeletes); - r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); - if (infoStream != null) { - message("return reader version=" + r.getVersion() + " reader=" + r); + final IndexReader r; + doBeforeFlush(); + final boolean anySegmentFlushed; + /* + * for releasing a NRT reader we must ensure that + * DW doesn't add any segments or deletes until we are + * done with creating the NRT DirectoryReader. + * We release the two stage full flush after we are done opening the + * directory reader! + */ + synchronized (fullFlushLock) { + boolean success = false; + try { + anySegmentFlushed = docWriter.flushAllThreads(); + if (!anySegmentFlushed) { + // prevent double increment since docWriter#doFlush increments the flushcount + // if we flushed anything. + flushCount.incrementAndGet(); + } + success = true; + // Prevent segmentInfos from changing while opening the + // reader; in theory we could do similar retry logic, + // just like we do when loading segments_N + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); + if (infoStream != null) { + message("return reader version=" + r.getVersion() + " reader=" + r); + } + } + } finally { + if (!success && infoStream != null) { + message("hit exception during while NRT reader"); + } + // Done: finish the full flush! 
+ docWriter.finishFullFlush(success); + doAfterFlush(); } } - maybeMerge(); - + if (anySegmentFlushed) { + maybeMerge(); + } if (infoStream != null) { message("getReader took " + (System.currentTimeMillis() - tStart) + " msec"); } @@ -400,10 +432,10 @@ if (r != null) { r.hasChanges = false; } - } + } } } - + // used only by asserts public synchronized boolean infoIsLive(SegmentInfo info) { int idx = segmentInfos.indexOf(info); @@ -419,7 +451,7 @@ } return info; } - + /** * Release the segment reader (i.e. decRef it and close if there * are no more references. @@ -432,7 +464,7 @@ public synchronized boolean release(SegmentReader sr) throws IOException { return release(sr, false); } - + /** * Release the segment reader (i.e. decRef it and close if there * are no more references. @@ -493,7 +525,7 @@ sr.close(); } } - + /** Remove all our references to readers, and commits * any pending changes. */ synchronized void close() throws IOException { @@ -503,7 +535,7 @@ Iterator> iter = readerMap.entrySet().iterator(); while (iter.hasNext()) { - + Map.Entry ent = iter.next(); SegmentReader sr = ent.getValue(); @@ -526,7 +558,7 @@ sr.decRef(); } } - + /** * Commit all segment reader in the pool. * @throws IOException @@ -550,7 +582,7 @@ } } } - + /** * Returns a ref to a clone. NOTE: this clone is not * enrolled in the pool, so you should simply close() @@ -564,7 +596,7 @@ sr.decRef(); } } - + /** * Obtain a SegmentReader from the readerPool. The reader * must be returned by calling {@link #release(SegmentReader)} @@ -580,7 +612,7 @@ /** * Obtain a SegmentReader from the readerPool. The reader * must be returned by calling {@link #release(SegmentReader)} - * + * * @see #release(SegmentReader) * @param info * @param doOpenStores @@ -638,7 +670,7 @@ return sr; } } - + /** * Obtain the number of deleted docs for a pooled reader. * If the reader isn't being pooled, the segmentInfo's @@ -658,7 +690,7 @@ } } } - + /** * Used internally to throw an {@link * AlreadyClosedException} if this IndexWriter has been @@ -721,7 +753,7 @@ mergePolicy.setIndexWriter(this); mergeScheduler = conf.getMergeScheduler(); codecs = conf.getCodecProvider(); - + bufferedDeletesStream = new BufferedDeletesStream(messageID); bufferedDeletesStream.setInfoStream(infoStream); poolReaders = conf.getReaderPooling(); @@ -790,8 +822,7 @@ // start with previous field numbers, but new FieldInfos globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory); - docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(), - globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)), bufferedDeletesStream); + docWriter = new DocumentsWriter(config, directory, this, globalFieldNumberMap, bufferedDeletesStream); docWriter.setInfoStream(infoStream); // Default deleter (for backwards compatibility) is @@ -849,7 +880,7 @@ public IndexWriterConfig getConfig() { return config; } - + /** If non-null, this will be the default infoStream used * by a newly instantiated IndexWriter. * @see #setInfoStream @@ -901,7 +932,7 @@ public boolean verbose() { return infoStream != null; } - + /** * Commits all changes to an index and closes all * associated files. Note that this may be a costly @@ -916,7 +947,7 @@ * even though part of it (flushing buffered documents) * may have succeeded, so the write lock will still be * held.
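[Editor's aside, not part of the patch] The getReader() changes above flush all DWPTs under the full-flush lock and hand the flushed segments to a pooled DirectoryReader without a commit. Below is a hedged usage sketch of that near-real-time path; the exact public entry point for NRT readers may differ on this branch.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;

public class NrtReaderSketch {
  // Assumes 'writer' was configured as in the earlier configuration sketch.
  static void showNrt(IndexWriter writer) throws IOException {
    Document doc = new Document();
    doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.addDocument(doc);

    // Opens a near-real-time reader: buffered documents are flushed (per DWPT, concurrently)
    // but not committed, and deletes are applied because applyAllDeletes is true.
    IndexReader reader = IndexReader.open(writer, true);
    try {
      System.out.println("numDocs=" + reader.numDocs());
    } finally {
      reader.close();
    }
  }
}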

- * + * *

If you can correct the underlying cause (eg free up * some disk space) then you can call close() again. * Failing that, if you want to force the write lock to be @@ -1036,7 +1067,7 @@ if (infoStream != null) message("now call final commit()"); - + if (!hitOOM) { commitInternal(null); } @@ -1049,7 +1080,7 @@ docWriter = null; deleter.close(); } - + if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -1072,7 +1103,7 @@ } /** Returns the Directory used by this index. */ - public Directory getDirectory() { + public Directory getDirectory() { // Pass false because the flush during closing calls getDirectory ensureOpen(false); return directory; @@ -1196,22 +1227,7 @@ * @throws IOException if there is a low-level IO error */ public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { - ensureOpen(); - boolean doFlush = false; - boolean success = false; - try { - try { - doFlush = docWriter.updateDocument(doc, analyzer, null); - success = true; - } finally { - if (!success && infoStream != null) - message("hit exception adding document"); - } - if (doFlush) - flush(true, false); - } catch (OutOfMemoryError oom) { - handleOOM(oom, "addDocument"); - } + updateDocument(null, doc, analyzer); } /** @@ -1228,9 +1244,7 @@ public void deleteDocuments(Term term) throws CorruptIndexException, IOException { ensureOpen(); try { - if (docWriter.deleteTerm(term, false)) { - flush(true, false); - } + docWriter.deleteTerms(term); } catch (OutOfMemoryError oom) { handleOOM(oom, "deleteDocuments(Term)"); } @@ -1238,7 +1252,8 @@ /** * Deletes the document(s) containing any of the - * terms. All deletes are flushed at the same time. + * terms. All given deletes are applied and flushed atomically + * at the same time. * *

NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See NOTE: if this method hits an OutOfMemoryError * you should immediately close the writer. See (segmentInfos); optimizeMaxNumSegments = maxNumSegments; - + // Now mark all pending & running merges as optimize // merge: for(final MergePolicy.OneMerge merge : pendingMerges) { @@ -1612,12 +1622,12 @@ if (merge.optimize) return true; } - + for (final MergePolicy.OneMerge merge : runningMerges) { if (merge.optimize) return true; } - + return false; } @@ -1914,7 +1924,7 @@ /** * Delete all documents in the index. * - *

This method will drop all buffered documents and will + *

This method will drop all buffered documents and will * remove all segments from the index. This change will not be * visible until a {@link #commit()} has been called. This method * can be rolled back using {@link #rollback()}.

@@ -1944,7 +1954,7 @@ deleter.refresh(); // Don't bother saving any changes in our segmentInfos - readerPool.clear(null); + readerPool.clear(null); // Mark that the index has changed ++changeCount; @@ -1971,7 +1981,7 @@ mergeFinish(merge); } pendingMerges.clear(); - + for (final MergePolicy.OneMerge merge : runningMerges) { if (infoStream != null) message("now abort running merge " + merge.segString(directory)); @@ -1998,7 +2008,7 @@ message("all running merges have aborted"); } else { - // waitForMerges() will ensure any running addIndexes finishes. + // waitForMerges() will ensure any running addIndexes finishes. // It's fine if a new one attempts to start because from our // caller above the call will see that we are in the // process of closing, and will throw an @@ -2010,7 +2020,7 @@ /** * Wait for any currently outstanding merges to finish. * - *

It is guaranteed that any merges started prior to calling this method + *

It is guaranteed that any merges started prior to calling this method * will have completed once this method completes.
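A short sketch of the guarantee stated above, assuming writer is an open IndexWriter:

  writer.maybeMerge();     // ask the MergePolicy to start any merges it now wants
  writer.waitForMerges();  // blocks until all merges started before this call have finished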

*/ public synchronized void waitForMerges() { @@ -2040,6 +2050,125 @@ deleter.checkpoint(segmentInfos, false); } + /** + * Prepares the {@link SegmentInfo} for the new flushed segment and persists + * the deleted documents {@link BitVector}. Use + * {@link #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes)} to + * publish the returned {@link SegmentInfo} together with its segment private + * delete packet. + * + * @see #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes) + */ + SegmentInfo prepareFlushedSegment(FlushedSegment flushedSegment) throws IOException { + assert flushedSegment != null; + + SegmentInfo newSegment = flushedSegment.segmentInfo; + + setDiagnostics(newSegment, "flush"); + + boolean success = false; + try { + if (useCompoundFile(newSegment)) { + String compoundFileName = IndexFileNames.segmentFileName(newSegment.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + message("creating compound file " + compoundFileName); + // Now build compound file + CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName); + for(String fileName : newSegment.files()) { + cfsWriter.addFile(fileName); + } + + // Perform the merge + cfsWriter.close(); + synchronized(this) { + deleter.deleteNewFiles(newSegment.files()); + } + + newSegment.setUseCompoundFile(true); + } + + // Must write deleted docs after the CFS so we don't + // slurp the del file into CFS: + if (flushedSegment.deletedDocuments != null) { + final int delCount = flushedSegment.deletedDocuments.count(); + assert delCount > 0; + newSegment.setDelCount(delCount); + newSegment.advanceDelGen(); + final String delFileName = newSegment.getDelFileName(); + if (infoStream != null) { + message("flush: write " + delCount + " deletes to " + delFileName); + } + boolean success2 = false; + try { + // TODO: in the NRT case it'd be better to hand + // this del vector over to the + // shortly-to-be-opened SegmentReader and let it + // carry the changes; there's no reason to use + // filesystem as intermediary here. + flushedSegment.deletedDocuments.write(directory, delFileName); + success2 = true; + } finally { + if (!success2) { + try { + directory.deleteFile(delFileName); + } catch (Throwable t) { + // suppress this so we keep throwing the + // original exception + } + } + } + } + + success = true; + } finally { + if (!success) { + if (infoStream != null) { + message("hit exception " + + "reating compound file for newly flushed segment " + newSegment.name); + } + + synchronized(this) { + deleter.refresh(newSegment.name); + } + } + } + return newSegment; + } + + /** + * Atomically adds the segment private delete packet and publishes the flushed + * segments SegmentInfo to the index writer. NOTE: use + * {@link #prepareFlushedSegment(FlushedSegment)} to obtain the + * {@link SegmentInfo} for the flushed segment. + * + * @see #prepareFlushedSegment(FlushedSegment) + */ + synchronized void publishFlushedSegment(SegmentInfo newSegment, + FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) throws IOException { + // Lock order IW -> BDS + synchronized (bufferedDeletesStream) { + if (globalPacket != null && globalPacket.any()) { + bufferedDeletesStream.push(globalPacket); + } + // Publishing the segment must be synched on IW -> BDS to make the sure + // that no merge prunes away the seg. 
private delete packet + final long nextGen; + if (packet != null && packet.any()) { + nextGen = bufferedDeletesStream.push(packet); + } else { + // Since we don't have a delete packet to apply we can get a new + // generation right away + nextGen = bufferedDeletesStream.getNextGen(); + } + newSegment.setBufferedDeletesGen(nextGen); + segmentInfos.add(newSegment); + checkpoint(); + } + } + + synchronized boolean useCompoundFile(SegmentInfo segmentInfo) throws IOException { + return mergePolicy.useCompoundFile(segmentInfos, segmentInfo); + } + private synchronized void resetMergeExceptions() { mergeExceptions = new ArrayList(); mergeGen++; @@ -2088,11 +2217,11 @@ *

* NOTE: this method only copies the segments of the incoming indexes * and does not merge them. Therefore deleted documents are not removed and - * the new segments are not merged with the existing ones. Also, the segments - * are copied as-is, meaning they are not converted to CFS if they aren't, - * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} + * the new segments are not merged with the existing ones. Also, the segments + * are copied as-is, meaning they are not converted to CFS if they aren't, + * and vice-versa. If you wish to do that, you can call {@link #maybeMerge} * or {@link #optimize} afterwards. - * + * *

This requires this index not be among those to be added. * *
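A brief sketch of the copy-then-merge pattern suggested above; dir1 and dir2 are assumed Directory instances that do not include this writer's own directory:

  writer.addIndexes(dir1, dir2);  // segments are copied as-is: no merging, no CFS conversion
  writer.maybeMerge();            // optionally merge the copied segments afterwards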

@@ -2129,7 +2258,7 @@ docCount += info.docCount; String newSegName = newSegmentName(); String dsName = info.getDocStoreSegment(); - + if (infoStream != null) { message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info); } @@ -2176,7 +2305,7 @@ infos.add(info); } - } + } synchronized (this) { ensureOpen(); @@ -2225,11 +2354,12 @@ SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, null, codecs, payloadProcessorProvider, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs))); - + for (IndexReader reader : readers) // add new indexes merger.add(reader); - + int docCount = merger.merge(); // merge 'em + final FieldInfos fieldInfos = merger.fieldInfos(); SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false, fieldInfos.hasProx(), merger.getSegmentCodecs(), @@ -2241,11 +2371,11 @@ synchronized(this) { // Guard segmentInfos useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info); } - + // Now create the compound file if needed if (useCompoundFile) { merger.createCompoundFile(mergedName + ".cfs", info); - + // delete new non cfs files directly: they were never // registered with IFD deleter.deleteNewFiles(info.files()); @@ -2297,7 +2427,7 @@ * #commit()} to finish the commit, or {@link * #rollback()} to revert the commit and undo all changes * done since the writer was opened.
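For reference, a minimal two-phase-commit sketch along the lines described above; error handling is reduced to the essentials and writer is an open IndexWriter:

  try {
    writer.prepareCommit();   // phase one: write, but do not yet publish, the commit
    // ... commit any other resources participating in the transaction ...
    writer.commit();          // phase two: finish the commit started above
  } catch (Throwable t) {
    writer.rollback();        // revert the pending commit and all changes since open
  }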

- * + * * You can also just call {@link #commit(Map)} directly * without prepareCommit first in which case that method * will internally call prepareCommit. @@ -2441,6 +2571,10 @@ } } + // Ensures only one flush() is actually flushing segments + // at a time: + private final Object fullFlushLock = new Object(); + /** * Flush all in-memory buffered updates (adds and deletes) * to the Directory. @@ -2464,117 +2598,105 @@ } } - // TODO: this method should not have to be entirely - // synchronized, ie, merges should be allowed to commit - // even while a flush is happening - private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException { - + private boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException { if (hitOOM) { throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush"); } doBeforeFlush(); - assert testPoint("startDoFlush"); - - // We may be flushing because it was triggered by doc - // count, del count, ram usage (in which case flush - // pending is already set), or we may be flushing - // due to external event eg getReader or commit is - // called (in which case we now set it, and this will - // pause all threads): - flushControl.setFlushPendingNoWait("explicit flush"); - boolean success = false; - try { if (infoStream != null) { message(" start flush: applyAllDeletes=" + applyAllDeletes); message(" index before flush " + segString()); } - - final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos); - if (newSegment != null) { - setDiagnostics(newSegment, "flush"); - segmentInfos.add(newSegment); - checkpoint(); - } - - if (!applyAllDeletes) { - // If deletes alone are consuming > 1/2 our RAM - // buffer, force them all to apply now. 
This is to - // prevent too-frequent flushing of a long tail of - // tiny segments: - if (flushControl.getFlushDeletes() || - (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH && - bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) { - applyAllDeletes = true; - if (infoStream != null) { - message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB())); - } + final boolean anySegmentFlushed; + + synchronized (fullFlushLock) { + try { + anySegmentFlushed = docWriter.flushAllThreads(); + success = true; + } finally { + docWriter.finishFullFlush(success); } } - - if (applyAllDeletes) { - if (infoStream != null) { - message("apply all deletes during flush"); + success = false; + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + doAfterFlush(); + if (!anySegmentFlushed) { + // flushCount is incremented in flushAllThreads + flushCount.incrementAndGet(); } - flushDeletesCount.incrementAndGet(); - final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos); - if (result.anyDeletes) { - checkpoint(); - } - if (!keepFullyDeletedSegments && result.allDeleted != null) { - if (infoStream != null) { - message("drop 100% deleted segments: " + result.allDeleted); - } - for(SegmentInfo info : result.allDeleted) { - // If a merge has already registered for this - // segment, we leave it in the readerPool; the - // merge will skip merging it and will then drop - // it once it's done: - if (!mergingSegments.contains(info)) { - segmentInfos.remove(info); - if (readerPool != null) { - readerPool.drop(info); - } - } - } - checkpoint(); - } - bufferedDeletesStream.prune(segmentInfos); - assert !bufferedDeletesStream.any(); - flushControl.clearDeletes(); - } else if (infoStream != null) { - message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + success = true; + return anySegmentFlushed; } - - doAfterFlush(); - flushCount.incrementAndGet(); - - success = true; - - return newSegment != null; - } catch (OutOfMemoryError oom) { handleOOM(oom, "doFlush"); // never hit return false; } finally { - flushControl.clearFlushPending(); if (!success && infoStream != null) message("hit exception during flush"); } } + + final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException { + if (applyAllDeletes) { + if (infoStream != null) { + message("apply all deletes during flush"); + } + applyAllDeletes(); + } else if (infoStream != null) { + message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed()); + } + } + + final synchronized void applyAllDeletes() throws IOException { + flushDeletesCount.incrementAndGet(); + final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream + .applyDeletes(readerPool, segmentInfos); + if (result.anyDeletes) { + checkpoint(); + } + if (!keepFullyDeletedSegments && result.allDeleted != null) { + if (infoStream != null) { + message("drop 100% deleted segments: " + result.allDeleted); + } + for (SegmentInfo info : result.allDeleted) { + // If a merge has already registered for this + // segment, we leave it in the readerPool; the + // merge will skip merging it and will then drop + // it once it's done: + if (!mergingSegments.contains(info)) { + segmentInfos.remove(info); + if (readerPool != null) { + 
readerPool.drop(info); + } + } + } + checkpoint(); + } + bufferedDeletesStream.prune(segmentInfos); + } + /** Expert: Return the total size of all index files currently cached in memory. * Useful for size management with flushRamDocs() */ public final long ramSizeInBytes() { ensureOpen(); - return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed(); + return docWriter.flushControl.netBytes() + bufferedDeletesStream.bytesUsed(); } + + // for testing only + DocumentsWriter getDocsWriter() { + boolean test = false; + assert test = true; + return test?docWriter: null; + } /** Expert: Return the number of documents currently * buffered in RAM. */ @@ -2709,7 +2831,7 @@ } commitMergedDeletes(merge, mergedReader); - + // If the doc store we are using has been closed and // is in now compound format (but wasn't when we // started), then we will switch to the compound @@ -2723,7 +2845,7 @@ message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert")); } - final Set mergedAway = new HashSet(merge.segments); + final Set mergedAway = new HashSet(merge.segments); int segIdx = 0; int newSegIdx = 0; boolean inserted = false; @@ -2770,15 +2892,15 @@ // them so that they don't bother writing them to // disk, updating SegmentInfo, etc.: readerPool.clear(merge.segments); - + if (merge.optimize) { // cascade the optimize: segmentsToOptimize.add(merge.info); } - + return true; } - + final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException { if (infoStream != null) { @@ -2867,7 +2989,7 @@ /** Hook that's called when the specified merge is complete. */ void mergeSuccess(MergePolicy.OneMerge merge) { } - + /** Checks whether this merge involves any segments * already participating in a merge. If not, this merge * is "registered", meaning we record that its segments @@ -2998,7 +3120,6 @@ // Lock order: IW -> BD bufferedDeletesStream.prune(segmentInfos); - Map details = new HashMap(); details.put("optimize", Boolean.toString(merge.optimize)); details.put("mergeFactor", Integer.toString(merge.segments.size())); @@ -3019,11 +3140,11 @@ mergingSegments.add(merge.info); } - private void setDiagnostics(SegmentInfo info, String source) { + static void setDiagnostics(SegmentInfo info, String source) { setDiagnostics(info, source, null); } - private void setDiagnostics(SegmentInfo info, String source, Map details) { + private static void setDiagnostics(SegmentInfo info, String source, Map details) { Map diagnostics = new HashMap(); diagnostics.put("source", source); diagnostics.put("lucene.version", Constants.LUCENE_VERSION); @@ -3041,7 +3162,7 @@ /** Does fininishing for a merge, which is fast but holds * the synchronized lock on IndexWriter instance. */ final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException { - + // Optimize, addIndexes or finishMerges may be waiting // on merges to finish. 
notifyAll(); @@ -3113,11 +3234,11 @@ * instance */ private int mergeMiddle(MergePolicy.OneMerge merge) throws CorruptIndexException, IOException { - + merge.checkAborted(directory); final String mergedName = merge.info.name; - + int mergedDocCount = 0; SegmentInfos sourceSegments = merge.segments; @@ -3191,7 +3312,7 @@ message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size()); } anyNonBulkMerges |= merger.getAnyNonBulkMerges(); - + assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount; // Very important to do this before opening the reader @@ -3325,12 +3446,12 @@ // For test purposes. final int getBufferedDeleteTermsSize() { - return docWriter.getPendingDeletes().terms.size(); + return docWriter.getBufferedDeleteTermsSize(); } // For test purposes. final int getNumBufferedDeleteTerms() { - return docWriter.getPendingDeletes().numTermDeletes.get(); + return docWriter.getNumBufferedDeleteTerms(); } // utility routines for tests @@ -3445,17 +3566,17 @@ assert lastCommitChangeCount <= changeCount; myChangeCount = changeCount; - + if (changeCount == lastCommitChangeCount) { if (infoStream != null) message(" skip startCommit(): no changes pending"); return; } - + // First, we clone & incref the segmentInfos we intend // to sync, then, without locking, we sync() all files // referenced by toSync, in the background. - + if (infoStream != null) message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount); @@ -3463,10 +3584,10 @@ toSync = (SegmentInfos) segmentInfos.clone(); assert filesExist(toSync); - + if (commitUserData != null) toSync.setUserData(commitUserData); - + // This protects the segmentInfos we are now going // to commit. This is important in case, eg, while // we are trying to sync all referenced files, a @@ -3598,7 +3719,7 @@ /** Expert: remove any index files that are no longer * used. - * + * *

IndexWriter normally deletes unused files itself, * during indexing. However, on Windows, which disallows * deletion of open files, if there is a reader open on @@ -3647,7 +3768,7 @@ public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) { payloadProcessorProvider = pcp; } - + /** * Returns the {@link PayloadProcessorProvider} that is used during segment * merges to process payloads. @@ -3655,124 +3776,4 @@ public PayloadProcessorProvider getPayloadProcessorProvider() { return payloadProcessorProvider; } - - // decides when flushes happen - final class FlushControl { - - private boolean flushPending; - private boolean flushDeletes; - private int delCount; - private int docCount; - private boolean flushing; - - private synchronized boolean setFlushPending(String reason, boolean doWait) { - if (flushPending || flushing) { - if (doWait) { - while(flushPending || flushing) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - } - return false; - } else { - if (infoStream != null) { - message("now trigger flush reason=" + reason); - } - flushPending = true; - return flushPending; - } - } - - public synchronized void setFlushPendingNoWait(String reason) { - setFlushPending(reason, false); - } - - public synchronized boolean getFlushPending() { - return flushPending; - } - - public synchronized boolean getFlushDeletes() { - return flushDeletes; - } - - public synchronized void clearFlushPending() { - if (infoStream != null) { - message("clearFlushPending"); - } - flushPending = false; - flushDeletes = false; - docCount = 0; - notifyAll(); - } - - public synchronized void clearDeletes() { - delCount = 0; - } - - public synchronized boolean waitUpdate(int docInc, int delInc) { - return waitUpdate(docInc, delInc, false); - } - - public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) { - while(flushPending) { - try { - wait(); - } catch (InterruptedException ie) { - throw new ThreadInterruptedException(ie); - } - } - - // skipWait is only used when a thread is BOTH adding - // a doc and buffering a del term, and, the adding of - // the doc already triggered a flush - if (skipWait) { - docCount += docInc; - delCount += delInc; - return false; - } - - final int maxBufferedDocs = config.getMaxBufferedDocs(); - if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH && - (docCount+docInc) >= maxBufferedDocs) { - return setFlushPending("maxBufferedDocs", true); - } - docCount += docInc; - - final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms(); - if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH && - (delCount+delInc) >= maxBufferedDeleteTerms) { - flushDeletes = true; - return setFlushPending("maxBufferedDeleteTerms", true); - } - delCount += delInc; - - return flushByRAMUsage("add delete/doc"); - } - - public synchronized boolean flushByRAMUsage(String reason) { - final double ramBufferSizeMB = config.getRAMBufferSizeMB(); - if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) { - final long limit = (long) (ramBufferSizeMB*1024*1024); - long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); - if (used >= limit) { - - // DocumentsWriter may be able to free up some - // RAM: - // Lock order: FC -> DW - docWriter.balanceRAM(); - - used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed(); - if (used >= limit) { - return setFlushPending("ram full: " + reason, false); - } - } - } - return false; - } - } - - final FlushControl flushControl = new 
FlushControl(); } Index: lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (working copy) @@ -18,7 +18,7 @@ */ import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.DocumentsWriter.IndexingChain; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.search.IndexSearcher; @@ -41,7 +41,7 @@ * IndexWriterConfig conf = new IndexWriterConfig(analyzer); * conf.setter1().setter2(); * - * + * * @since 3.1 */ public final class IndexWriterConfig implements Cloneable { @@ -56,7 +56,7 @@ * */ public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND } - + /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */ public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here @@ -77,23 +77,19 @@ /** * Default value for the write lock timeout (1,000 ms). - * + * * @see #setDefaultWriteLockTimeout(long) */ public static long WRITE_LOCK_TIMEOUT = 1000; - /** The maximum number of simultaneous threads that may be - * indexing documents at once in IndexWriter; if more - * than this many threads arrive they will wait for - * others to finish. */ - public final static int DEFAULT_MAX_THREAD_STATES = 8; - /** Default setting for {@link #setReaderPooling}. */ public final static boolean DEFAULT_READER_POOLING = false; /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */ public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR; + /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */ + public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945; /** * Sets the default (for any instance) maximum time to wait for a write lock * (in milliseconds). @@ -105,7 +101,7 @@ /** * Returns the default write lock timeout for newly instantiated * IndexWriterConfigs. 
- * + * * @see #setDefaultWriteLockTimeout(long) */ public static long getDefaultWriteLockTimeout() { @@ -127,10 +123,12 @@ private volatile IndexReaderWarmer mergedSegmentWarmer; private volatile CodecProvider codecProvider; private volatile MergePolicy mergePolicy; - private volatile int maxThreadStates; + private volatile DocumentsWriterPerThreadPool indexerThreadPool; private volatile boolean readerPooling; private volatile int readerTermsIndexDivisor; - + private volatile FlushPolicy flushPolicy; + private volatile int perThreadHardLimitMB; + private Version matchVersion; /** @@ -153,15 +151,16 @@ maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB; maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS; - indexingChain = DocumentsWriter.defaultIndexingChain; + indexingChain = DocumentsWriterPerThread.defaultIndexingChain; mergedSegmentWarmer = null; codecProvider = CodecProvider.getDefault(); mergePolicy = new TieredMergePolicy(); - maxThreadStates = DEFAULT_MAX_THREAD_STATES; readerPooling = DEFAULT_READER_POOLING; + indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(); readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR; + perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB; } - + @Override public Object clone() { // Shallow clone is the only thing that's possible, since parameters like @@ -186,7 +185,7 @@ this.openMode = openMode; return this; } - + /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */ public OpenMode getOpenMode() { return openMode; @@ -261,7 +260,7 @@ public SimilarityProvider getSimilarityProvider() { return similarityProvider; } - + /** * Expert: set the interval between indexed terms. Large values cause less * memory to be used by IndexReader, but slow random-access to terms. Small @@ -281,7 +280,7 @@ * In particular, numUniqueTerms/interval terms are read into * memory by an IndexReader, and, on average, interval/2 terms * must be scanned for each random term access. - * + * * @see #DEFAULT_TERM_INDEX_INTERVAL * *

Takes effect immediately, but only applies to newly @@ -293,7 +292,7 @@ /** * Returns the interval between indexed terms. - * + * * @see #setTermIndexInterval(int) */ public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here @@ -331,10 +330,10 @@ this.writeLockTimeout = writeLockTimeout; return this; } - + /** * Returns allowed timeout when acquiring the write lock. - * + * * @see #setWriteLockTimeout(long) */ public long getWriteLockTimeout() { @@ -343,15 +342,16 @@ /** * Determines the minimal number of delete terms required before the buffered - * in-memory delete terms are applied and flushed. If there are documents - * buffered in memory at the time, they are merged and a new segment is - * created. - - *

Disabled by default (writer flushes by RAM usage). + * in-memory delete terms and queries are applied and flushed. + *

Disabled by default (writer flushes by RAM usage).

+ *

+ * NOTE: This setting won't trigger a segment flush. + *
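A hedged configuration sketch of this setting; matchVersion and analyzer are assumed, and the constructor form follows IndexWriterConfig(Version, Analyzer):

  IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
  // After 1000 buffered delete terms the deletes are applied and flushed,
  // but this setting alone never triggers a segment flush:
  conf.setMaxBufferedDeleteTerms(1000);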

* * @throws IllegalArgumentException if maxBufferedDeleteTerms * is enabled but smaller than 1 * @see #setRAMBufferSizeMB + * @see #setFlushPolicy(FlushPolicy) * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. @@ -366,9 +366,9 @@ } /** - * Returns the number of buffered deleted terms that will trigger a flush if - * enabled. - * + * Returns the number of buffered deleted terms that will trigger a flush of all + * buffered deletes if enabled. + * * @see #setMaxBufferedDeleteTerms(int) */ public int getMaxBufferedDeleteTerms() { @@ -380,45 +380,50 @@ * and deletions before they are flushed to the Directory. Generally for * faster indexing performance it's best to flush by RAM usage instead of * document count and use as large a RAM buffer as you can. - * *

* When this is set, the writer will flush whenever buffered documents and * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent * triggering a flush due to RAM usage. Note that if flushing by document * count is also enabled, then the flush will be triggered by whichever comes * first. - * *

+ * The maximum RAM limit is inherently determined by the JVM's available memory. + * Yet, an {@link IndexWriter} session can consume a significantly larger amount + * of memory than the given RAM limit since this limit is just an indicator when + * to flush memory resident documents to the Directory. Flushes are likely to happen + * concurrently while other threads are adding documents to the writer. For application + * stability the available memory in the JVM should be significantly larger than + * the RAM buffer used for indexing. + *
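As a rough sizing sketch of the guidance above (the numbers are illustrative, not recommendations): with, say, an 8 GB heap the indexing buffer should stay well below the heap so concurrent flushes have headroom:

  IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);
  conf.setRAMBufferSizeMB(1024.0);        // well under the JVM heap (e.g. 8 GB)
  conf.setRAMPerThreadHardLimitMB(1945);  // per-DWPT hard cap; must be below 2048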

* NOTE: the account of RAM usage for pending deletions is only * approximate. Specifically, if you delete by Query, Lucene currently has no * way to measure the RAM usage of individual Queries so the accounting will * under-estimate and you should compensate by either calling commit() * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)} - * to flush by count instead of RAM usage (each buffered delete Query counts - * as one). - * + * to flush and apply buffered deletes by count instead of RAM usage + * (for each buffered delete Query a constant number of bytes is used to estimate + * RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will + * not trigger any segment flushes. *

- * NOTE: because IndexWriter uses ints when managing its - * internal storage, the absolute maximum value for this setting is somewhat - * less than 2048 MB. The precise limit depends on various factors, such as - * how large your documents are, how many fields have norms, etc., so it's - * best to set this value comfortably under 2048. + * NOTE: It's not guaranteed that all memory resident documents are flushed + * once this limit is exceeded. Depending on the configured {@link FlushPolicy} only a + * subset of the buffered documents are flushed and therefore only part of the RAM + * buffer is released. + *

* - *

* The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. - * + * @see #setFlushPolicy(FlushPolicy) + * @see #setRAMPerThreadHardLimitMB(int) + * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. * * @throws IllegalArgumentException * if ramBufferSize is enabled but non-positive, or it disables * ramBufferSize when maxBufferedDocs is already disabled + * */ public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { - if (ramBufferSizeMB > 2048.0) { - throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB - + " is too large; should be comfortably less than 2048"); - } if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) throw new IllegalArgumentException( "ramBufferSize should be > 0.0 MB when enabled"); @@ -438,22 +443,22 @@ * Determines the minimal number of documents required before the buffered * in-memory documents are flushed as a new Segment. Large values generally * give faster indexing. - * + * *

* When this is set, the writer will flush every maxBufferedDocs added * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a * flush due to number of buffered documents. Note that if flushing by RAM * usage is also enabled, then the flush will be triggered by whichever comes * first. - * + * *

* Disabled by default (writer flushes by RAM usage). - * + * *

Takes effect immediately, but only the next time a * document is added, updated or deleted. * * @see #setRAMBufferSizeMB(double) - * + * @see #setFlushPolicy(FlushPolicy) * @throws IllegalArgumentException * if maxBufferedDocs is enabled but smaller than 2, or it disables * maxBufferedDocs when ramBufferSize is already disabled @@ -473,7 +478,7 @@ /** * Returns the number of buffered added documents that will trigger a flush if * enabled. - * + * * @see #setMaxBufferedDocs(int) */ public int getMaxBufferedDocs() { @@ -519,32 +524,43 @@ return codecProvider; } - + /** * Returns the current MergePolicy in use by this writer. - * + * * @see #setMergePolicy(MergePolicy) */ public MergePolicy getMergePolicy() { return mergePolicy; } - /** - * Sets the max number of simultaneous threads that may be indexing documents - * at once in IndexWriter. Values < 1 are invalid and if passed - * maxThreadStates will be set to - * {@link #DEFAULT_MAX_THREAD_STATES}. - * - *

Only takes effect when IndexWriter is first created. */ - public IndexWriterConfig setMaxThreadStates(int maxThreadStates) { - this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates; + /** Expert: Sets the {@link DocumentsWriterPerThreadPool} instance used by the + * IndexWriter to assign thread-states to incoming indexing threads. If no + * {@link DocumentsWriterPerThreadPool} is set {@link IndexWriter} will use + * {@link ThreadAffinityDocumentsWriterThreadPool} with max number of + * thread-states set to {@value DocumentsWriterPerThreadPool#DEFAULT_MAX_THREAD_STATES} (see + * {@link DocumentsWriterPerThreadPool#DEFAULT_MAX_THREAD_STATES}). + *

+ *

+ * NOTE: The given {@link DocumentsWriterPerThreadPool} instance must not be used with + * other {@link IndexWriter} instances once it has been initialized / associated with an + * {@link IndexWriter}. + *

+ *

+ * NOTE: This only takes effect when IndexWriter is first created.

*/ + public IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) { + if(threadPool == null) { + throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be null"); + } + this.indexerThreadPool = threadPool; return this; } - /** Returns the max number of simultaneous threads that - * may be indexing documents at once in IndexWriter. */ - public int getMaxThreadStates() { - return maxThreadStates; + /** Returns the configured {@link DocumentsWriterPerThreadPool} instance. + * @see #setIndexerThreadPool(DocumentsWriterPerThreadPool) + * @return the configured {@link DocumentsWriterPerThreadPool} instance.*/ + public DocumentsWriterPerThreadPool getIndexerThreadPool() { + return this.indexerThreadPool; } /** By default, IndexWriter does not pool the @@ -572,10 +588,10 @@ * *
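Tying the setIndexerThreadPool/getIndexerThreadPool pair above together, a hedged wiring sketch; it assumes ThreadAffinityDocumentsWriterThreadPool offers a constructor taking the maximum number of thread states, and that conf is an IndexWriterConfig not yet handed to a writer:

  // Use a fresh pool per IndexWriter; never reuse an initialized pool across writers.
  conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(4)); // constructor arg (max thread states) is an assumption
  DocumentsWriterPerThreadPool pool = conf.getIndexerThreadPool();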

Only takes effect when IndexWriter is first created. */ IndexWriterConfig setIndexingChain(IndexingChain indexingChain) { - this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain; + this.indexingChain = indexingChain == null ? DocumentsWriterPerThread.defaultIndexingChain : indexingChain; return this; } - + /** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */ IndexingChain getIndexingChain() { return indexingChain; @@ -604,6 +620,53 @@ return readerTermsIndexDivisor; } + /** + * Expert: Controls when segments are flushed to disk during indexing. + * The {@link FlushPolicy} initialized during {@link IndexWriter} instantiation and once initialized + * the given instance is bound to this {@link IndexWriter} and should not be used with another writer. + * @see #setMaxBufferedDeleteTerms(int) + * @see #setMaxBufferedDocs(int) + * @see #setRAMBufferSizeMB(double) + */ + public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) { + this.flushPolicy = flushPolicy; + return this; + } + + /** + * Expert: Sets the maximum memory consumption per thread triggering a forced + * flush if exceeded. A {@link DocumentsWriterPerThread} is forcefully flushed + * once it exceeds this limit even if the {@link #getRAMBufferSizeMB()} has + * not been exceeded. This is a safety limit to prevent a + * {@link DocumentsWriterPerThread} from address space exhaustion due to its + * internal 32 bit signed integer based memory addressing. + * The given value must be less that 2GB (2048MB) + * + * @see #DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB + */ + public IndexWriterConfig setRAMPerThreadHardLimitMB(int perThreadHardLimitMB) { + if (perThreadHardLimitMB <= 0 || perThreadHardLimitMB >= 2048) { + throw new IllegalArgumentException("PerThreadHardLimit must be greater than 0 and less than 2048MB"); + } + this.perThreadHardLimitMB = perThreadHardLimitMB; + return this; + } + + /** + * Returns the max amount of memory each {@link DocumentsWriterPerThread} can + * consume until forcefully flushed. + * @see #setRAMPerThreadHardLimitMB(int) + */ + public int getRAMPerThreadHardLimitMB() { + return perThreadHardLimitMB; + } + /** + * @see #setFlushPolicy(FlushPolicy) + */ + public FlushPolicy getFlushPolicy() { + return flushPolicy; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -623,9 +686,13 @@ sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n"); sb.append("codecProvider=").append(codecProvider).append("\n"); sb.append("mergePolicy=").append(mergePolicy).append("\n"); - sb.append("maxThreadStates=").append(maxThreadStates).append("\n"); + sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n"); sb.append("readerPooling=").append(readerPooling).append("\n"); sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n"); + sb.append("flushPolicy=").append(flushPolicy).append("\n"); + sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n"); + return sb.toString(); } + } Index: lucene/src/java/org/apache/lucene/index/IntBlockPool.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IntBlockPool.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/IntBlockPool.java (working copy) @@ -1,5 +1,7 @@ package org.apache.lucene.index; +import java.util.Arrays; + /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -22,24 +24,24 @@ public int[][] buffers = new int[10][]; int bufferUpto = -1; // Which buffer we are upto - public int intUpto = DocumentsWriter.INT_BLOCK_SIZE; // Where we are in head buffer + public int intUpto = DocumentsWriterPerThread.INT_BLOCK_SIZE; // Where we are in head buffer public int[] buffer; // Current head buffer - public int intOffset = -DocumentsWriter.INT_BLOCK_SIZE; // Current head offset + public int intOffset = -DocumentsWriterPerThread.INT_BLOCK_SIZE; // Current head offset - final private DocumentsWriter docWriter; + final private DocumentsWriterPerThread docWriter; - public IntBlockPool(DocumentsWriter docWriter) { + public IntBlockPool(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; } public void reset() { if (bufferUpto != -1) { - if (bufferUpto > 0) - // Recycle all but the first buffer - docWriter.recycleIntBlocks(buffers, 1, 1+bufferUpto); - // Reuse first buffer + if (bufferUpto > 0) { + docWriter.recycleIntBlocks(buffers, 1, bufferUpto-1); + Arrays.fill(buffers, 1, bufferUpto, null); + } bufferUpto = 0; intUpto = 0; intOffset = 0; @@ -57,7 +59,7 @@ bufferUpto++; intUpto = 0; - intOffset += DocumentsWriter.INT_BLOCK_SIZE; + intOffset += DocumentsWriterPerThread.INT_BLOCK_SIZE; } } Index: lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (working copy) @@ -17,21 +17,23 @@ * limitations under the License. */ -import java.util.Collection; +import java.io.IOException; import java.util.Map; -import java.io.IOException; abstract class InvertedDocConsumer { - /** Add a new thread */ - abstract InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread); - /** Abort (called after hitting AbortException) */ abstract void abort(); /** Flush a new segment */ - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; + abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + + abstract void startDocument() throws IOException; + + abstract void finishDocument() throws IOException; + /** Attempt to free RAM, returning true if any RAM was * freed */ abstract boolean freeRAM(); Index: lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -abstract class InvertedDocConsumerPerThread { - abstract void startDocument() throws IOException; - abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (working copy) @@ -17,12 +17,13 @@ * limitations under the License. */ -import java.util.Collection; +import java.io.IOException; import java.util.Map; -import java.io.IOException; abstract class InvertedDocEndConsumer { - abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread); - abstract void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException; abstract void abort(); + abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + abstract void startDocument() throws IOException; + abstract void finishDocument() throws IOException; } Index: lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java (working copy) @@ -1,25 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -abstract class InvertedDocEndConsumerPerThread { - abstract void startDocument(); - abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); - abstract void finishDocument(); - abstract void abort(); -} Index: lucene/src/java/org/apache/lucene/index/NormsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/NormsWriter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/NormsWriter.java (working copy) @@ -19,11 +19,7 @@ import java.io.IOException; import java.util.Collection; -import java.util.Iterator; -import java.util.HashMap; import java.util.Map; -import java.util.List; -import java.util.ArrayList; import org.apache.lucene.store.IndexOutput; @@ -36,10 +32,6 @@ final class NormsWriter extends InvertedDocEndConsumer { - @Override - public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { - return new NormsWriterPerThread(docInverterPerThread, this); - } @Override public void abort() {} @@ -50,40 +42,11 @@ /** Produce _X.nrm if any document had a field with norms * not disabled */ @Override - public void flush(Map> threadsAndFields, SegmentWriteState state) throws IOException { - - final Map> byField = new HashMap>(); - + public void flush(Map fieldsToFlush, SegmentWriteState state) throws IOException { if (!state.fieldInfos.hasNorms()) { return; } - // Typically, each thread will have encountered the same - // field. So first we collate by field, ie, all - // per-thread field instances that correspond to the - // same FieldInfo - for (final Map.Entry> entry : threadsAndFields.entrySet()) { - final Collection fields = entry.getValue(); - final Iterator fieldsIt = fields.iterator(); - - while (fieldsIt.hasNext()) { - final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next(); - - if (perField.upto > 0) { - // It has some norms - List l = byField.get(perField.fieldInfo); - if (l == null) { - l = new ArrayList(); - byField.put(perField.fieldInfo, l); - } - l.add(perField); - } else - // Remove this field since we haven't seen it - // since the previous flush - fieldsIt.remove(); - } - } - final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION); IndexOutput normsOut = state.directory.createOutput(normsFileName); @@ -93,60 +56,25 @@ int normCount = 0; for (FieldInfo fi : state.fieldInfos) { - final List toMerge = byField.get(fi); + final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi); int upto = 0; - if (toMerge != null) { - - final int numFields = toMerge.size(); - + if (toWrite != null && toWrite.upto > 0) { normCount++; - final NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; - int[] uptos = new int[numFields]; - - for(int j=0;j 0) { - - assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length); - - int minLoc = 0; - int minDocID = fields[0].docIDs[uptos[0]]; - - for(int j=1;j { - final NormsWriterPerThread perThread; final FieldInfo fieldInfo; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final Similarity similarity; // Holds all docID/norm pairs we've seen @@ -46,10 +45,9 @@ upto = 0; } - public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) { - this.perThread = perThread; + public NormsWriterPerField(final 
DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; - docState = perThread.docState; + docState = docInverterPerField.docState; fieldState = docInverterPerField.fieldState; similarity = docState.similarityProvider.get(fieldInfo.name); } Index: lucene/src/java/org/apache/lucene/index/NormsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/NormsWriterPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/NormsWriterPerThread.java (working copy) @@ -1,45 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -final class NormsWriterPerThread extends InvertedDocEndConsumerPerThread { - final NormsWriter normsWriter; - final DocumentsWriter.DocState docState; - - public NormsWriterPerThread(DocInverterPerThread docInverterPerThread, NormsWriter normsWriter) { - this.normsWriter = normsWriter; - docState = docInverterPerThread.docState; - } - - @Override - InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { - return new NormsWriterPerField(docInverterPerField, this, fieldInfo); - } - - @Override - void abort() {} - - @Override - void startDocument() {} - @Override - void finishDocument() {} - - boolean freeRAM() { - return false; - } -} Index: lucene/src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentInfo.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -37,14 +37,14 @@ /** * Information about a segment such as it's name, directory, and files related * to the segment. - * + * * @lucene.experimental */ public final class SegmentInfo { static final int NO = -1; // e.g. no norms; no deletes; static final int YES = 1; // e.g. have norms; have deletes; - static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. + static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. public String name; // unique name in dir public int docCount; // number of docs in seg @@ -56,7 +56,7 @@ * - YES or higher if there are deletes at generation N */ private long delGen; - + /* * Current generation of each field's norm file. If this array is null, * means no separate norms. 
If this array is not null, its values mean: @@ -65,7 +65,7 @@ */ private Map normGen; - private boolean isCompoundFile; + private boolean isCompoundFile; private volatile List files; // cached list of files that this segment uses // in the Directory @@ -73,10 +73,13 @@ private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand) private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand) + //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) private int docStoreOffset; // if this segment shares stored fields & vectors, this // offset is where in that file this segment's docs begin + //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) private String docStoreSegment; // name used to derive fields/vectors file we share with // other segments + //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx) private int delCount; // How many deleted docs in this segment @@ -91,9 +94,9 @@ private Map diagnostics; - // Tracks the Lucene version this segment was created with, since 3.1. Null + // Tracks the Lucene version this segment was created with, since 3.1. Null // indicates an older than 3.0 index, and it's used to detect a too old index. - // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and + // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and // specific versions afterwards ("3.0", "3.1" etc.). // see Constants.LUCENE_MAIN_VERSION. private String version; @@ -101,7 +104,7 @@ // NOTE: only used in-RAM by IW to track buffered deletes; // this is never written to/read from the Directory private long bufferedDeletesGen; - + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) { this.name = name; @@ -182,11 +185,13 @@ docStoreSegment = name; docStoreIsCompoundFile = false; } + if (format > DefaultSegmentInfosWriter.FORMAT_4_0) { // pre-4.0 indexes write a byte if there is a single norms file byte b = input.readByte(); assert 1 == b; } + int numNormGen = input.readInt(); if (numNormGen == NO) { normGen = null; @@ -207,7 +212,7 @@ assert delCount <= docCount; hasProx = input.readByte() == YES; - + // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name); if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) { segmentCodecs = new SegmentCodecs(codecs, input); @@ -217,7 +222,7 @@ segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")}); } diagnostics = input.readStringStringMap(); - + if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) { hasVectors = input.readByte() == 1; } else { @@ -366,7 +371,7 @@ // against this segment return null; } else { - return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); + return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); } } @@ -432,7 +437,7 @@ if (hasSeparateNorms(number)) { return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number)); } else { - // single file for all norms + // single file for all norms return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN); } } @@ -465,39 +470,74 @@ 
assert delCount <= docCount; } + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated public int getDocStoreOffset() { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) return docStoreOffset; } - + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated public boolean getDocStoreIsCompoundFile() { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) return docStoreIsCompoundFile; } - - void setDocStoreIsCompoundFile(boolean v) { - docStoreIsCompoundFile = v; + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated + public void setDocStoreIsCompoundFile(boolean docStoreIsCompoundFile) { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) + this.docStoreIsCompoundFile = docStoreIsCompoundFile; clearFilesCache(); } - + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated + void setDocStore(int offset, String segment, boolean isCompoundFile) { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) + docStoreOffset = offset; + docStoreSegment = segment; + docStoreIsCompoundFile = isCompoundFile; + clearFilesCache(); + } + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated public String getDocStoreSegment() { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) return docStoreSegment; } - - public void setDocStoreSegment(String segment) { - docStoreSegment = segment; - } - + + /** + * @deprecated shared doc stores are not supported in >= 4.0 + */ + @Deprecated void setDocStoreOffset(int offset) { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) docStoreOffset = offset; clearFilesCache(); } - void setDocStore(int offset, String segment, boolean isCompoundFile) { - docStoreOffset = offset; - docStoreSegment = segment; - docStoreIsCompoundFile = isCompoundFile; - clearFilesCache(); + /** + * @deprecated shared doc stores are not supported in 4.0 + */ + @Deprecated + public void setDocStoreSegment(String docStoreSegment) { + // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0) + this.docStoreSegment = docStoreSegment; } - + /** Save this segment's info. */ public void write(IndexOutput output) throws IOException { @@ -507,12 +547,14 @@ output.writeString(name); output.writeInt(docCount); output.writeLong(delGen); + output.writeInt(docStoreOffset); if (docStoreOffset != -1) { output.writeString(docStoreSegment); output.writeByte((byte) (docStoreIsCompoundFile ? 1:0)); } + if (normGen == null) { output.writeInt(NO); } else { @@ -522,7 +564,7 @@ output.writeLong(entry.getValue()); } } - + output.writeByte((byte) (isCompoundFile ? YES : NO)); output.writeInt(delCount); output.writeByte((byte) (hasProx ? 
1:0)); @@ -570,9 +612,9 @@ // Already cached: return files; } - + Set fileSet = new HashSet(); - + boolean useCompoundFile = getUseCompoundFile(); if (useCompoundFile) { @@ -606,7 +648,7 @@ fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION)); fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION)); - } + } } String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); @@ -644,7 +686,7 @@ } /** Used for debugging. Format may suddenly change. - * + * *

Current format looks like * _a(3.1):c45/4->_1, which means the segment's * name is _a; it was created with Lucene 3.1 (or @@ -674,7 +716,7 @@ if (delCount != 0) { s.append('/').append(delCount); } - + if (docStoreOffset != -1) { s.append("->").append(docStoreSegment); if (docStoreIsCompoundFile) { @@ -714,13 +756,13 @@ * NOTE: this method is used for internal purposes only - you should * not modify the version of a SegmentInfo, or it may result in unexpected * exceptions thrown when you attempt to open the index. - * + * * @lucene.internal */ public void setVersion(String version) { this.version = version; } - + /** Returns the version of the code which wrote the segment. */ public String getVersion() { return version; Index: lucene/src/java/org/apache/lucene/index/SegmentMerger.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentMerger.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/SegmentMerger.java (working copy) @@ -39,24 +39,24 @@ /** * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, - * into a single Segment. After adding the appropriate readers, call the merge method to combine the + * into a single Segment. After adding the appropriate readers, call the merge method to combine the * segments. - * + * * @see #merge * @see #add */ final class SegmentMerger { - + /** norms header placeholder */ - static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; - + static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; + private Directory directory; private String segment; private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; private List readers = new ArrayList(); private final FieldInfos fieldInfos; - + private int mergedDocs; private final MergeState.CheckAbort checkAbort; @@ -64,13 +64,13 @@ /** Maximum number of contiguous documents to bulk-copy when merging stored fields */ private final static int MAX_RAW_MERGE_DOCS = 4192; - + private final CodecProvider codecs; private Codec codec; private SegmentWriteState segmentWriteState; private PayloadProcessorProvider payloadProcessorProvider; - + SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) { this.payloadProcessorProvider = payloadProcessorProvider; directory = dir; @@ -135,10 +135,10 @@ for (String file : files) { cfsWriter.addFile(file); } - + // Perform the merge cfsWriter.close(); - + return files; } @@ -196,13 +196,12 @@ } /** - * + * * @return The number of documents in all of the readers * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ private int mergeFields() throws CorruptIndexException, IOException { - for (IndexReader reader : readers) { if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; @@ -265,7 +264,7 @@ throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption"); segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null); - + return docCount; } @@ -283,7 +282,7 @@ ++j; continue; } - // We can optimize this case (doing a bulk byte 
copy) since the field + // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { @@ -295,7 +294,7 @@ break; } } while(numDocs < MAX_RAW_MERGE_DOCS); - + IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs); fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; @@ -349,7 +348,7 @@ * @throws IOException */ private final void mergeVectors() throws IOException { - TermVectorsWriter termVectorsWriter = + TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos); try { @@ -369,7 +368,7 @@ copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader); } else { copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader); - + } } } finally { @@ -402,7 +401,7 @@ ++docNum; continue; } - // We can optimize this case (doing a bulk byte copy) since the field + // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = docNum, numDocs = 0; do { @@ -414,7 +413,7 @@ break; } } while(numDocs < MAX_RAW_MERGE_DOCS); - + matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); checkAbort.work(300 * numDocs); @@ -425,7 +424,7 @@ // skip deleted docs continue; } - + // NOTE: it's very important to first assign to vectors then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); @@ -434,7 +433,7 @@ } } } - + private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter, final TermVectorsReader matchingVectorsReader, final IndexReader reader) @@ -470,7 +469,7 @@ // Let CodecProvider decide which codec will be used to write // the new segment: - + int docBase = 0; final List fields = new ArrayList(); @@ -498,7 +497,7 @@ mergeState.readerCount = readers.size(); mergeState.fieldInfos = fieldInfos; mergeState.mergedDocCount = mergedDocs; - + // Remap docIDs mergeState.delCounts = new int[mergeState.readerCount]; mergeState.docMaps = new int[mergeState.readerCount][]; @@ -536,7 +535,7 @@ } assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount; } - + if (payloadProcessorProvider != null) { mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory()); } @@ -549,7 +548,7 @@ // apart when we step through the docs enums in // MultiDocsEnum. 
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts); - + try { consumer.merge(mergeState, new MultiFields(fields.toArray(Fields.EMPTY_ARRAY), @@ -568,7 +567,7 @@ int[] getDelCounts() { return mergeState.delCounts; } - + public boolean getAnyNonBulkMerges() { assert matchedCount <= readers.size(); return matchedCount != readers.size(); @@ -579,7 +578,7 @@ try { for (FieldInfo fi : fieldInfos) { if (fi.isIndexed && !fi.omitNorms) { - if (output == null) { + if (output == null) { output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION)); output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); } @@ -610,7 +609,7 @@ } } } finally { - if (output != null) { + if (output != null) { output.close(); } } Index: lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (working copy) @@ -18,7 +18,8 @@ */ import java.io.IOException; -import org.apache.lucene.store.RAMOutputStream; + +import org.apache.lucene.document.Fieldable; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -26,22 +27,38 @@ final class StoredFieldsWriter { FieldsWriter fieldsWriter; - final DocumentsWriter docWriter; + final DocumentsWriterPerThread docWriter; int lastDocID; - PerDoc[] docFreeList = new PerDoc[1]; int freeCount; - public StoredFieldsWriter(DocumentsWriter docWriter) { + final DocumentsWriterPerThread.DocState docState; + + public StoredFieldsWriter(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; + this.docState = docWriter.docState; } - public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException { - return new StoredFieldsWriterPerThread(docState, this); + private int numStoredFields; + private Fieldable[] storedFields; + private int[] fieldNumbers; + + public void reset() { + numStoredFields = 0; + storedFields = new Fieldable[1]; + fieldNumbers = new int[1]; } - synchronized public void flush(SegmentWriteState state) throws IOException { - if (state.numDocs > lastDocID) { + public void startDocument() { + reset(); + } + + public void flush(SegmentWriteState state) throws IOException { + + if (state.numDocs > 0) { + // It's possible that all documents seen in this segment + // hit non-aborting exceptions, in which case we will + // not have yet init'd the FieldsWriter: initFieldsWriter(); fill(state.numDocs); } @@ -67,23 +84,9 @@ int allocCount; - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else { - return docFreeList[--freeCount]; - } - } + void abort() { + reset(); - synchronized void abort() { if (fieldsWriter != null) { fieldsWriter.abort(); fieldsWriter = null; @@ -101,53 +104,40 @@ } } - synchronized void finishDocument(PerDoc perDoc) throws IOException { + void finishDocument() throws IOException { assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start"); + initFieldsWriter(); + fill(docState.docID); - fill(perDoc.docID); + if (fieldsWriter != null 
&& numStoredFields > 0) { + fieldsWriter.startDocument(numStoredFields); + for (int i = 0; i < numStoredFields; i++) { + fieldsWriter.writeField(fieldNumbers[i], storedFields[i]); + } + lastDocID++; + } - // Append stored fields to the real FieldsWriter: - fieldsWriter.flushDocument(perDoc.numStoredFields, perDoc.fdt); - lastDocID++; - perDoc.reset(); - free(perDoc); + reset(); assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end"); } - synchronized void free(PerDoc perDoc) { - assert freeCount < docFreeList.length; - assert 0 == perDoc.numStoredFields; - assert 0 == perDoc.fdt.length(); - assert 0 == perDoc.fdt.getFilePointer(); - docFreeList[freeCount++] = perDoc; - } - - class PerDoc extends DocumentsWriter.DocWriter { - final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer(); - RAMOutputStream fdt = new RAMOutputStream(buffer); - int numStoredFields; - - void reset() { - fdt.reset(); - buffer.recycle(); - numStoredFields = 0; + public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { + if (numStoredFields == storedFields.length) { + int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + Fieldable[] newArray = new Fieldable[newSize]; + System.arraycopy(storedFields, 0, newArray, 0, numStoredFields); + storedFields = newArray; } - @Override - void abort() { - reset(); - free(this); + if (numStoredFields == fieldNumbers.length) { + fieldNumbers = ArrayUtil.grow(fieldNumbers); } - @Override - public long sizeInBytes() { - return buffer.getSizeInBytes(); - } + storedFields[numStoredFields] = field; + fieldNumbers[numStoredFields] = fieldInfo.number; + numStoredFields++; - @Override - public void finish() throws IOException { - finishDocument(this); - } + assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); } } Index: lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java (working copy) @@ -1,79 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.document.Fieldable; - -final class StoredFieldsWriterPerThread { - - final FieldsWriter localFieldsWriter; - final StoredFieldsWriter storedFieldsWriter; - final DocumentsWriter.DocState docState; - - StoredFieldsWriter.PerDoc doc; - - public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException { - this.storedFieldsWriter = storedFieldsWriter; - this.docState = docState; - localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null); - } - - public void startDocument() { - if (doc != null) { - // Only happens if previous document hit non-aborting - // exception while writing stored fields into - // localFieldsWriter: - doc.reset(); - doc.docID = docState.docID; - } - } - - public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { - if (doc == null) { - doc = storedFieldsWriter.getPerDoc(); - doc.docID = docState.docID; - localFieldsWriter.setFieldsStream(doc.fdt); - assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields; - assert 0 == doc.fdt.length(); - assert 0 == doc.fdt.getFilePointer(); - } - - localFieldsWriter.writeField(fieldInfo, field); - assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField"); - doc.numStoredFields++; - } - - public DocumentsWriter.DocWriter finishDocument() { - // If there were any stored fields in this doc, doc will - // be non-null; else it's null. - try { - return doc; - } finally { - doc = null; - } - } - - public void abort() { - if (doc != null) { - doc.abort(); - doc = null; - } - } -} Index: lucene/src/java/org/apache/lucene/index/TermsHash.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHash.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/TermsHash.java (working copy) @@ -18,12 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; import java.util.Map; +import org.apache.lucene.util.ByteBlockPool; +import org.apache.lucene.util.BytesRef; + /** This class implements {@link InvertedDocConsumer}, which * is passed each token produced by the analyzer on each * field. 
It stores these tokens in a hash table, and @@ -36,78 +36,118 @@ final TermsHashConsumer consumer; final TermsHash nextTermsHash; - final DocumentsWriter docWriter; + final DocumentsWriterPerThread docWriter; - boolean trackAllocations; + final IntBlockPool intPool; + final ByteBlockPool bytePool; + ByteBlockPool termBytePool; - public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) { + final boolean primary; + final DocumentsWriterPerThread.DocState docState; + + // Used when comparing postings via termRefComp, in TermsHashPerField + final BytesRef tr1 = new BytesRef(); + final BytesRef tr2 = new BytesRef(); + + // Used by perField to obtain terms from the analysis chain + final BytesRef termBytesRef = new BytesRef(10); + + final boolean trackAllocations; + + + public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) { + this.docState = docWriter.docState; this.docWriter = docWriter; this.consumer = consumer; + this.trackAllocations = trackAllocations; this.nextTermsHash = nextTermsHash; - this.trackAllocations = trackAllocations; + intPool = new IntBlockPool(docWriter); + bytePool = new ByteBlockPool(docWriter.byteBlockAllocator); + + if (nextTermsHash != null) { + // We are primary + primary = true; + termBytePool = bytePool; + nextTermsHash.termBytePool = bytePool; + } else { + primary = false; + } } @Override - InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { - return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null); + public void abort() { + reset(); + try { + consumer.abort(); + } finally { + if (nextTermsHash != null) { + nextTermsHash.abort(); + } + } } - TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) { - return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread); - } + // Clear all state + void reset() { + intPool.reset(); + bytePool.reset(); - @Override - public void abort() { - consumer.abort(); - if (nextTermsHash != null) - nextTermsHash.abort(); + if (primary) { + bytePool.reset(); + } } @Override - synchronized void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { - Map> childThreadsAndFields = new HashMap>(); - Map> nextThreadsAndFields; + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { + Map childFields = new HashMap(); + Map nextChildFields; - if (nextTermsHash != null) - nextThreadsAndFields = new HashMap>(); - else - nextThreadsAndFields = null; + if (nextTermsHash != null) { + nextChildFields = new HashMap(); + } else { + nextChildFields = null; + } - for (final Map.Entry> entry : threadsAndFields.entrySet()) { + for (final Map.Entry entry : fieldsToFlush.entrySet()) { + TermsHashPerField perField = (TermsHashPerField) entry.getValue(); + childFields.put(entry.getKey(), perField.consumer); + if (nextTermsHash != null) { + nextChildFields.put(entry.getKey(), perField.nextPerField); + } + } - TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey(); + consumer.flush(childFields, state); - Collection fields = entry.getValue(); + if (nextTermsHash != null) { + nextTermsHash.flush(nextChildFields, state); + } + } - Iterator fieldsIt = fields.iterator(); - Collection childFields = new HashSet(); - Collection nextChildFields; + @Override + InvertedDocConsumerPerField 
addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { + return new TermsHashPerField(docInverterPerField, this, nextTermsHash, fieldInfo); + } - if (nextTermsHash != null) - nextChildFields = new HashSet(); - else - nextChildFields = null; + @Override + public boolean freeRAM() { + return false; + } - while(fieldsIt.hasNext()) { - TermsHashPerField perField = (TermsHashPerField) fieldsIt.next(); - childFields.add(perField.consumer); - if (nextTermsHash != null) - nextChildFields.add(perField.nextPerField); + @Override + void finishDocument() throws IOException { + try { + consumer.finishDocument(this); + } finally { + if (nextTermsHash != null) { + nextTermsHash.consumer.finishDocument(nextTermsHash); } - - childThreadsAndFields.put(perThread.consumer, childFields); - if (nextTermsHash != null) - nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields); } - - consumer.flush(childThreadsAndFields, state); - - if (nextTermsHash != null) - nextTermsHash.flush(nextThreadsAndFields, state); } @Override - synchronized public boolean freeRAM() { - return false; + void startDocument() throws IOException { + consumer.startDocument(); + if (nextTermsHash != null) { + nextTermsHash.consumer.startDocument(); + } } } Index: lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java (working copy) @@ -18,11 +18,12 @@ */ import java.io.IOException; -import java.util.Collection; import java.util.Map; abstract class TermsHashConsumer { - abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread); - abstract void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException; + abstract void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException; abstract void abort(); - } + abstract void startDocument() throws IOException; + abstract void finishDocument(TermsHash termsHash) throws IOException; + abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); +} Index: lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java (working copy) @@ -1,27 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import java.io.IOException; - -abstract class TermsHashConsumerPerThread { - abstract void startDocument() throws IOException; - abstract DocumentsWriter.DocWriter finishDocument() throws IOException; - abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); - abstract public void abort(); -} Index: lucene/src/java/org/apache/lucene/index/TermsHashPerField.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (working copy) @@ -34,9 +34,10 @@ final TermsHashConsumerPerField consumer; + final TermsHash termsHash; + final TermsHashPerField nextPerField; - final TermsHashPerThread perThread; - final DocumentsWriter.DocState docState; + final DocumentsWriterPerThread.DocState docState; final FieldInvertState fieldState; TermToBytesRefAttribute termAtt; BytesRef termBytesRef; @@ -52,27 +53,27 @@ final FieldInfo fieldInfo; final BytesRefHash bytesHash; - + ParallelPostingsArray postingsArray; private final AtomicLong bytesUsed; - public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) { - this.perThread = perThread; - intPool = perThread.intPool; - bytePool = perThread.bytePool; - termBytePool = perThread.termBytePool; - docState = perThread.docState; - bytesUsed = perThread.termsHash.trackAllocations?perThread.termsHash.docWriter.bytesUsed:new AtomicLong(); - + public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHash termsHash, final TermsHash nextTermsHash, final FieldInfo fieldInfo) { + intPool = termsHash.intPool; + bytePool = termsHash.bytePool; + termBytePool = termsHash.termBytePool; + docState = termsHash.docState; + this.termsHash = termsHash; + bytesUsed = termsHash.trackAllocations ? 
termsHash.docWriter.bytesUsed + : new AtomicLong(); fieldState = docInverterPerField.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); + this.consumer = termsHash.consumer.addField(this, fieldInfo); PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed); - bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); + bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); streamCount = consumer.getStreamCount(); numPostingInt = 2*streamCount; this.fieldInfo = fieldInfo; - if (nextPerThread != null) - nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo); + if (nextTermsHash != null) + nextPerField = (TermsHashPerField) nextTermsHash.addField(docInverterPerField, fieldInfo); else nextPerField = null; } @@ -80,7 +81,7 @@ void shrinkHash(int targetSize) { // Fully free the bytesHash on each flush but keep the pool untouched // bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too - bytesHash.clear(false); + bytesHash.clear(false); } public void reset() { @@ -90,7 +91,7 @@ } @Override - synchronized public void abort() { + public void abort() { reset(); if (nextPerField != null) nextPerField.abort(); @@ -99,14 +100,13 @@ public void initReader(ByteSliceReader reader, int termID, int stream) { assert stream < streamCount; int intStart = postingsArray.intStarts[termID]; - final int[] ints = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - final int upto = intStart & DocumentsWriter.INT_BLOCK_MASK; + final int[] ints = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + final int upto = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; reader.init(bytePool, postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE, ints[upto+stream]); } - /** Collapse the hash table & sort in-place. */ public int[] sortPostings(Comparator termComp) { return bytesHash.sort(termComp); @@ -124,7 +124,7 @@ nextPerField.start(f); } } - + @Override boolean start(Fieldable[] fields, int count) throws IOException { doCall = consumer.start(fields, count); @@ -143,11 +143,12 @@ // First time we are seeing this token since we last // flushed the hash. 
// Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) + if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) intPool.nextBuffer(); - if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) + if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) { bytePool.nextBuffer(); + } intUptos = intPool.buffer; intUptoStart = intPool.intUpto; @@ -166,8 +167,8 @@ } else { termID = (-termID)-1; int intStart = postingsArray.intStarts[termID]; - intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK; + intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; consumer.addTerm(termID); } } @@ -192,7 +193,7 @@ if (docState.maxTermPrefix == null) { final int saved = termBytesRef.length; try { - termBytesRef.length = Math.min(30, DocumentsWriter.MAX_TERM_LENGTH_UTF8); + termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8); docState.maxTermPrefix = termBytesRef.toString(); } finally { termBytesRef.length = saved; @@ -204,7 +205,7 @@ if (termID >= 0) {// New posting bytesHash.byteStart(termID); // Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) { + if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) { intPool.nextBuffer(); } @@ -229,8 +230,8 @@ } else { termID = (-termID)-1; final int intStart = postingsArray.intStarts[termID]; - intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK; + intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT]; + intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK; consumer.addTerm(termID); } @@ -278,7 +279,7 @@ if (nextPerField != null) nextPerField.finish(); } - + private static final class PostingsBytesStartArray extends BytesStartArray { private final TermsHashPerField perField; @@ -289,10 +290,10 @@ this.perField = perField; this.bytesUsed = bytesUsed; } - + @Override public int[] init() { - if(perField.postingsArray == null) { + if(perField.postingsArray == null) { perField.postingsArray = perField.consumer.createPostingsArray(2); bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); } @@ -312,7 +313,7 @@ @Override public int[] clear() { if(perField.postingsArray != null) { - bytesUsed.addAndGet(-perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); + bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting())); perField.postingsArray = null; } return null; Index: lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java (working copy) @@ -1,96 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.ByteBlockPool; - -import java.io.IOException; - -final class TermsHashPerThread extends InvertedDocConsumerPerThread { - - final TermsHash termsHash; - final TermsHashConsumerPerThread consumer; - final TermsHashPerThread nextPerThread; // the secondary is currently consumed by TermVectorsWriter - // see secondary entry point in TermsHashPerField#add(int) - - final IntBlockPool intPool; - final ByteBlockPool bytePool; - final ByteBlockPool termBytePool; - - final boolean primary; - final DocumentsWriter.DocState docState; - - public TermsHashPerThread(DocInverterPerThread docInverterPerThread, final TermsHash termsHash, final TermsHash nextTermsHash, final TermsHashPerThread primaryPerThread) { - docState = docInverterPerThread.docState; - - this.termsHash = termsHash; - this.consumer = termsHash.consumer.addThread(this); - - intPool = new IntBlockPool(termsHash.docWriter); - bytePool = new ByteBlockPool(termsHash.docWriter.byteBlockAllocator); // use the allocator from the docWriter which tracks the used bytes - primary = nextTermsHash != null; - if (primary) { - // We are primary - termBytePool = bytePool; - nextPerThread = nextTermsHash.addThread(docInverterPerThread, this); // this will be the primaryPerThread in the secondary - assert nextPerThread != null; - } else { - assert primaryPerThread != null; - termBytePool = primaryPerThread.bytePool; // we are secondary and share the byte pool with the primary - nextPerThread = null; - } - } - - @Override - InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) { - return new TermsHashPerField(docInverterPerField, this, nextPerThread, fieldInfo); - } - - @Override - synchronized public void abort() { - reset(true); - consumer.abort(); - if (primary) - nextPerThread.abort(); - } - - @Override - public void startDocument() throws IOException { - consumer.startDocument(); - if (primary) - nextPerThread.consumer.startDocument(); - } - - @Override - public DocumentsWriter.DocWriter finishDocument() throws IOException { - final DocumentsWriter.DocWriter doc = consumer.finishDocument(); - final DocumentsWriter.DocWriter docFromSecondary = primary? nextPerThread.consumer.finishDocument():null; - if (doc == null) - return docFromSecondary; - else { - doc.setNext(docFromSecondary); - return doc; - } - } - - // Clear all state - void reset(boolean recyclePostings) { - intPool.reset(); - bytePool.reset(); - } -} Index: lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (revision 1097796) +++ lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (working copy) @@ -17,49 +17,48 @@ * limitations under the License. 
*/ +import java.io.IOException; +import java.util.Map; + import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.RAMOutputStream; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; -import java.io.IOException; -import java.util.Collection; - -import java.util.Map; - final class TermVectorsTermsWriter extends TermsHashConsumer { - final DocumentsWriter docWriter; - PerDoc[] docFreeList = new PerDoc[1]; + final DocumentsWriterPerThread docWriter; int freeCount; IndexOutput tvx; IndexOutput tvd; IndexOutput tvf; int lastDocID; + + final DocumentsWriterPerThread.DocState docState; + final BytesRef flushTerm = new BytesRef(); + + // Used by perField when serializing the term vectors + final ByteSliceReader vectorSliceReader = new ByteSliceReader(); boolean hasVectors; - public TermVectorsTermsWriter(DocumentsWriter docWriter) { + public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) { this.docWriter = docWriter; + docState = docWriter.docState; } @Override - public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) { - return new TermVectorsTermsWriterPerThread(termsHashPerThread, this); - } - - @Override - synchronized void flush(Map> threadsAndFields, final SegmentWriteState state) throws IOException { + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { if (tvx != null) { // At least one doc in this run had term vectors enabled fill(state.numDocs); + assert state.segmentName != null; + String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION); tvx.close(); tvf.close(); tvd.close(); tvx = tvd = tvf = null; - assert state.segmentName != null; - String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION); - if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) { + if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) { throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName)); } @@ -68,36 +67,13 @@ hasVectors = false; } - for (Map.Entry> entry : threadsAndFields.entrySet()) { - for (final TermsHashConsumerPerField field : entry.getValue() ) { - TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; - perField.termsHashPerField.reset(); - perField.shrinkHash(); - } - - TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey(); - perThread.termsHashPerThread.reset(true); + for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) { + TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field; + perField.termsHashPerField.reset(); + perField.shrinkHash(); } } - int allocCount; - - synchronized PerDoc getPerDoc() { - if (freeCount == 0) { - allocCount++; - if (allocCount > docFreeList.length) { - // Grow our free list up front to make sure we have - // enough space to recycle all outstanding PerDoc - // instances - assert allocCount == 1+docFreeList.length; - docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - } - return new PerDoc(); - } else { - return docFreeList[--freeCount]; - } - } - /** Fills in no-term-vectors for all docs we haven't seen * 
since the last doc that had term vectors. */ void fill(int docID) throws IOException { @@ -112,18 +88,17 @@ } } - synchronized void initTermVectorsWriter() throws IOException { + private final void initTermVectorsWriter() throws IOException { if (tvx == null) { // If we hit an exception while init'ing the term // vector output files, we must abort this segment // because those files will be in an unknown // state: - hasVectors = true; tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION)); tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION)); - + tvx.writeInt(TermVectorsReader.FORMAT_CURRENT); tvd.writeInt(TermVectorsReader.FORMAT_CURRENT); tvf.writeInt(TermVectorsReader.FORMAT_CURRENT); @@ -132,39 +107,44 @@ } } - synchronized void finishDocument(PerDoc perDoc) throws IOException { + @Override + void finishDocument(TermsHash termsHash) throws IOException { assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start"); + if (!hasVectors) { + return; + } + initTermVectorsWriter(); - fill(perDoc.docID); + fill(docState.docID); // Append term vectors to the real outputs: - tvx.writeLong(tvd.getFilePointer()); + long pointer = tvd.getFilePointer(); + tvx.writeLong(pointer); tvx.writeLong(tvf.getFilePointer()); - tvd.writeVInt(perDoc.numVectorFields); - if (perDoc.numVectorFields > 0) { - for(int i=0;i 0) { + for(int i=0;i= 0; - if (!doVectors || numPostings == 0) - return; - if (numPostings > maxNumPostings) maxNumPostings = numPostings; - final IndexOutput tvf = perThread.doc.perDocTvf; - // This is called once, after inverting all occurrences // of a given field in the doc. At this point we flush // our hash into the DocWriter. assert fieldInfo.storeTermVector; - assert perThread.vectorFieldsInOrder(fieldInfo); + assert termsWriter.vectorFieldsInOrder(fieldInfo); - perThread.doc.addField(termsHashPerField.fieldInfo.number); TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray; + final IndexOutput tvf = termsWriter.tvf; // TODO: we may want to make this sort in same order // as Codec's terms dict? @@ -140,21 +128,21 @@ byte bits = 0x0; if (doVectorPositions) bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR; - if (doVectorOffsets) + if (doVectorOffsets) bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR; tvf.writeByte(bits); int lastLen = 0; byte[] lastBytes = null; int lastStart = 0; - - final ByteSliceReader reader = perThread.vectorSliceReader; - final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool; + final ByteSliceReader reader = termsWriter.vectorSliceReader; + final ByteBlockPool termBytePool = termsHashPerField.termBytePool; + for(int j=0;j createdFiles; Set openFilesForWrite = new HashSet(); volatile boolean crashed; + private ThrottledIndexOutput throttledOutput; // use this for tracking files for crash. 
// additionally: provides debugging information in case you leave one open @@ -114,6 +116,10 @@ public void setPreventDoubleWrite(boolean value) { preventDoubleWrite = value; } + + public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) { + this.throttledOutput = throttledOutput; + } @Override public synchronized void sync(Collection names) throws IOException { @@ -348,7 +354,7 @@ IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name); openFileHandles.put(io, new RuntimeException("unclosed IndexOutput")); openFilesForWrite.add(name); - return io; + return throttledOutput == null ? io : throttledOutput.newFromDelegate(io); } @Override @@ -578,4 +584,5 @@ maybeYield(); delegate.copy(to, src, dest); } + } Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1097796) +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -116,7 +116,7 @@ * If this is set, it is the only method that should run. */ static final String TEST_METHOD; - + /** Create indexes in this directory, optimally use a subdir, named after the test */ public static final File TEMP_DIR; static { @@ -163,11 +163,11 @@ * multiply it by the number of iterations */ public static final int RANDOM_MULTIPLIER = Integer.parseInt(System.getProperty("tests.multiplier", "1")); - + private int savedBoolMaxClauseCount; private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null; - + /** Used to track if setUp and tearDown are called correctly from subclasses */ private boolean setup; @@ -189,28 +189,28 @@ private static class UncaughtExceptionEntry { public final Thread thread; public final Throwable exception; - + public UncaughtExceptionEntry(Thread thread, Throwable exception) { this.thread = thread; this.exception = exception; } } private List uncaughtExceptions = Collections.synchronizedList(new ArrayList()); - + // saves default codec: we do this statically as many build indexes in @beforeClass private static String savedDefaultCodec; // default codec: not set when we use a per-field provider. private static Codec codec; // default codec provider private static CodecProvider savedCodecProvider; - + private static Locale locale; private static Locale savedLocale; private static TimeZone timeZone; private static TimeZone savedTimeZone; - + private static Map stores; - + private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"}; private static void swapCodec(Codec c, CodecProvider cp) { @@ -288,7 +288,7 @@ // randomly picks from core and test codecs static String pickRandomCodec(Random rnd) { - int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + + int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length + TEST_CODECS.length); if (idx < CodecProvider.CORE_CODECS.length) { return CodecProvider.CORE_CODECS[idx]; @@ -321,7 +321,7 @@ /** @deprecated (4.0) until we fix no-fork problems in solr tests */ @Deprecated private static List testClassesRun = new ArrayList(); - + @BeforeClass public static void beforeClassLuceneTestCaseJ4() { staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1; @@ -347,7 +347,7 @@ TimeZone.setDefault(timeZone); testsFailed = false; } - + @AfterClass public static void afterClassLuceneTestCaseJ4() { if (! 
"false".equals(TEST_CLEAN_THREADS)) { @@ -363,12 +363,12 @@ if ("randomPerField".equals(TEST_CODEC)) { if (cp instanceof RandomCodecProvider) codecDescription = cp.toString(); - else + else codecDescription = "PreFlex"; } else { codecDescription = codec.toString(); } - + if (CodecProvider.getDefault() == savedCodecProvider) removeTestCodecs(codec, CodecProvider.getDefault()); CodecProvider.setDefault(savedCodecProvider); @@ -398,14 +398,14 @@ stores = null; // if verbose or tests failed, report some information back if (VERBOSE || testsFailed) - System.err.println("NOTE: test params are: codec=" + codecDescription + - ", locale=" + locale + + System.err.println("NOTE: test params are: codec=" + codecDescription + + ", locale=" + locale + ", timezone=" + (timeZone == null ? "(null)" : timeZone.getID())); if (testsFailed) { System.err.println("NOTE: all tests run in this JVM:"); System.err.println(Arrays.toString(testClassesRun.toArray())); - System.err.println("NOTE: " + System.getProperty("os.name") + " " - + System.getProperty("os.version") + " " + System.err.println("NOTE: " + System.getProperty("os.name") + " " + + System.getProperty("os.version") + " " + System.getProperty("os.arch") + "/" + System.getProperty("java.vendor") + " " + System.getProperty("java.version") + " " @@ -428,7 +428,7 @@ } private static boolean testsFailed; /* true if any tests failed */ - + // This is how we get control when errors occur. // Think of this as start/end/success/failed // events. @@ -463,7 +463,7 @@ LuceneTestCase.this.name = method.getName(); super.starting(method); } - + }; @Before @@ -481,7 +481,7 @@ savedUncaughtExceptionHandler.uncaughtException(t, e); } }); - + savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount(); } @@ -513,7 +513,7 @@ if ("perMethod".equals(TEST_CLEAN_THREADS)) { int rogueThreads = threadCleanup("test method: '" + getName() + "'"); if (rogueThreads > 0) { - System.err.println("RESOURCE LEAK: test method: '" + getName() + System.err.println("RESOURCE LEAK: test method: '" + getName() + "' left " + rogueThreads + " thread(s) running"); // TODO: fail, but print seed for now. if (!testsFailed && uncaughtExceptions.isEmpty()) { @@ -535,18 +535,18 @@ fail("Some threads threw uncaught exceptions!"); } - // calling assertSaneFieldCaches here isn't as useful as having test - // classes call it directly from the scope where the index readers - // are used, because they could be gc'ed just before this tearDown + // calling assertSaneFieldCaches here isn't as useful as having test + // classes call it directly from the scope where the index readers + // are used, because they could be gc'ed just before this tearDown // method is called. // // But it's better then nothing. // - // If you are testing functionality that you know for a fact - // "violates" FieldCache sanity, then you should either explicitly + // If you are testing functionality that you know for a fact + // "violates" FieldCache sanity, then you should either explicitly // call purgeFieldCache at the end of your test method, or refactor - // your Test class so that the inconsistant FieldCache usages are - // isolated in distinct test methods + // your Test class so that the inconsistant FieldCache usages are + // isolated in distinct test methods assertSaneFieldCaches(getTestLabel()); } finally { @@ -557,14 +557,14 @@ private final static int THREAD_STOP_GRACE_MSEC = 50; // jvm-wide list of 'rogue threads' we found, so they only get reported once. 
private final static IdentityHashMap rogueThreads = new IdentityHashMap(); - + static { // just a hack for things like eclipse test-runner threads for (Thread t : Thread.getAllStackTraces().keySet()) { rogueThreads.put(t, true); } } - + /** * Looks for leftover running threads, trying to kill them off, * so they don't fail future tests. @@ -575,20 +575,20 @@ Thread[] stillRunning = new Thread[Thread.activeCount()+1]; int threadCount = 0; int rogueCount = 0; - + if ((threadCount = Thread.enumerate(stillRunning)) > 1) { while (threadCount == stillRunning.length) { // truncated response stillRunning = new Thread[stillRunning.length*2]; threadCount = Thread.enumerate(stillRunning); } - + for (int i = 0; i < threadCount; i++) { Thread t = stillRunning[i]; - - if (t.isAlive() && - !rogueThreads.containsKey(t) && - t != Thread.currentThread() && + + if (t.isAlive() && + !rogueThreads.containsKey(t) && + t != Thread.currentThread() && /* its ok to keep your searcher across test cases */ (t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) { System.err.println("WARNING: " + context + " left thread running: " + t); @@ -613,7 +613,7 @@ } return rogueCount; } - + /** * Asserts that FieldCacheSanityChecker does not detect any * problems with FieldCache.DEFAULT. @@ -656,13 +656,13 @@ } } - + // @deprecated (4.0) These deprecated methods should be removed soon, when all tests using no Epsilon are fixed: @Deprecated static public void assertEquals(double expected, double actual) { assertEquals(null, expected, actual); } - + @Deprecated static public void assertEquals(String message, double expected, double actual) { assertEquals(message, Double.valueOf(expected), Double.valueOf(actual)); @@ -677,18 +677,18 @@ static public void assertEquals(String message, float expected, float actual) { assertEquals(message, Float.valueOf(expected), Float.valueOf(actual)); } - + // Replacement for Assume jUnit class, so we can add a message with explanation: - + private static final class TestIgnoredException extends RuntimeException { TestIgnoredException(String msg) { super(msg); } - + TestIgnoredException(String msg, Throwable t) { super(msg, t); } - + @Override public String getMessage() { StringBuilder sb = new StringBuilder(super.getMessage()); @@ -696,7 +696,7 @@ sb.append(" - ").append(getCause()); return sb.toString(); } - + // only this one is called by our code, exception is not used outside this class: @Override public void printStackTrace(PrintStream s) { @@ -708,19 +708,19 @@ } } } - + public static void assumeTrue(String msg, boolean b) { Assume.assumeNoException(b ? null : new TestIgnoredException(msg)); } - + public static void assumeFalse(String msg, boolean b) { assumeTrue(msg, !b); } - + public static void assumeNoException(String msg, Exception e) { Assume.assumeNoException(e == null ? null : new TestIgnoredException(msg, e)); } - + public static Set asSet(T... args) { return new HashSet(Arrays.asList(args)); } @@ -778,7 +778,7 @@ c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000)); } if (r.nextBoolean()) { - c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20)); + c.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(_TestUtil.nextInt(r, 1, 20))); } if (r.nextBoolean()) { @@ -864,7 +864,7 @@ public static MockDirectoryWrapper newDirectory() throws IOException { return newDirectory(random); } - + /** * Returns a new Directory instance, using the specified random. * See {@link #newDirectory()} for more information. 
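(Illustrative fragment only, not part of the patch: the hunk above replaces setMaxThreadStates with the new IndexWriterConfig#setIndexerThreadPool / ThreadAffinityDocumentsWriterThreadPool API. Assuming a LuceneTestCase subclass where newIndexWriterConfig, newDirectory, random and TEST_VERSION_CURRENT are in scope, pinning the pool size explicitly would look roughly like this:

    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(4)); // at most 4 thread states
    IndexWriter writer = new IndexWriter(newDirectory(), conf);
    // ... add documents from several indexing threads; close() flushes and commits ...
    writer.close();
)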
@@ -875,7 +875,7 @@ stores.put(dir, Thread.currentThread().getStackTrace()); return dir; } - + /** * Returns a new Directory instance, with contents copied from the * provided directory. See {@link #newDirectory()} for more @@ -884,23 +884,23 @@ public static MockDirectoryWrapper newDirectory(Directory d) throws IOException { return newDirectory(random, d); } - + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ public static MockDirectoryWrapper newFSDirectory(File f) throws IOException { return newFSDirectory(f, null); } - + /** Returns a new FSDirectory instance over the given file, which must be a folder. */ public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException { String fsdirClass = TEST_DIRECTORY; if (fsdirClass.equals("random")) { fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; } - + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store fsdirClass = "org.apache.lucene.store." + fsdirClass; } - + Class clazz; try { try { @@ -908,11 +908,11 @@ } catch (ClassCastException e) { // TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)]; - + if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store fsdirClass = "org.apache.lucene.store." + fsdirClass; } - + clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class); } MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f, lf)); @@ -922,7 +922,7 @@ throw new RuntimeException(e); } } - + /** * Returns a new Directory instance, using the specified random * with contents copied from the provided directory. See @@ -980,44 +980,44 @@ public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) { if (!index.isIndexed()) return new Field(name, value, store, index); - + if (!store.isStored() && random.nextBoolean()) store = Store.YES; // randomly store it - + tv = randomTVSetting(random, tv); - + return new Field(name, value, store, index, tv); } - - static final TermVector tvSettings[] = { - TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, - TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS + + static final TermVector tvSettings[] = { + TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, + TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS }; - + private static TermVector randomTVSetting(Random random, TermVector minimum) { switch(minimum) { case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)]; case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)]; - case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS + case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS : TermVector.WITH_POSITIONS_OFFSETS; - case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS + case WITH_POSITIONS: return random.nextBoolean() ? 
TermVector.WITH_POSITIONS : TermVector.WITH_POSITIONS_OFFSETS; default: return TermVector.WITH_POSITIONS_OFFSETS; } } - + /** return a random Locale from the available locales on the system */ public static Locale randomLocale(Random random) { Locale locales[] = Locale.getAvailableLocales(); return locales[random.nextInt(locales.length)]; } - + /** return a random TimeZone from the available timezones on the system */ public static TimeZone randomTimeZone(Random random) { String tzIds[] = TimeZone.getAvailableIDs(); return TimeZone.getTimeZone(tzIds[random.nextInt(tzIds.length)]); } - + /** return a Locale object equivalent to its programmatic name */ public static Locale localeForName(String localeName) { String elements[] = localeName.split("\\_"); @@ -1039,7 +1039,7 @@ "RAMDirectory", FS_DIRECTORIES[0], FS_DIRECTORIES[1], FS_DIRECTORIES[2] }; - + public static String randomDirectory(Random random) { if (random.nextInt(10) == 0) { return CORE_DIRECTORIES[random.nextInt(CORE_DIRECTORIES.length)]; @@ -1064,7 +1064,7 @@ return FSDirectory.open(file); } } - + static Directory newDirectoryImpl(Random random, String clazzName) { if (clazzName.equals("random")) clazzName = randomDirectory(random); @@ -1085,9 +1085,9 @@ return clazz.newInstance(); } catch (Exception e) { throw new RuntimeException(e); - } + } } - + /** create a new searcher over the reader. * This searcher might randomly use threads. */ public static IndexSearcher newSearcher(IndexReader r) throws IOException { @@ -1095,8 +1095,8 @@ return new IndexSearcher(r); } else { int threads = 0; - final ExecutorService ex = (random.nextBoolean()) ? null - : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), + final ExecutorService ex = (random.nextBoolean()) ? null + : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8), new NamedThreadFactory("LuceneTestCase")); if (ex != null && VERBOSE) { System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads"); @@ -1121,12 +1121,12 @@ public String getName() { return this.name; } - + /** Gets a resource from the classpath as {@link File}. This method should only be used, * if a real file is needed. To get a stream, code should prefer * {@link Class#getResourceAsStream} using {@code this.getClass()}. */ - + protected File getDataFile(String name) throws IOException { try { return new File(this.getClass().getResource(name).toURI()); @@ -1137,11 +1137,11 @@ // We get here from InterceptTestCaseEvents on the 'failed' event.... public void reportAdditionalFailureInfo() { - System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName() + " -Dtestmethod=" + getName() + " -Dtests.seed=" + new TwoLongs(staticSeed, seed) + reproduceWithExtraParams()); } - + // extra params that were overridden needed to reproduce the command private String reproduceWithExtraParams() { StringBuilder sb = new StringBuilder(); @@ -1157,12 +1157,12 @@ private static long staticSeed; // seed for individual test methods, changed in @before private long seed; - + private static final Random seedRand = new Random(); protected static final Random random = new Random(0); private String name = ""; - + /** * Annotation for tests that should only be run during nightly builds. 
*/ @@ -1170,7 +1170,7 @@ @Inherited @Retention(RetentionPolicy.RUNTIME) public @interface Nightly {} - + /** optionally filters the tests to be run by TEST_METHOD */ public static class LuceneTestCaseRunner extends BlockJUnit4ClassRunner { private List testMethods; @@ -1200,11 +1200,11 @@ testMethods.add(new FrameworkMethod(m)); } } - + if (testMethods.isEmpty()) { throw new RuntimeException("No runnable methods!"); } - + if (TEST_NIGHTLY == false) { if (getTestClass().getJavaClass().isAnnotationPresent(Nightly.class)) { /* the test class is annotated with nightly, remove all methods */ @@ -1265,9 +1265,9 @@ @Override public boolean shouldRun(Description d) { return TEST_METHOD == null || d.getMethodName().equals(TEST_METHOD); - } + } }; - + try { f.apply(this); } catch (NoTestsRemainException e) { @@ -1275,12 +1275,12 @@ } } } - + private static class RandomCodecProvider extends CodecProvider { private List knownCodecs = new ArrayList(); private Map previousMappings = new HashMap(); private final int perFieldSeed; - + RandomCodecProvider(Random random) { this.perFieldSeed = random.nextInt(); register(new StandardCodec()); @@ -1312,13 +1312,13 @@ } return codec.name; } - + @Override public synchronized String toString() { return "RandomCodecProvider: " + previousMappings.toString(); } } - + @Ignore("just a hack") public final void alwaysIgnoredTestMethod() {} } Index: lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (revision 1097796) +++ lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (working copy) @@ -50,7 +50,7 @@ boolean isClose = false; StackTraceElement[] trace = new Exception().getStackTrace(); for (int i = 0; i < trace.length; i++) { - if ("doFlush".equals(trace[i].getMethodName())) { + if ("flush".equals(trace[i].getMethodName())) { isDoFlush = true; } if ("close".equals(trace[i].getMethodName())) { Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1097796) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -148,8 +148,8 @@ writer.addDocument(doc); } - + public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException { String[] startFiles = dir.listAll(); SegmentInfos infos = new SegmentInfos(); @@ -262,7 +262,7 @@ if (VERBOSE) { System.out.println("TEST: config1=" + writer.getConfig()); } - + for(int j=0;j<500;j++) { addDocWithIndex(writer, j); } @@ -338,7 +338,7 @@ assertEquals("should be one document", reader2.numDocs(), 1); reader.close(); reader2.close(); - + dir.close(); } @@ -367,14 +367,14 @@ * these docs until writer is closed. */ public void testCommitOnClose() throws IOException { - Directory dir = newDirectory(); + Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); for (int i = 0; i < 14; i++) { addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("first number of hits", 14, hits.length); @@ -415,14 +415,14 @@ * and add docs to it. 
*/ public void testCommitOnCloseAbort() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); for (int i = 0; i < 14; i++) { addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("first number of hits", 14, hits.length); @@ -450,7 +450,7 @@ hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("saw changes after writer.abort", 14, hits.length); searcher.close(); - + // Now make sure we can re-open the index, add docs, // and all is good: writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) @@ -567,7 +567,7 @@ * and close(). */ public void testCommitOnCloseOptimize() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); // Must disable throwing exc on double-write: this // test uses IW.rollback which easily results in // writing to same file more than once @@ -634,7 +634,7 @@ } public void testIndexNoDocuments() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.commit(); writer.close(); @@ -656,7 +656,7 @@ } public void testManyFields() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); for(int j=0;j<100;j++) { Document doc = new Document(); @@ -686,7 +686,7 @@ } public void testSmallRAMBuffer() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)). 
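// The surrounding hunks exercise IndexWriter's flush triggers. A minimal configuration sketch,
// using only setters that appear elsewhere in this patch (the concrete values are illustrative,
// not recommendations):
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
    .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)          // never trigger a flush by document count
    .setRAMBufferSizeMB(16.0)                                          // flush once buffered RAM exceeds 16 MB
    .setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // never trigger a flush by buffered delete terms
IndexWriter writer = new IndexWriter(dir, conf);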
@@ -782,13 +782,14 @@ writer.deleteDocuments(new Term("field", "aaa" + j)); _TestUtil.syncConcurrentMerges(writer); int flushCount = writer.getFlushCount(); + if (j == 1) lastFlushCount = flushCount; else if (j < 10) { // No new files should be created assertEquals(flushCount, lastFlushCount); } else if (10 == j) { - assertTrue(flushCount > lastFlushCount); + assertTrue("" + j, flushCount > lastFlushCount); lastFlushCount = flushCount; writer.getConfig().setRAMBufferSizeMB(0.000001); writer.getConfig().setMaxBufferedDeleteTerms(1); @@ -825,7 +826,7 @@ } public void testDiverseDocs() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5)); for(int i=0;i<3;i++) { // First, docs where every term is unique (heavy on @@ -872,12 +873,12 @@ } public void testEnablingNorms() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); // Enable norms for only 1 doc, pre flush for(int j=0;j<10;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); if (j != 8) { f.setOmitNorms(true); } @@ -898,7 +899,7 @@ // Enable norms for only 1 doc, post flush for(int j=0;j<27;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); if (j != 26) { f.setOmitNorms(true); } @@ -918,7 +919,7 @@ } public void testHighFreqTerm() throws IOException { - MockDirectoryWrapper dir = newDirectory(); + MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01)); // Massive doc that has 128 K a's @@ -968,7 +969,7 @@ return myLockFactory.makeLock(name); } } - + Directory dir = new MyRAMDirectory(new RAMDirectory()); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); @@ -976,7 +977,7 @@ addDoc(writer); } writer.close(); - Term searchTerm = new Term("content", "aaa"); + Term searchTerm = new Term("content", "aaa"); IndexSearcher searcher = new IndexSearcher(dir, false); ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs; assertEquals("did not get right number of hits", 100, hits.length); @@ -1073,7 +1074,7 @@ infos.read(dir); assertEquals(2, infos.size()); } - } + } dir.close(); } @@ -1089,7 +1090,7 @@ Directory dir = newDirectory(); IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); - + Document document = new Document(); document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES)); iw.addDocument(document); @@ -1343,7 +1344,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1370,12 +1371,12 @@ writer.close(); dir.close(); } - + // LUCENE-325: test expungeDeletes, when 2 singular merges // are required public void testExpungeDeletes() throws IOException { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, 
newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setRAMBufferSizeMB( IndexWriterConfig.DISABLE_AUTO_FLUSH)); @@ -1537,14 +1538,14 @@ public void doAfterFlush() { afterWasCalled = true; } - + @Override protected void doBeforeFlush() throws IOException { beforeWasCalled = true; } } - + // LUCENE-1222 public void testDoBeforeAfterFlush() throws IOException { Directory dir = newDirectory(); @@ -1572,7 +1573,7 @@ } - + final String[] utf8Data = new String[] { // unpaired low surrogate "ab\udc17cd", "ab\ufffdcd", @@ -1642,7 +1643,7 @@ } UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8); - + String s1 = new String(chars, 0, len); String s2 = new String(utf8.bytes, 0, utf8.length, "UTF-8"); assertEquals("codepoint " + ch, s1, s2); @@ -1699,7 +1700,7 @@ expected[i++] = 0xfffd; expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); hasIllegal = true; - } else + } else expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800); } else { expected[i] = buffer[i] = ' '; @@ -1796,10 +1797,10 @@ final TokenStream tokens = new TokenStream() { final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - + final Iterator terms = Arrays.asList("a","b","c").iterator(); boolean first = true; - + @Override public boolean incrementToken() { if (!terms.hasNext()) return false; @@ -1856,7 +1857,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1912,7 +1913,7 @@ setMergePolicy(newLogMergePolicy(5)) ); writer.commit(); - + for (int i = 0; i < 23; i++) addDoc(writer); @@ -1979,7 +1980,7 @@ byte[] b = new byte[50]; for(int i=0;i<50;i++) b[i] = (byte) (i+77); - + Document doc = new Document(); Field f = new Field("binary", b, 10, 17); byte[] bx = f.getBinaryValue(); @@ -2016,7 +2017,7 @@ // commit(Map) never called for this index assertEquals(0, r.getCommitUserData().size()); r.close(); - + w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); for(int j=0;j<17;j++) addDoc(w); @@ -2024,7 +2025,7 @@ data.put("label", "test1"); w.commit(data); w.close(); - + assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); r = IndexReader.open(dir, true); @@ -2036,7 +2037,7 @@ w.close(); assertEquals("test1", IndexReader.getCommitUserData(dir).get("label")); - + dir.close(); } @@ -2046,7 +2047,7 @@ Directory dir = newDirectory(); MockAnalyzer analyzer = new MockAnalyzer(random); analyzer.setPositionIncrementGap( 100 ); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); Field f = newField("field", "", Field.Store.NO, @@ -2073,7 +2074,7 @@ // LUCENE-1468 -- make sure opening an IndexWriter with // create=true does not remove non-index files - + public void testOtherFiles() throws Throwable { Directory dir = newDirectory(); try { @@ -2132,7 +2133,7 @@ @Override public void run() { // LUCENE-2239: won't work with NIOFS/MMAP - Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); + Directory dir = new MockDirectoryWrapper(random, new RAMDirectory()); IndexWriter w = null; while(!finish) { try { @@ -2141,7 +2142,7 @@ if (w != null) { w.close(); } - IndexWriterConfig conf = newIndexWriterConfig( + IndexWriterConfig conf = 
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2); w = new IndexWriter(dir, conf); @@ -2208,10 +2209,10 @@ e.printStackTrace(System.out); } } - try { + try { dir.close(); - } catch (IOException e) { - throw new RuntimeException(e); + } catch (IOException e) { + throw new RuntimeException(e); } } } @@ -2226,7 +2227,7 @@ // interrupt arrives while class loader is trying to // init this class (in servicing a first interrupt): assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException); - + // issue 100 interrupts to child thread int i = 0; while(i < 100) { @@ -2260,12 +2261,12 @@ doc.add(f); doc.add(f2); w.addDocument(doc); - + // add 2 docs to test in-memory merging f.setTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false)); f2.setTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false)); w.addDocument(doc); - + // force segment flush so we can force a segment merge with doc3 later. w.commit(); @@ -2288,7 +2289,7 @@ assertTrue(ir.document(0).getFieldable("binary").isBinary()); assertTrue(ir.document(1).getFieldable("binary").isBinary()); assertTrue(ir.document(2).getFieldable("binary").isBinary()); - + assertEquals("value", ir.document(0).get("string")); assertEquals("value", ir.document(1).get("string")); assertEquals("value", ir.document(2).get("string")); @@ -2359,7 +2360,7 @@ public void testNoDocsIndex() throws Throwable { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); writer.setInfoStream(new PrintStream(bos)); @@ -2369,7 +2370,7 @@ _TestUtil.checkIndex(dir); dir.close(); } - + // LUCENE-2095: make sure with multiple threads commit // doesn't return until all changes are in fact in the // index @@ -2377,7 +2378,7 @@ final int NUM_THREADS = 5; final double RUN_SEC = 0.5; final Directory dir = newDirectory(); - final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( + final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); _TestUtil.reduceOpenFiles(w.w); w.commit(); @@ -2562,7 +2563,7 @@ Field f = newField("field", s.toString(), Field.Store.NO, Field.Index.ANALYZED); d.add(f); w.addDocument(d); - + IndexReader r = w.getReader().getSequentialSubReaders()[0]; TermsEnum t = r.fields().terms("field").iterator(); int count = 0; @@ -2648,10 +2649,10 @@ // in case a deletion policy which holds onto commits is used. Directory dir = newDirectory(); SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setIndexDeletionPolicy(sdp)); - + // First commit Document doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); @@ -2661,7 +2662,7 @@ // Keep that commit sdp.snapshot("id"); - + // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. 
doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); @@ -2673,25 +2674,13 @@ sdp.release("id"); writer.deleteUnusedFiles(); assertEquals(1, IndexReader.listCommits(dir).size()); - + writer.close(); dir.close(); } - - private static class FlushCountingIndexWriter extends IndexWriter { - int flushCount; - public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException { - super(dir, iwc); - } - @Override - public void doAfterFlush() { - flushCount++; - } - } public void testIndexingThenDeleting() throws Exception { final Random r = random; - Directory dir = newDirectory(); // note this test explicitly disables payloads final Analyzer analyzer = new Analyzer() { @@ -2700,7 +2689,7 @@ return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); } }; - FlushCountingIndexWriter w = new FlushCountingIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(-1).setMaxBufferedDeleteTerms(-1)); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH)); w.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); doc.add(newField("field", "go 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", Field.Store.NO, Field.Index.ANALYZED)); @@ -2714,15 +2703,15 @@ } if (doIndexing) { // Add docs until a flush is triggered - final int startFlushCount = w.flushCount; - while(w.flushCount == startFlushCount) { + final int startFlushCount = w.getFlushCount(); + while(w.getFlushCount() == startFlushCount) { w.addDocument(doc); count++; } } else { // Delete docs until a flush is triggered - final int startFlushCount = w.flushCount; - while(w.flushCount == startFlushCount) { + final int startFlushCount = w.getFlushCount(); + while(w.getFlushCount() == startFlushCount) { w.deleteDocuments(new Term("foo", ""+count)); count++; } @@ -2732,7 +2721,7 @@ w.close(); dir.close(); } - + public void testNoCommits() throws Exception { // Tests that if we don't call commit(), the directory has 0 commits. This has // changed since LUCENE-2386, where before IW would always commit on a fresh @@ -2753,7 +2742,7 @@ public void testEmptyFSDirWithNoLock() throws Exception { // Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF), - // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed + // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed // when listAll() was called in IndexFileDeleter. Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory()); new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close(); @@ -2762,10 +2751,10 @@ public void testEmptyDirRollback() throws Exception { // Tests that if IW is created over an empty Directory, some documents are - // indexed, flushed (but not committed) and then IW rolls back, then no + // indexed, flushed (but not committed) and then IW rolls back, then no // files are left in the Directory. 
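// Sketch of the flush-detection idiom used in testIndexingThenDeleting above. It assumes the
// caller can see IndexWriter#getFlushCount(), as this test can; the loop keeps adding documents
// until the counter changes, i.e. until the FlushPolicy has actually triggered a flush:
final int startFlushCount = w.getFlushCount();
while (w.getFlushCount() == startFlushCount) {
  w.addDocument(doc);
}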
Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy())); String[] files = dir.listAll(); @@ -2789,7 +2778,7 @@ writer.addDocument(doc); // Adding just one document does not call flush yet. assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length); - + doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.addDocument(doc); @@ -2810,17 +2799,17 @@ public void testNoSegmentFile() throws IOException { Directory dir = newDirectory(); dir.setLockFactory(NoLockFactory.getNoLockFactory()); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); - + Document doc = new Document(); doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); w.addDocument(doc); w.addDocument(doc); - IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( + IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2) .setOpenMode(OpenMode.CREATE)); - + w2.close(); // If we don't do that, the test fails on Windows w.rollback(); @@ -2859,7 +2848,7 @@ w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).setIndexCommit(commit)); assertEquals(1, w.numDocs()); - + // commit IndexWriter to "third" w.addDocument(doc); commitData.put("tag", "third"); @@ -2914,7 +2903,7 @@ } final int docCount = 200*RANDOM_MULTIPLIER; final int fieldCount = _TestUtil.nextInt(rand, 1, 5); - + final List fieldIDs = new ArrayList(); Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); @@ -2924,7 +2913,7 @@ } final Map docs = new HashMap(); - + if (VERBOSE) { System.out.println("TEST: build index docCount=" + docCount); } @@ -3111,7 +3100,7 @@ Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random, dir, new StringSplitAnalyzer()); - char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8]; + char[] chars = new char[DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8]; Arrays.fill(chars, 'x'); Document doc = new Document(); final String bigTerm = new String(chars); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (revision 1097796) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (working copy) @@ -24,7 +24,7 @@ import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.index.DocumentsWriter.IndexingChain; +import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarityProvider; import org.apache.lucene.search.IndexSearcher; @@ -36,15 +36,15 @@ private static final class MySimilarityProvider extends DefaultSimilarityProvider { // Does not implement anything - used only for type checking on IndexWriterConfig. 
} - + private static final class MyIndexingChain extends IndexingChain { // Does not implement anything - used only for type checking on IndexWriterConfig. @Override - DocConsumer getChain(DocumentsWriter documentsWriter) { + DocConsumer getChain(DocumentsWriterPerThread documentsWriter) { return null; } - + } @Test @@ -64,12 +64,16 @@ assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.getRAMBufferSizeMB(), 0.0); assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.getMaxBufferedDocs()); assertEquals(IndexWriterConfig.DEFAULT_READER_POOLING, conf.getReaderPooling()); - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); assertNull(conf.getMergedSegmentWarmer()); - assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor()); assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); - + assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass()); + assertNull(conf.getFlushPolicy()); + assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB()); + + + // Sanity check - validate that all getters are covered. Set getters = new HashSet(); getters.add("getAnalyzer"); @@ -91,7 +95,11 @@ getters.add("getMergePolicy"); getters.add("getMaxThreadStates"); getters.add("getReaderPooling"); + getters.add("getIndexerThreadPool"); getters.add("getReaderTermsIndexDivisor"); + getters.add("getFlushPolicy"); + getters.add("getRAMPerThreadHardLimitMB"); + for (Method m : IndexWriterConfig.class.getDeclaredMethods()) { if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("get")) { assertTrue("method " + m.getName() + " is not tested for defaults", getters.contains(m.getName())); @@ -107,12 +115,12 @@ if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("set") && !Modifier.isStatic(m.getModifiers())) { - assertEquals("method " + m.getName() + " does not return IndexWriterConfig", + assertEquals("method " + m.getName() + " does not return IndexWriterConfig", IndexWriterConfig.class, m.getReturnType()); } } } - + @Test public void testConstants() throws Exception { // Tests that the values of the constants does not change @@ -123,10 +131,9 @@ assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS); assertEquals(16.0, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 0.0); assertEquals(false, IndexWriterConfig.DEFAULT_READER_POOLING); - assertEquals(8, IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); assertEquals(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR); } - + @Test public void testToString() throws Exception { String str = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).toString(); @@ -143,15 +150,15 @@ assertTrue(f.getName() + " not found in toString", str.indexOf(f.getName()) != -1); } } - + @Test public void testClone() throws Exception { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); IndexWriterConfig clone = (IndexWriterConfig) conf.clone(); - + // Clone is shallow since not all parameters are cloneable. 
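// A usage sketch for the per-thread configuration surface asserted above (names taken from this
// patch; the values 4 and 512 are illustrative, and setRAMPerThreadHardLimitMB must be greater
// than 0 and less than 2048):
IndexWriterConfig dwptConf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
    .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(4))  // replaces the removed setMaxThreadStates
    .setRAMPerThreadHardLimitMB(512);                                      // force-flush a DWPT that grows past this limit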
assertTrue(conf.getIndexDeletionPolicy() == clone.getIndexDeletionPolicy()); - + conf.setMergeScheduler(new SerialMergeScheduler()); assertEquals(ConcurrentMergeScheduler.class, clone.getMergeScheduler().getClass()); } @@ -159,14 +166,14 @@ @Test public void testInvalidValues() throws Exception { IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); - + // Test IndexDeletionPolicy assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(null)); assertEquals(SnapshotDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); conf.setIndexDeletionPolicy(null); assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass()); - + // Test MergeScheduler assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass()); conf.setMergeScheduler(new SerialMergeScheduler()); @@ -183,12 +190,12 @@ assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider()); // Test IndexingChain - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); conf.setIndexingChain(new MyIndexingChain()); assertEquals(MyIndexingChain.class, conf.getIndexingChain().getClass()); conf.setIndexingChain(null); - assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain()); - + assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain()); + try { conf.setMaxBufferedDeleteTerms(0); fail("should not have succeeded to set maxBufferedDeleteTerms to 0"); @@ -239,12 +246,20 @@ // this is expected } - assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); - conf.setMaxThreadStates(5); - assertEquals(5, conf.getMaxThreadStates()); - conf.setMaxThreadStates(0); - assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates()); + try { + conf.setRAMPerThreadHardLimitMB(2048); + fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048"); + } catch (IllegalArgumentException e) { + // this is expected + } + try { + conf.setRAMPerThreadHardLimitMB(0); + fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0"); + } catch (IllegalArgumentException e) { + // this is expected + } + // Test MergePolicy assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass()); conf.setMergePolicy(new LogDocMergePolicy()); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 1097796) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy) @@ -33,7 +33,7 @@ import org.apache.lucene.util._TestUtil; public class TestIndexWriterDelete extends LuceneTestCase { - + // test the simple case public void testSimpleCase() throws IOException { String[] keywords = { "1", "2" }; @@ -124,7 +124,7 @@ writer.close(); dir.close(); } - + // test when delete terms only apply to ram segments public void testRAMDeletes() throws IOException { for(int t=0;t<2;t++) { @@ -220,7 +220,7 @@ IndexReader reader = IndexReader.open(dir, true); assertEquals(7, reader.numDocs()); reader.close(); - + id = 0; modifier.deleteDocuments(new Term("id", String.valueOf(++id))); modifier.deleteDocuments(new Term("id", String.valueOf(++id))); @@ -297,33 +297,33 
@@ IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); - + int id = 0; int value = 100; - + for (int i = 0; i < 7; i++) { addDoc(modifier, ++id, value); } modifier.commit(); - + addDoc(modifier, ++id, value); IndexReader reader = IndexReader.open(dir, true); assertEquals(7, reader.numDocs()); reader.close(); - + // Delete all - modifier.deleteAll(); + modifier.deleteAll(); // Roll it back modifier.rollback(); modifier.close(); - + // Validate that the docs are still there reader = IndexReader.open(dir, true); assertEquals(7, reader.numDocs()); reader.close(); - + dir.close(); } @@ -334,10 +334,10 @@ IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); - + int id = 0; int value = 100; - + for (int i = 0; i < 7; i++) { addDoc(modifier, ++id, value); } @@ -349,24 +349,24 @@ addDoc(modifier, ++id, value); addDoc(modifier, ++id, value); - + // Delete all - modifier.deleteAll(); + modifier.deleteAll(); reader = modifier.getReader(); assertEquals(0, reader.numDocs()); reader.close(); - + // Roll it back modifier.rollback(); modifier.close(); - + // Validate that the docs are still there reader = IndexReader.open(dir, true); assertEquals(7, reader.numDocs()); reader.close(); - + dir.close(); } @@ -538,10 +538,13 @@ } // prevent throwing a random exception here!! final double randomIOExceptionRate = dir.getRandomIOExceptionRate(); + final long maxSizeInBytes = dir.getMaxSizeInBytes(); dir.setRandomIOExceptionRate(0.0); + dir.setMaxSizeInBytes(0); if (!success) { // Must force the close else the writer can have // open files which cause exc in MockRAMDir.close + modifier.rollback(); } @@ -552,6 +555,7 @@ TestIndexWriter.assertNoUnreferencedFiles(dir, "after writer.close"); } dir.setRandomIOExceptionRate(randomIOExceptionRate); + dir.setMaxSizeInBytes(maxSizeInBytes); // Finally, verify index is not corrupt, and, if // we succeeded, we see all docs changed, and if @@ -622,7 +626,7 @@ // This test tests that buffered deletes are cleared when // an Exception is hit during flush. 
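// The deleteAll()/rollback() hunks above rely on deleteAll() taking effect only in the writer's
// uncommitted state, so rollback() restores the last commit. A compact sketch of that contract
// (identifiers as used in these tests):
modifier.deleteAll();                              // marks everything deleted; nothing is committed yet
modifier.rollback();                               // discards the buffered deleteAll and any uncommitted docs
IndexReader reader = IndexReader.open(dir, true);
assertEquals(7, reader.numDocs());                 // the 7 documents from the last commit are still present
reader.close();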
public void testErrorAfterApplyDeletes() throws IOException { - + MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() { boolean sawMaybe = false; boolean failed = false; @@ -786,7 +790,7 @@ // a segment is written are cleaned up if there's an i/o error public void testErrorInDocsWriterAdd() throws IOException { - + MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() { boolean failed = false; @Override Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (revision 1097796) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (working copy) @@ -223,8 +223,9 @@ threads[i].join(); for(int i=0;i dataset = asSet(data); - + private static String MAGIC_FIELD = "f"+(NUM_FIELDS/3); - + private static FieldSelector SELECTOR = new FieldSelector() { public FieldSelectorResult accept(String f) { if (f.equals(MAGIC_FIELD)) { @@ -58,22 +58,21 @@ return FieldSelectorResult.LAZY_LOAD; } }; - - private Directory makeIndex() throws Exception { + + private Directory makeIndex() throws Exception { Directory dir = newDirectory(); try { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy(); lmp.setUseCompoundFile(false); - for (int d = 1; d <= NUM_DOCS; d++) { Document doc = new Document(); for (int f = 1; f <= NUM_FIELDS; f++ ) { - doc.add(newField("f"+f, - data[f % data.length] - + '#' + data[random.nextInt(data.length)], - Field.Store.YES, + doc.add(newField("f"+f, + data[f % data.length] + + '#' + data[random.nextInt(data.length)], + Field.Store.YES, Field.Index.ANALYZED)); } writer.addDocument(doc); @@ -84,14 +83,14 @@ } return dir; } - + public void doTest(int[] docs) throws Exception { Directory dir = makeIndex(); IndexReader reader = IndexReader.open(dir, true); for (int i = 0; i < docs.length; i++) { Document d = reader.document(docs[i], SELECTOR); d.get(MAGIC_FIELD); - + List fields = d.getFields(); for (Iterator fi = fields.iterator(); fi.hasNext(); ) { Fieldable f=null; @@ -101,7 +100,7 @@ String fval = f.stringValue(); assertNotNull(docs[i]+" FIELD: "+fname, fval); String[] vals = fval.split("#"); - if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { + if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) { fail("FIELD:"+fname+",VAL:"+fval); } } catch (Exception e) { @@ -116,7 +115,7 @@ public void testLazyWorks() throws Exception { doTest(new int[] { 399 }); } - + public void testLazyAlsoWorks() throws Exception { doTest(new int[] { 399, 150 }); } Index: lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java (revision 1097796) +++ lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; +import org.apache.lucene.document.Field.Index; import org.apache.lucene.store.*; import org.apache.lucene.util.*; import org.junit.Test; @@ -72,4 +73,72 @@ dir.close(); } + + + public void testUpdateSameDoc() throws Exception { + final Directory dir = newDirectory(); + + final LineFileDocs docs = new LineFileDocs(random); + for (int r 
= 0; r < 3; r++) { + final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); + final int SIZE = 200 * RANDOM_MULTIPLIER; + final int numUpdates = (int) (SIZE * (2 + random.nextDouble())); + int numThreads = 3 + random.nextInt(12); + IndexingThread[] threads = new IndexingThread[numThreads]; + for (int i = 0; i < numThreads; i++) { + threads[i] = new IndexingThread(docs, w, numUpdates); + threads[i].start(); + } + + for (int i = 0; i < numThreads; i++) { + threads[i].join(); + } + + w.close(); + } + IndexReader open = IndexReader.open(dir); + assertEquals(1, open.numDocs()); + open.close(); + docs.close(); + dir.close(); + } + + static class IndexingThread extends Thread { + final LineFileDocs docs; + final IndexWriter writer; + final int num; + + public IndexingThread(LineFileDocs docs, IndexWriter writer, int num) { + super(); + this.docs = docs; + this.writer = writer; + this.num = num; + } + + public void run() { + try { + IndexReader open = null; + for (int i = 0; i < num; i++) { + Document doc = new Document();// docs.nextDoc(); + doc.add(newField("id", "test", Index.NOT_ANALYZED)); + writer.updateDocument(new Term("id", "test"), doc); + if (random.nextInt(10) == 0) { + if (open == null) + open = IndexReader.open(writer, true); + IndexReader reader = open.reopen(); + if (reader != open) { + open.close(); + open = reader; + } + assertEquals("iter: " + i + " numDocs: "+ open.numDocs() + " del: " + open.numDeletedDocs() + " max: " + open.maxDoc(), 1, open.numDocs()); + } + } + open.close(); + } catch (Exception e) { + fail(e.getMessage()); + } + + } + } } Index: lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 1097796) +++ lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy) @@ -53,7 +53,7 @@ reader1 = SegmentReader.get(true, info1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); reader2 = SegmentReader.get(true, info2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); } - + @Override public void tearDown() throws Exception { reader1.close(); @@ -71,8 +71,8 @@ assertTrue(reader1 != null); assertTrue(reader2 != null); } - - public void testMerge() throws IOException { + + public void testMerge() throws IOException { SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, CodecProvider.getDefault(), null, new FieldInfos()); merger.add(reader1); merger.add(reader2); @@ -83,7 +83,6 @@ SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, fieldInfos.hasProx(), merger.getSegmentCodecs(), fieldInfos.hasVectors(), fieldInfos), BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); - assertTrue(mergedReader != null); assertTrue(mergedReader.numDocs() == 2); Document newDoc1 = mergedReader.document(0); @@ -93,19 +92,19 @@ Document newDoc2 = mergedReader.document(1); assertTrue(newDoc2 != null); assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size()); - + DocsEnum termDocs = MultiFields.getTermDocsEnum(mergedReader, MultiFields.getDeletedDocs(mergedReader), DocHelper.TEXT_FIELD_2_KEY, new BytesRef("field")); assertTrue(termDocs != null); assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); - + Collection stored = 
mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); assertTrue(stored != null); //System.out.println("stored size: " + stored.size()); assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3); - + TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); assertTrue(vector != null); BytesRef [] terms = vector.getTerms(); @@ -116,7 +115,7 @@ assertTrue(freqs != null); //System.out.println("Freqs size: " + freqs.length); assertTrue(vector instanceof TermPositionVector == true); - + for (int i = 0; i < terms.length; i++) { String term = terms[i].utf8ToString(); int freq = freqs[i]; @@ -127,5 +126,5 @@ TestSegmentReader.checkNorms(mergedReader); mergedReader.close(); - } + } } Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 1097796) +++ lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy) @@ -201,7 +201,7 @@ Map docs = new HashMap(); IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE) - .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates) + .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates)) .setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy())); w.setInfoStream(VERBOSE ? System.out : null); LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy(); Property changes on: solr ___________________________________________________________________ Modified: svn:mergeinfo Merged /lucene/dev/branches/realtime_search/solr:r953476-1097796
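The TestRollingUpdates#testUpdateSameDoc addition above exercises near-real-time readers during
concurrent updates. A minimal sketch of the reopen idiom it uses (writer and assertions as in the test):

IndexReader reader = IndexReader.open(writer, true);   // NRT reader over the writer's uncommitted state
// ... after further updateDocument calls ...
IndexReader newReader = reader.reopen();
if (newReader != reader) {
  reader.close();                                      // only close the old reader if reopen returned a new one
  reader = newReader;
}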