Index: lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java (working copy) @@ -18,10 +18,13 @@ */ import java.io.IOException; -import java.util.List; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; -import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; @@ -174,7 +177,7 @@ int delIDX = deletes.size()-1; List allDeleted = null; - List advanced = null; + Set advanced = null; while (infosIDX >= 0) { //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX); @@ -252,11 +255,14 @@ delIDX--; infosIDX--; if (advanced == null) { - advanced = new ArrayList(); + advanced = new HashSet(); } advanced.add(info); - } else if (packet != null && packet.anyDeletes()){ + } else if (packet != null && !packet.anyDeletes() && packet.anyUpdates()) { + // ignore updates only packets + delIDX--; + } else { //System.out.println(" gt"); if (coalescedDeletes != null) { @@ -289,40 +295,39 @@ infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] newDelCount=" + delCount + (segAllDeletes ? 
" 100% deleted" : "")); } if (advanced == null) { - advanced = new ArrayList(); + advanced = new HashSet(); } advanced.add(info); } infosIDX--; - } else if (packet != null) { - delIDX--; - } else { - infosIDX--; } } + // go through deletes forward and apply updates for (SegmentInfoPerCommit updateInfo : infos2) { - //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX); final long updateSegGen = updateInfo.getBufferedDeletesGen(); for (FrozenBufferedDeletes updatePacket : deletes) { if (updatePacket.anyUpdates() && updateSegGen <= updatePacket.delGen()) { assert readerPool.infoIsLive(updateInfo); + // we need to reopen the reader every time, to include previous + // updates when applying new ones final ReadersAndLiveDocs rld = readerPool.get(updateInfo, true); final SegmentReader reader = rld.getReader(IOContext.READ); + final boolean exactGen = updateSegGen == updatePacket.delGen(); try { - anyNewDeletes |= applyTermUpdates(updatePacket.updateTerms, - updatePacket.updateArrays, rld, reader); + anyNewDeletes |= applyTermUpdates(updatePacket.allUpdates, rld, + reader, exactGen); } finally { rld.release(reader); readerPool.release(rld); } + if (advanced == null) { + advanced = new HashSet(); + } + advanced.add(updateInfo); } - if (advanced == null) { - advanced = new ArrayList(); - } - advanced.add(updateInfo); } } @@ -463,67 +468,27 @@ return delCount; } - private synchronized boolean applyTermUpdates(PrefixCodedTerms updateTerms, - FieldsUpdate[][] updateArrays, ReadersAndLiveDocs rld, - SegmentReader reader) throws IOException { + private synchronized boolean applyTermUpdates( + SortedSet packetUpdates, ReadersAndLiveDocs rld, + SegmentReader reader, boolean checkDocId) throws IOException { Fields fields = reader.fields(); if (fields == null) { // This reader has no postings return false; } - TermsEnum termsEnum = null; - - String currentField = null; - DocsEnum docs = null; - assert checkDeleteTerm(null); UpdatedSegmentData updatedSegmentData 
= new UpdatedSegmentData(); - int termIndex = -1; - // System.out.println(Thread.currentThread().getName() + - // " del terms reader=" + reader); - for (Term term : updateTerms) { - termIndex++; - // Since we visit terms sorted, we gain performance - // by re-using the same TermsEnum and seeking only - // forwards - if (!term.field().equals(currentField)) { - assert currentField == null || currentField.compareTo(term.field()) < 0; - currentField = term.field(); - Terms terms = fields.terms(currentField); - if (terms != null) { - termsEnum = terms.iterator(null); - } else { - termsEnum = null; + for (FieldsUpdate update : packetUpdates) { + DocsEnum docsEnum = reader.termDocsEnum(update.term); + if (docsEnum != null) { + int docID; + while ((docID = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + updatedSegmentData.addUpdate(docID, update, checkDocId); } } - - if (termsEnum == null) { - continue; - } - assert checkDeleteTerm(term); - - // System.out.println(" term=" + term); - - if (termsEnum.seekExact(term.bytes(), false)) { - // we don't need term frequencies for this - DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, 0); - // System.out.println("BDS: got docsEnum=" + docsEnum); - - if (docsEnum != null) { - while (true) { - final int docID = docsEnum.nextDoc(); - // System.out.println(Thread.currentThread().getName() + - // " del term=" + term + " doc=" + docID); - if (docID == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - updatedSegmentData.addUpdates(docID, updateArrays[termIndex]); - } - } - } } if (updatedSegmentData.hasUpdates()) { Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -417,8 +417,10 @@ final DocumentsWriterPerThread dwpt = perThread.dwpt; try { - FieldsUpdate 
fieldsUpdate = new FieldsUpdate(operation, fields, - analyzer, numDocsInRAM.get()); + // create new fields update, which should affect previous docs in the + // current segment + FieldsUpdate fieldsUpdate = new FieldsUpdate(term, operation, fields, + analyzer, numDocsInRAM.get() - 1); // invert the given fields and store in RAMDirectory dwpt.invertFieldsUpdate(fieldsUpdate, globalFieldNumberMap); dwpt.updateFields(term, fieldsUpdate); @@ -427,7 +429,7 @@ flushControl.doOnAbort(perThread); } } - final boolean isUpdate = term != null; + final boolean isUpdate = term != null; flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); } finally { perThread.unlock(); @@ -539,7 +541,7 @@ perThread.unlock(); } - return null; + return updates; } private boolean doFlush(DocumentsWriterPerThread flushingDWPT) Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (working copy) @@ -349,7 +349,7 @@ if (fieldsUpdate == null) { bufferedDeletes.addTerm(item, docIDUpto); } else { - bufferedUpdates.addTerm(item, new FieldsUpdate(fieldsUpdate, docIDUpto)); + bufferedUpdates.addTerm(item, new FieldsUpdate(fieldsUpdate)); } } Index: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (working copy) @@ -471,7 +471,6 @@ fieldsUpdate.directory = updateDir; fieldsUpdate.segmentInfo = updateSegment; fieldsUpdate.fields = null; - //fieldsUpdate.docIDUpto = success = true; } finally { @@ -710,8 +709,7 @@ } void sealUpdatedSegment(SegmentInfo info, 
Directory directory, - long updateGen, Set generationReplacementFilenames) - throws IOException { + long updateGen, Set excludedFiles) throws IOException { assert updateGen > 0; boolean success = false; try { @@ -721,7 +719,7 @@ // Now build compound file Collection oldFiles = IndexWriter.createCompoundFile( infoStream, directory, MergeState.CheckAbort.NONE, info, context, - -1L, generationReplacementFilenames); + -1L, excludedFiles); writer.deleteNewFiles(oldFiles); } Index: lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/FieldsUpdate.java (working copy) @@ -41,7 +41,8 @@ */ REPLACE_FIELDS } - + + final Term term; final Operation operation; final Set replacedFields; final Analyzer analyzer; @@ -54,6 +55,8 @@ /** * An update of fields which is not assigned to a specific live segment. * + * @param term + * The term to apply this update on * @param operation * The type of update operation. * @param fields @@ -63,8 +66,9 @@ * @param docIDUpto * Document ID of the last document added before this field update */ - public FieldsUpdate(Operation operation, IndexDocument fields, + public FieldsUpdate(Term term, Operation operation, IndexDocument fields, Analyzer analyzer, int docIDUpto) { + this.term = term; this.fields = fields; this.operation = operation; if (operation == Operation.ADD_FIELDS) { @@ -87,15 +91,13 @@ * * @param other * A non-specific update with the update data. - * @param docIDUpto - * The doc ID in the live segment up to which the update should be - * applied. 
*/ - public FieldsUpdate(FieldsUpdate other, int docIDUpto) { + public FieldsUpdate(FieldsUpdate other) { + this.term = other.term; this.operation = other.operation; this.replacedFields = other.replacedFields; this.analyzer = other.analyzer; - this.docIDUpto = docIDUpto; + this.docIDUpto = other.docIDUpto; this.directory = other.directory; this.segmentInfo = other.segmentInfo; } Index: lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (working copy) @@ -520,7 +520,7 @@ if (state.liveUpdates == null) { state.liveUpdates = new UpdatedSegmentData(); } - state.liveUpdates.addUpdate(docID, nextUpdate); + state.liveUpdates.addUpdate(docID, nextUpdate, true); } totTF += termFreq; Index: lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (working copy) @@ -21,13 +21,13 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.SortedSet; +import java.util.TreeSet; +import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit; import org.apache.lucene.search.Query; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit; /** Holds buffered deletes by term or query, once pushed. * Pushed deletes are write-once, so we shift to more @@ -55,10 +55,8 @@ // a segment private deletes. 
in that case is should // only have Queries - // Updated terms, in sorted order: - final PrefixCodedTerms updateTerms; - // Updated fields per term - final FieldsUpdate[][] updateArrays; + // A sorted set of updates + final SortedSet allUpdates; public FrozenBufferedDeletes(BufferedDeletes deletes, BufferedUpdates updates, boolean isSegmentPrivate) { this.isSegmentPrivate = isSegmentPrivate; @@ -95,24 +93,14 @@ } // freeze updates - if (updates != null && !updates.terms.isEmpty()) { - PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder(); - updateArrays = new FieldsUpdate[updates.terms.size()][]; - localBytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF * (1 + updateArrays.length); - int i = 0; - for (Entry> entry : updates.terms.entrySet()) { - builder.add(entry.getKey()); - SortedSet updateList = entry.getValue(); - // TODO : calculate bytes of updates? - updateArrays[i] = updateList.toArray(new FieldsUpdate[updateList.size()]); - localBytesUsed += RamUsageEstimator.NUM_BYTES_OBJECT_REF * (1 + updateArrays[i].length); - i++; + if (updates == null || updates.terms.isEmpty()) { + allUpdates = null; + } else { + allUpdates = new TreeSet<>(); + for (SortedSet list : updates.terms.values()) { + allUpdates.addAll(list); } - updateTerms = builder.finish(); - localBytesUsed += (int) updateTerms.getSizeInBytes(); - } else { - updateTerms = null; - updateArrays = null; + localBytesUsed += 100; } bytesUsed = localBytesUsed; @@ -177,8 +165,8 @@ if (queries != null && queries.length != 0) { s += " " + queries.length + " deleted queries"; } - if (updateArrays != null && updateArrays.length > 0) { - s += " " + updateArrays.length + " updates"; + if (allUpdates != null && !allUpdates.isEmpty()) { + s += " " + allUpdates.size() + " updates"; } if (bytesUsed != 0) { s += " bytesUsed=" + bytesUsed; @@ -192,6 +180,6 @@ } boolean anyUpdates() { - return updateTerms != null; + return allUpdates != null; } } Index: 
lucene/core/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -2406,7 +2406,7 @@ UpdatedSegmentData updates, IOContext context) throws IOException { // add updates, single update per document in each round, until all updates // were added - while (updates != null) { + while (updates != null && !updates.isEmpty()) { updates = docWriter.writeUpdatedSegment(info, updates, config.getTermIndexInterval(), globalFieldNumberMap, deleter); info.advanceUpdateGen(); Index: lucene/core/src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -19,13 +19,13 @@ import java.io.IOException; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; -import org.apache.lucene.codecs.FieldsProducer; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.search.FieldCache; -import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.Bits; @@ -224,35 +224,30 @@ private synchronized void generateReplacementsMap() throws IOException { if (replacementsMap == null) { + Set visitedFields = new HashSet(); replacementsMap = new HashMap(); - boolean found = addReplacements(core.fields, core.cfsReader); + for (String field : core.fields) { + addReplacements(field); + visitedFields.add(field); + } for (int i = 0; i < updates.length; i++) { - if (addReplacements(updates[i].fields, updates[i].cfsReader)) { - found = 
true; + for (String field : updates[i].fields) { + if (!visitedFields.contains(field)) { + addReplacements(field); + visitedFields.add(field); + } } } - if (!found) { - // no replacements - replacementsMap.clear(); - } } } - private boolean addReplacements(FieldsProducer fields, - CompoundFileDirectory cfsReader) throws IOException { - boolean found = false; - for (String field : fields) { - if (!replacementsMap.containsKey(field)) { - final FieldGenerationReplacements replacements = si.info.getCodec() - .generationReplacementsFormat() - .readGenerationReplacements(field, si, context); - replacementsMap.put(field, replacements); - if (replacements != null) { - found = true; - } - } + private void addReplacements(String field) throws IOException { + final FieldGenerationReplacements replacements = si.info.getCodec() + .generationReplacementsFormat() + .readGenerationReplacements(field, si, context); + if (replacements != null) { + replacementsMap.put(field, replacements); } - return found; } @Override Index: lucene/core/src/java/org/apache/lucene/index/StackedDocsEnum.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedDocsEnum.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/StackedDocsEnum.java (working copy) @@ -1,6 +1,8 @@ package org.apache.lucene.index; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; import java.util.Map; @@ -35,12 +37,12 @@ /** * A queue containing non-active enums, ordered by doc ID. */ - final private PriorityQueue queueByDocId; + final private DocsEnumDocIdPriorityQueue queueByDocId; /** * A queue for ordering active enums by decreasing enum index. */ - final private PriorityQueue queueByIndex; + final private DocsEnumIndexPriorityQueue queueByIndex; /** * Field generation replacements for the enclosing field. 
@@ -68,6 +70,7 @@ private int positionsLeft; private static final FieldGenerationReplacements NO_REPLACEMENTS = new FieldGenerationReplacements(); + private static final int STACKED_SEGMENT_POSITION_INCREMENT = 50000; public StackedDocsEnum(Map activeMap, FieldGenerationReplacements replacements) { @@ -160,7 +163,13 @@ @Override public int nextPosition() throws IOException { if (positionsEnum == null) { - activeIterator = active.iterator(); + if (active.size() == 1) { + activeIterator = active.iterator(); + } else { + ArrayList tempList = new ArrayList<>(active); + Collections.sort(tempList); + activeIterator = tempList.iterator(); + } positionsLeft = 0; } @@ -170,7 +179,9 @@ } positionsLeft--; - return ((DocsAndPositionsEnum) positionsEnum.docsEnum).nextPosition(); + int pos = positionsEnum.index * STACKED_SEGMENT_POSITION_INCREMENT + + ((DocsAndPositionsEnum) positionsEnum.docsEnum).nextPosition(); + return pos; } @Override @@ -188,7 +199,16 @@ return ((DocsAndPositionsEnum) positionsEnum.docsEnum).getPayload(); } - protected class DocsEnumWithIndex { + @Override + public long cost() { + long cost = 0; + for (DocsEnumWithIndex docsEnum : active) { + cost += docsEnum.docsEnum.cost(); + } + return cost; + } + + protected class DocsEnumWithIndex implements Comparable { DocsEnum docsEnum; int index; @@ -197,6 +217,11 @@ this.docsEnum = docsEnum; this.index = index; } + + @Override + public int compareTo(DocsEnumWithIndex other) { + return this.index - other.index; + } } @@ -224,7 +249,7 @@ @Override protected boolean lessThan(DocsEnumWithIndex a, DocsEnumWithIndex b) { // bigger index should be first - return a.index < b.index; + return a.index > b.index; } } Index: lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/StackedTermsEnum.java (working 
copy) @@ -3,6 +3,7 @@ import java.io.IOException; import java.util.Comparator; import java.util.HashMap; +import java.util.Iterator; import java.util.Map; import java.util.TreeSet; @@ -33,14 +34,14 @@ * with no occurrences might return. Given a certain term, all the enumerations * take into account fields replacements. */ -public class StackedTermsEnum extends TermsEnum { +class StackedTermsEnum extends TermsEnum { private final Terms[] subTerms; private final FieldGenerationReplacements replacements; private Comparator comparator; private TreeSet activeEnums; - public StackedTermsEnum(Terms[] subTerms, + protected StackedTermsEnum(Terms[] subTerms, FieldGenerationReplacements replacements, Comparator comparator) throws IOException { this.subTerms = subTerms; @@ -117,7 +118,7 @@ throws IOException { // reset active enums if (activeEnums == null) { - activeEnums = new TreeSet(); + activeEnums = new TreeSet(new InnerTermsEnumFullComparator()); } else { activeEnums.clear(); } @@ -181,13 +182,23 @@ throws IOException { // build map of active enums with indexes Map activeMap = new HashMap(); - for (InnerTermsEnum inner : activeEnums.headSet(activeEnums.first(), true)) { - final DocsEnum docs = inner.termsEnum.docs(liveDocs, reuse, flags); - if (docs != null) { - activeMap.put(docs, inner.getIndex()); + + // iterate over active enums, fetch DocsEnum of all those pointing to the + // next term + InnerTermsEnum first = activeEnums.first(); + Iterator iterator = activeEnums.iterator(); + while (iterator != null && iterator.hasNext()) { + InnerTermsEnum inner = iterator.next(); + if (comparator.compare(first.term, inner.term) == 0) { + final DocsEnum docs = inner.termsEnum.docs(liveDocs, reuse, flags); + if (docs != null) { + activeMap.put(docs, inner.getIndex()); + } + } else { + iterator = null; } } - + if (activeMap.isEmpty()) { return null; } @@ -204,11 +215,21 @@ DocsAndPositionsEnum reuse, int flags) throws IOException { // build map of active enums with indexes Map 
activeMap = new HashMap(); - for (InnerTermsEnum inner : activeEnums.headSet(activeEnums.first(), true)) { - final DocsAndPositionsEnum docsAndPositions = inner.termsEnum - .docsAndPositions(liveDocs, reuse, flags); - if (docsAndPositions != null) { - activeMap.put(docsAndPositions, inner.getIndex()); + + // iterate over active enums, fetch DocsAndPositionsEnum of all those + // pointing to the next term + InnerTermsEnum first = activeEnums.first(); + Iterator iterator = activeEnums.iterator(); + while (iterator != null && iterator.hasNext()) { + InnerTermsEnum inner = iterator.next(); + if (comparator.compare(first.term, inner.term) == 0) { + final DocsAndPositionsEnum docsAndPositions = inner.termsEnum + .docsAndPositions(liveDocs, reuse, flags); + if (docsAndPositions != null) { + activeMap.put(docsAndPositions, inner.getIndex()); + } + } else { + iterator = null; } } Index: lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (revision 1477914) +++ lucene/core/src/java/org/apache/lucene/index/UpdatedSegmentData.java (working copy) @@ -58,36 +58,20 @@ updatesMap = new TreeMap>(); } - void addUpdate(int docID, FieldsUpdate fieldsUpdate) { - if (docID >= fieldsUpdate.docIDUpto) { + void addUpdate(int docId, FieldsUpdate fieldsUpdate, boolean checkDocId) { + if (checkDocId && docId > fieldsUpdate.docIDUpto) { return; } - PriorityQueue prevUpdates = updatesMap.get(docID); + PriorityQueue prevUpdates = updatesMap.get(docId); if (prevUpdates == null) { prevUpdates = new PriorityQueue(); - updatesMap.put(docID, prevUpdates); + updatesMap.put(docId, prevUpdates); } else { System.out.println(); } prevUpdates.add(fieldsUpdate); } - void addUpdates(int docID, FieldsUpdate[] updatesArray) { - PriorityQueue prevUpdates = updatesMap.get(docID); - if (prevUpdates == null) { - prevUpdates = new PriorityQueue(); - } - for 
(int i = 0; i < updatesArray.length; i++) { - FieldsUpdate fieldsUpdate = updatesArray[i]; - if (docID < fieldsUpdate.docIDUpto) { - prevUpdates.add(fieldsUpdate); - } - } - if (!prevUpdates.isEmpty()) { - updatesMap.put(docID, prevUpdates); - } - } - boolean hasUpdates() { return !updatesMap.isEmpty(); } @@ -173,6 +157,10 @@ return toReturn; } + boolean isEmpty() { + return updatesMap.isEmpty(); + } + private class UpdateAtomicReader extends AtomicReader { final private SegmentCoreReaders core; Index: lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (revision 1477914) +++ lucene/core/src/test/org/apache/lucene/index/TestFieldReplacements.java (working copy) @@ -27,6 +27,7 @@ import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; @@ -44,7 +45,7 @@ public class TestFieldReplacements extends LuceneTestCase { private Directory dir; - + private static String[] fieldNames = null; private static String[][] fieldTokens = null; private static String loremIpsum = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, " @@ -68,7 +69,6 @@ public void setUp() throws Exception { super.setUp(); dir = newDirectory(); - // init fields data structures int numFields = 4 + random().nextInt(4); @@ -85,7 +85,7 @@ fieldTokens[i] = tokens.toArray(new String[tokens.size()]); } } - + @Override public void tearDown() throws Exception { dir.close(); @@ -94,7 +94,7 @@ public void testEmptyIndex() throws IOException { init(random()); - + // test performing fields addition and replacement on an empty index IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new 
MockAnalyzer(random()))); @@ -131,14 +131,21 @@ fieldTokens[i] = tokens.toArray(new String[tokens.size()]); } } - - private static void addDocuments(Directory directory, Random localRandom, - int maxDocs) throws IOException { + + private static void addDocuments(Directory directory, Random localRandom, + int maxDocs, boolean randomConfig) throws IOException { init(localRandom); HashSet usedTerms = new HashSet(); - IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(random())); + final IndexWriterConfig config; + if (randomConfig) { + config = newIndexWriterConfig(TEST_VERSION_CURRENT, + new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + } else { + config = new IndexWriterConfig(TEST_VERSION_CURRENT, + new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + } + System.out.println(config.getMergePolicy()); config.setCodec(new SimpleTextCodec()); IndexWriter writer = new IndexWriter(directory, config); @@ -226,7 +233,8 @@ return fields; } - public static Term getOperationTerm(HashSet usedTerms, Random loaclRandom) { + public static Term getOperationTerm(HashSet usedTerms, + Random loaclRandom) { Term term = null; boolean used = loaclRandom.nextInt(5) < 4; if (used && !usedTerms.isEmpty()) { @@ -268,13 +276,13 @@ } public void testRandomIndexGeneration() throws IOException { - addDocuments(dir, random(), Integer.MAX_VALUE); + addDocuments(dir, random(), Integer.MAX_VALUE, true); DirectoryReader directoryReader = DirectoryReader.open(dir); directoryReader.close(); } public void testAddIndexes() throws IOException { - addDocuments(dir, random(), Integer.MAX_VALUE); + addDocuments(dir, random(), Integer.MAX_VALUE, true); RAMDirectory addedDir = new RAMDirectory(); IndexWriter addedIndexWriter = new IndexWriter(addedDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); @@ -293,167 +301,300 @@ assertEquals("Difference in addIndexes ", updatesIndexData, addedIndexData); } - public void testIndexEquality() throws IOException 
{ // create index through updates - addDocuments(dir, new Random(3), Integer.MAX_VALUE); - + addDocuments(dir, new Random(3), Integer.MAX_VALUE, true); + DirectoryReader updatesReader = DirectoryReader.open(dir); IndexData updatesIndexData = new IndexData(updatesReader); System.out.println("Updates index data"); - System.out.println(updatesIndexData.toString(true)); + System.out.println(updatesIndexData.toString(false)); System.out.println(); updatesReader.close(); - + // create the same index directly RAMDirectory directDir = new RAMDirectory(); - IndexWriter directWriter = new IndexWriter(directDir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); - + IndexWriter directWriter = new IndexWriter(directDir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + Document doc = new Document(); - doc.add(new StoredField("f0","elit, magna volutpat. tation ea dolor consequat, facilisis odio te soluta doming facer qui me consuetudium littera per nunc ")); - doc.add(new TextField("f4","consectetuer tincidunt erat nostrud hendrerit dignissim claritatem me etiam quam claram, ", Store.NO)); - doc.add(new TextField("f3","nibh iriure qui liber claritatem. claram, seacula videntur sollemnes ", Store.NO)); + doc.add(new StoredField( + "f0", + "elit, magna volutpat. tation ea dolor consequat, facilisis odio te soluta doming facer qui me consuetudium littera per nunc ")); + doc.add(new TextField( + "f4", + "consectetuer tincidunt erat nostrud hendrerit dignissim claritatem me etiam quam claram, ", + Store.NO)); + doc.add(new TextField( + "f3", + "nibh iriure qui liber claritatem. claram, seacula videntur sollemnes ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f4","diam eum nulla nulla claritatem. mutationem claram, ", Store.NO)); - doc.add(new TextField("f4","amet, erat eum delenit iis claritatem. 
claram, et fiant ", Store.NO)); - doc.add(new StoredField("f0","dolore quis duis iriure illum accumsan blandit tempor nihil facer assum. qui lectores dynamicus, claram, quinta qui sollemnes ")); - doc.add(new StoredField("f1","elit, dolore aliquip dolore et facilisi. nobis placerat demonstraverunt processus qui littera eodem clari, ")); - doc.add(new TextField("f1","nonummy ad commodo at te eleifend congue doming in demonstraverunt consuetudium est eodem ", Store.NO)); - doc.add(new TextField("f3","dolore volutpat. exerci nisl consequat, delenit liber nobis qui lectores saepius. et fiant ", Store.NO)); + doc.add(new StoredField( + "f0", + "dolore quis duis iriure illum accumsan blandit tempor nihil facer assum. qui lectores dynamicus, claram, quinta qui sollemnes ")); + doc.add(new TextField( + "f3", + "wisi vel accumsan liber qui nunc qui ", + Store.NO)); + doc.add(new TextField("f4", + "adipiscing ea dolore claritatem. est litterarum qui fiant ", Store.NO)); + doc.add(new StoredField("f4", + "diam hendrerit illum cum claritatem. quam claram, litterarum fiant ")); + doc.add(new TextField( + "f1", + "volutpat. nostrud lobortis dolore nulla odio blandit eleifend quod eorum qui formas nunc nobis ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f5","magna dolore luptatum claritatem investigationes quod per ", Store.NO)); - doc.add(new TextField("f2","elit, sed dolore aliquip commodo eum dignissim feugait doming habent insitam; legunt est qui quarta parum ", Store.NO)); - doc.add(new StoredField("f3","nibh volutpat. 
in facilisis accumsan luptatum mazim lectores sequitur anteposuerit sollemnes ")); - doc.add(new TextField("f2","euismod suscipit eum dolor molestie at qui duis doming in lius qui notare nunc ", Store.NO)); + doc.add(new TextField("f5", + "magna dolore luptatum claritatem investigationes quod per ", Store.NO)); + doc.add(new TextField( + "f2", + "elit, sed dolore aliquip commodo eum dignissim feugait doming habent insitam; legunt est qui quarta parum ", + Store.NO)); + doc.add(new StoredField( + "f3", + "nibh volutpat. in facilisis accumsan luptatum mazim lectores sequitur anteposuerit sollemnes ")); + doc.add(new TextField( + "f2", + "euismod suscipit eum dolor molestie at qui duis doming in lius qui notare nunc ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f4","tincidunt velit facilisis dignissim cum iis claram, ", Store.NO)); - doc.add(new StoredField("f4","ullamcorper accumsan delenit dolore nihil claritatem. mutationem clari, ")); + doc.add(new TextField("f4", + "tincidunt velit facilisis dignissim cum iis claram, ", Store.NO)); + doc.add(new StoredField("f4", + "ullamcorper accumsan delenit dolore nihil claritatem. mutationem clari, ")); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f3","exerci ea esse consequat, facilisis praesent placerat dynamicus, seacula qui ", Store.NO)); - doc.add(new TextField("f2","sed nonummy erat duis eum iriure dignissim duis nam assum. insitam; qui quam nunc futurum. ", Store.NO)); - doc.add(new TextField("f5","velit luptatum augue placerat quam ", Store.NO)); - doc.add(new TextField("f3","minim commodo facilisis qui imperdiet ii claritas seacula ", Store.NO)); + doc.add(new TextField( + "f3", + "exerci ea esse consequat, facilisis praesent placerat dynamicus, seacula qui ", + Store.NO)); + doc.add(new TextField( + "f2", + "sed nonummy erat duis eum iriure dignissim duis nam assum. insitam; qui quam nunc futurum. 
", + Store.NO)); + doc.add(new TextField("f5", "velit luptatum augue placerat quam ", Store.NO)); + doc.add(new TextField("f3", + "minim commodo facilisis qui imperdiet ii claritas seacula ", Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f2","tincidunt suscipit dolor eu dignissim delenit congue possim lius anteposuerit in ", Store.NO)); + doc.add(new TextField( + "f2", + "tincidunt suscipit dolor eu dignissim delenit congue possim lius anteposuerit in ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f5","consectetuer illum eleifend processus fiant ", Store.NO)); + doc.add(new TextField("f5", "consectetuer illum eleifend processus fiant ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f1","volutpat. minim aliquip duis dolore zzril congue in saepius. dynamicus, qui est eodem qui futurum. ", Store.NO)); - doc.add(new TextField("f0","ut quis duis eum hendrerit dolore odio feugait option doming mazim possim usus claritatem. legunt mirum litterarum qui sollemnes futurum. ", Store.NO)); + doc.add(new TextField( + "f1", + "volutpat. minim aliquip duis dolore zzril congue in saepius. dynamicus, qui est eodem qui futurum. ", + Store.NO)); + doc.add(new TextField( + "f0", + "ut quis duis eum hendrerit dolore odio feugait option doming mazim possim usus claritatem. legunt mirum litterarum qui sollemnes futurum. ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f0","nibh ut ut minim exerci ea duis esse et blandit luptatum facilisi. soluta doming quod typi usus quod dynamicus, consuetudium mirum quam quarta clari, in ", Store.NO)); - doc.add(new TextField("f4","wisi facilisis claritatem iis lius mutationem qui ", Store.NO)); + doc.add(new TextField( + "f0", + "nibh ut ut minim exerci ea duis esse et blandit luptatum facilisi. 
soluta doming quod typi usus quod dynamicus, consuetudium mirum quam quarta clari, in ", + Store.NO)); + doc.add(new TextField("f4", + "wisi facilisis claritatem iis lius mutationem qui ", Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new StoredField("f4","nibh ullamcorper ea dignissim usus mutationem quarta ")); - doc.add(new StoredField("f4","consectetuer wisi ea illum facilisis assum. mutationem quarta clari, ")); + doc.add(new StoredField("f4", + "nibh ullamcorper ea dignissim usus mutationem quarta ")); + doc.add(new StoredField("f4", + "consectetuer wisi ea illum facilisis assum. mutationem quarta clari, ")); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f5","velit tempor processus putamus et typi, ", Store.NO)); - doc.add(new TextField("f0","nibh dolore exerci eum esse feugiat facilisis iusto dolore cum quod non facit legunt quam claram, litterarum nunc ", Store.NO)); - doc.add(new TextField("f4","consectetuer ullamcorper eum dignissim dolore assum. est littera in ", Store.NO)); - doc.add(new TextField("f2","ipsum ad duis dolor eu at nam doming habent eorum me consuetudium decima futurum. ", Store.NO)); + doc.add(new TextField( + "f2", + "ipsum ad duis dolor eu at nam doming habent eorum me consuetudium decima futurum. ", + Store.NO)); + doc.add(new StoredField("f5", "velit tempor processus putamus et typi, ")); + doc.add(new TextField( + "f4", + "adipiscing nibh wisi velit nulla nihil claritatem etiam quarta fiant ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f3","adipiscing wisi nisl consequat, dignissim nobis qui mirum fiant sollemnes ", Store.NO)); - doc.add(new TextField("f0","euismod ut ad nisl dolor eu blandit te eleifend nihil typi qui lectores claritas consuetudium gothica, claram, decima sollemnes ", Store.NO)); - doc.add(new TextField("f0","ut erat ut nisl ea dolor velit vel eros odio qui feugait facilisi. nihil assum. 
usus ii legunt littera decima nobis sollemnes ", Store.NO)); + doc.add(new TextField( + "f3", + "adipiscing wisi nisl consequat, dignissim nobis qui mirum fiant sollemnes ", + Store.NO)); + doc.add(new TextField( + "f0", + "euismod ut ad nisl dolor eu blandit te eleifend nihil typi qui lectores claritas consuetudium gothica, claram, decima sollemnes ", + Store.NO)); + doc.add(new TextField( + "f0", + "ut erat ut nisl ea dolor velit vel eros odio qui feugait facilisi. nihil assum. usus ii legunt littera decima nobis sollemnes ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f5","velit tempor legentis mirum fiant ", Store.NO)); - doc.add(new TextField("f0","nibh dolore exerci eum esse feugiat facilisis iusto dolore cum quod non facit legunt quam claram, litterarum nunc ", Store.NO)); - doc.add(new TextField("f4","consectetuer ullamcorper eum dignissim dolore assum. est littera in ", Store.NO)); + doc.add(new StoredField("f4", + "nostrud velit accumsan quod assum. claritatem. etiam et in ")); + doc.add(new TextField("f4", + "adipiscing ea facilisis nihil usus lius etiam qui ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f2","dolore tation duis in eu delenit nam placerat in qui quarta ", Store.NO)); - doc.add(new TextField("f2","ut suscipit duis at dignissim delenit soluta insitam; me quam qui futurum. ", Store.NO)); + doc.add(new TextField("f2", + "dolore tation duis in eu delenit nam placerat in qui quarta ", + Store.NO)); + doc.add(new TextField( + "f2", + "ut suscipit duis at dignissim delenit soluta insitam; me quam qui futurum. ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f0","diam euismod quis autem consequat, eros iusto delenit feugait option quod habent claritatem claritatem. 
lectores consuetudium nunc per qui sollemnes ", Store.NO)); - doc.add(new TextField("f4","nibh tincidunt hendrerit nulla usus est quam qui ", Store.NO)); - doc.add(new TextField("f1","adipiscing diam nostrud duis at zzril te nobis congue est demonstraverunt lius consuetudium est claram, qui in ", Store.NO)); - doc.add(new StoredField("f0","nibh euismod magna erat suscipit duis dolor esse et delenit tempor quod typi in legunt littera nunc decima. in ")); + doc.add(new TextField( + "f0", + "diam euismod quis autem consequat, eros iusto delenit feugait option quod habent claritatem claritatem. lectores consuetudium nunc per qui sollemnes ", + Store.NO)); + doc.add(new TextField("f4", + "nibh tincidunt hendrerit nulla usus est quam qui ", Store.NO)); + doc.add(new TextField( + "f1", + "adipiscing diam nostrud duis at zzril te nobis congue est demonstraverunt lius consuetudium est claram, qui in ", + Store.NO)); + doc.add(new StoredField( + "f0", + "nibh euismod magna erat suscipit duis dolor esse et delenit tempor quod typi in legunt littera nunc decima. in ")); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f1","diam nonummy tincidunt lobortis dolor et luptatum liber cum doming quod assum. habent insitam; est ii littera per et qui ", Store.NO)); - doc.add(new TextField("f4","diam ullamcorper dignissim assum. claritatem. me etiam qui clari, ", Store.NO)); - doc.add(new TextField("f0","ipsum ut volutpat. minim autem dolor vulputate vel dolore odio blandit cum nobis mazim placerat facer possim est lectores sequitur consuetudium claram, modo qui in ", Store.NO)); - doc.add(new TextField("f2","tincidunt nisl duis in zzril placerat habent qui parum litterarum qui ", Store.NO)); + doc.add(new TextField("f4", + "diam ullamcorper dignissim assum. claritatem. me etiam qui clari, ", + Store.NO)); + doc.add(new TextField( + "f0", + "ipsum ut volutpat. 
minim autem dolor vulputate vel dolore odio blandit cum nobis mazim placerat facer possim est lectores sequitur consuetudium claram, modo qui in ", + Store.NO)); + doc.add(new TextField( + "f2", + "tincidunt nisl duis in zzril placerat habent qui parum litterarum qui ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new StoredField("f3","nibh ea consequat, accumsan tempor est dynamicus, seacula typi, videntur sollemnes ")); + doc.add(new StoredField( + "f3", + "nibh ea consequat, accumsan tempor est dynamicus, seacula typi, videntur sollemnes ")); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f1","volutpat. duis dolor esse at iusto delenit doming est facit est consuetudium humanitatis sollemnes ", Store.NO)); - doc.add(new TextField("f2","ipsum ut nisl dolor dignissim nam placerat investigationes processus notare nunc in ", Store.NO)); - doc.add(new TextField("f1","ipsum tincidunt nostrud lobortis in vel nulla dolore placerat facit ii quam littera formas nunc clari, ", Store.NO)); + doc.add(new TextField( + "f1", + "volutpat. duis dolor esse at iusto delenit doming est facit est consuetudium humanitatis sollemnes ", + Store.NO)); + doc.add(new TextField( + "f2", + "ipsum ut nisl dolor dignissim nam placerat investigationes processus notare nunc in ", + Store.NO)); + doc.add(new TextField( + "f1", + "ipsum tincidunt nostrud lobortis in vel nulla dolore placerat facit ii quam littera formas nunc clari, ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f3","adipiscing ea consequat, qui nobis ii mirum et sollemnes ", Store.NO)); - doc.add(new TextField("f5","velit te legere typi, ", Store.NO)); - doc.add(new TextField("f3","nisl in dignissim delenit placerat est claritatem. 
notare anteposuerit et videntur ", Store.NO)); + doc.add(new TextField("f3", + "adipiscing ea consequat, qui nobis ii mirum et sollemnes ", Store.NO)); + doc.add(new TextField("f5", "velit te legere typi, ", Store.NO)); + doc.add(new TextField( + "f3", + "nisl in dignissim delenit placerat est claritatem. notare anteposuerit et videntur ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f1","dolore nostrud suscipit lobortis duis vel et delenit liber cum habent usus claritatem. qui formas nobis sollemnes futurum. ", Store.NO)); - doc.add(new TextField("f2","erat tation duis in molestie dignissim liber congue possim me qui litterarum eodem in ", Store.NO)); + doc.add(new TextField( + "f1", + "dolore nostrud suscipit lobortis duis vel et delenit liber cum habent usus claritatem. qui formas nobis sollemnes futurum. ", + Store.NO)); + doc.add(new TextField( + "f2", + "erat tation duis in molestie dignissim liber congue possim me qui litterarum eodem in ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f0","amet, elit, ut minim duis eum esse vel eu iusto blandit nam eleifend nihil typi usus facit legunt notare litterarum per decima clari, ", Store.NO)); + doc.add(new TextField( + "f0", + "amet, elit, ut minim duis eum esse vel eu iusto blandit nam eleifend nihil typi usus facit legunt notare litterarum per decima clari, ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f2","euismod suscipit in velit delenit facer legunt quam formas parum ", Store.NO)); - doc.add(new StoredField("f4","ullamcorper accumsan delenit insitam; lius mutationem quarta decima. ")); + doc.add(new TextField("f2", + "euismod suscipit in velit delenit facer legunt quam formas parum ", + Store.NO)); + doc.add(new StoredField("f4", + "ullamcorper accumsan delenit insitam; lius mutationem quarta decima. 
")); directWriter.addDocument(doc); doc = new Document(); - doc.add(new StoredField("f3","ipsum adipiscing wisi nisl consequat, praesent placerat qui saepius. dynamicus, seacula videntur ")); + doc.add(new StoredField( + "f3", + "ipsum adipiscing wisi nisl consequat, praesent placerat qui saepius. dynamicus, seacula videntur ")); directWriter.addDocument(doc); doc = new Document(); - doc.add(new StoredField("f0","consectetuer erat minim suscipit ea esse consequat, feugiat accumsan duis cum nihil typi claritatem facit etiam quam claram, quinta modo futurum. ")); - doc.add(new TextField("f4","tincidunt illum cum claritatem. mutationem litterarum ", Store.NO)); + doc.add(new StoredField( + "f0", + "consectetuer erat minim suscipit ea esse consequat, feugiat accumsan duis cum nihil typi claritatem facit etiam quam claram, quinta modo futurum. ")); + doc.add(new TextField("f4", + "tincidunt illum cum claritatem. mutationem litterarum ", Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f2","tincidunt erat ad aliquip duis velit dignissim delenit facer insitam; processus qui litterarum formas quarta ", Store.NO)); - doc.add(new StoredField("f2","erat ut nisl duis at feugait congue in lius anteposuerit nunc ")); - doc.add(new TextField("f3","adipiscing minim esse luptatum tempor imperdiet est saepius. seacula fiant ", Store.NO)); - doc.add(new TextField("f0","ipsum elit, magna suscipit dolor eu iusto feugait eleifend quod assum. non est investigationes claritas nunc seacula videntur ", Store.NO)); - doc.add(new TextField("f4","amet, nibh ullamcorper velit nulla dignissim quod insitam; lius decima. 
", Store.NO)); + doc.add(new TextField( + "f2", + "tincidunt erat ad aliquip duis velit dignissim delenit facer insitam; processus qui litterarum formas quarta ", + Store.NO)); + doc.add(new StoredField("f2", + "erat ut nisl duis at feugait congue in lius anteposuerit nunc ")); + doc.add(new TextField( + "f3", + "adipiscing minim esse luptatum tempor imperdiet est saepius. seacula fiant ", + Store.NO)); + doc.add(new TextField( + "f0", + "ipsum elit, magna suscipit dolor eu iusto feugait eleifend quod assum. non est investigationes claritas nunc seacula videntur ", + Store.NO)); + doc.add(new TextField( + "f4", + "amet, nibh ullamcorper velit nulla dignissim quod insitam; lius decima. ", + Store.NO)); directWriter.addDocument(doc); doc = new Document(); - doc.add(new TextField("f5","quis dolore eleifend investigationes mirum per eodem typi, ", Store.NO)); + doc.add(new TextField("f5", + "quis dolore eleifend investigationes mirum per eodem typi, ", Store.NO)); directWriter.addDocument(doc); directWriter.close(); @@ -461,7 +602,7 @@ IndexData directIndexData = new IndexData(directReader); System.out.println("Direct index data"); - System.out.println(directIndexData.toString(true)); + System.out.println(directIndexData.toString(false)); System.out.println(); directReader.close(); directDir.close(); @@ -473,7 +614,7 @@ public void testReplaceAndAddAgain() throws IOException { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); FieldType fieldType = new FieldType(); fieldType.setIndexed(true); @@ -491,9 +632,9 @@ doc1.add(new StoredField("f1", "c", fieldType)); writer.addDocument(doc1); - Document doc3 = new Document(); - doc3.add(new StoredField("f1", "d", fieldType)); - writer.updateFields(Operation.REPLACE_FIELDS, new Term("f1", "b"), doc3); + Document fields1 = new Document(); + fields1.add(new StoredField("f1", "d", fieldType)); 
+ writer.updateFields(Operation.REPLACE_FIELDS, new Term("f1", "b"), fields1); Document doc2 = new Document(); doc2.add(new StoredField("f1", "b", fieldType)); @@ -508,6 +649,7 @@ // check indexed fields final DocsAndPositionsEnum termPositionsA = atomicReader .termPositionsEnum(new Term("f1", "a")); + assertNotNull("no positions for term", termPositionsA); assertEquals("wrong doc id", 1, termPositionsA.nextDoc()); assertEquals("wrong position", 0, termPositionsA.nextPosition()); assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, @@ -515,6 +657,7 @@ final DocsAndPositionsEnum termPositionsB = atomicReader .termPositionsEnum(new Term("f1", "b")); + assertNotNull("no positions for term", termPositionsB); assertEquals("wrong doc id", 2, termPositionsB.nextDoc()); assertEquals("wrong position", 0, termPositionsB.nextPosition()); assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, @@ -522,6 +665,7 @@ final DocsAndPositionsEnum termPositionsC = atomicReader .termPositionsEnum(new Term("f1", "c")); + assertNotNull("no positions for term", termPositionsC); assertEquals("wrong doc id", 1, termPositionsC.nextDoc()); assertEquals("wrong position", 1, termPositionsC.nextPosition()); assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, @@ -529,8 +673,10 @@ final DocsAndPositionsEnum termPositionsD = atomicReader .termPositionsEnum(new Term("f1", "d")); + assertNotNull("no positions for term", termPositionsD); assertEquals("wrong doc id", 0, termPositionsD.nextDoc()); - assertEquals("wrong position", 0, termPositionsD.nextPosition()); + // 50000 == StackedDocsEnum.STACKED_SEGMENT_POSITION_INCREMENT + assertEquals("wrong position", 50000, termPositionsD.nextPosition()); assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, termPositionsD.nextDoc()); @@ -555,6 +701,91 @@ } + public void testReplaceAndAddSameField() throws IOException { + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, new 
WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + + FieldType fieldType = new FieldType(); + fieldType.setIndexed(true); + fieldType.setTokenized(false); + fieldType.setOmitNorms(true); + fieldType.setStored(true); + + Document doc0 = new Document(); + doc0.add(new StoredField("f1", "a", fieldType)); + doc0.add(new StoredField("f1", "b", fieldType)); + writer.addDocument(doc0); + + Document doc1 = new Document(); + doc1.add(new StoredField("f1", "a", fieldType)); + doc1.add(new StoredField("f1", "c", fieldType)); + writer.addDocument(doc1); + + writer.commit(); + + Document fields1 = new Document(); + fields1.add(new StoredField("f1", "d", fieldType)); + writer.updateFields(Operation.ADD_FIELDS, new Term("f1", "c"), fields1); + + writer.close(); + + DirectoryReader directoryReader = DirectoryReader.open(dir); + final AtomicReader atomicReader = directoryReader.leaves().get(0).reader(); + printField(atomicReader, "f1"); + + // check indexed fields + final DocsAndPositionsEnum termPositionsA = atomicReader + .termPositionsEnum(new Term("f1", "a")); + assertNotNull("no positions for term", termPositionsA); + assertEquals("wrong doc id", 0, termPositionsA.nextDoc()); + assertEquals("wrong position", 0, termPositionsA.nextPosition()); + assertEquals("wrong doc id", 1, termPositionsA.nextDoc()); + assertEquals("wrong position", 0, termPositionsA.nextPosition()); + assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, + termPositionsA.nextDoc()); + + final DocsAndPositionsEnum termPositionsB = atomicReader + .termPositionsEnum(new Term("f1", "b")); + assertNotNull("no positions for term", termPositionsB); + assertEquals("wrong doc id", 0, termPositionsB.nextDoc()); + assertEquals("wrong position", 1, termPositionsB.nextPosition()); + assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, + termPositionsB.nextDoc()); + + final DocsAndPositionsEnum termPositionsC = atomicReader + .termPositionsEnum(new Term("f1", "c")); + assertNotNull("no positions for term", 
termPositionsC); + assertEquals("wrong doc id", 1, termPositionsC.nextDoc()); + assertEquals("wrong position", 1, termPositionsC.nextPosition()); + assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, + termPositionsC.nextDoc()); + + final DocsAndPositionsEnum termPositionsD = atomicReader + .termPositionsEnum(new Term("f1", "d")); + assertNotNull("no positions for term", termPositionsD); + assertEquals("wrong doc id", 1, termPositionsD.nextDoc()); + assertEquals("wrong position", 0, termPositionsD.nextPosition()); + assertEquals("wrong doc id", DocIdSetIterator.NO_MORE_DOCS, + termPositionsD.nextDoc()); + + // check stored fields + final StoredDocument stored0 = atomicReader.document(0); + final StorableField[] f1_0 = stored0.getFields("f1"); + assertEquals("wrong numeber of stored fields", 2, f1_0.length); + assertEquals("wrong field value", "a", f1_0[0].stringValue()); + assertEquals("wrong field value", "b", f1_0[1].stringValue()); + + final StoredDocument stored1 = atomicReader.document(1); + final StorableField[] f1_1 = stored1.getFields("f1"); + assertEquals("wrong numeber of stored fields", 3, f1_1.length); + assertEquals("wrong field value", "d", f1_1[0].stringValue()); + assertEquals("wrong field value", "a", f1_1[1].stringValue()); + assertEquals("wrong field value", "c", f1_1[2].stringValue()); + + directoryReader.close(); + + } + private void printField(AtomicReader atomicReader, String fieldName) throws IOException { if (!VERBOSE_FIELD_REPLACEMENTS) { @@ -580,29 +811,28 @@ } public void testprintIndexes() throws IOException { - File outDir = new File("D:/temp/ifu/compare/scenario/c"); + File outDir = new File("D:/temp/ifu/compare/scenario/a"); outDir.mkdirs(); - + for (int i = 0; i < 42; i++) { - //Directory directory = new RAMDirectory(); + // Directory directory = new RAMDirectory(); File fsDirFile = new File(outDir, "" + i); fsDirFile.mkdirs(); Directory directory = FSDirectory.open(fsDirFile); for (String filename : directory.listAll()) { 
new File(fsDirFile, filename).delete(); } - addDocuments(directory, new Random(3), i); + System.out.print("" + i + " "); + addDocuments(directory, new Random(3), i, true); DirectoryReader updatesReader = DirectoryReader.open(directory); - IndexData updatesIndexData = new IndexData( - updatesReader); + IndexData updatesIndexData = new IndexData(updatesReader); updatesReader.close(); - + File out = new File(outDir, (i < 10 ? "0" : "") + i + ".txt"); FileWriter fileWriter = new FileWriter(out); fileWriter.append(updatesIndexData.toString()); fileWriter.close(); } } - } Index: lucene/core/src/test/org/apache/lucene/util/IndexData.java =================================================================== --- lucene/core/src/test/org/apache/lucene/util/IndexData.java (revision 1477914) +++ lucene/core/src/test/org/apache/lucene/util/IndexData.java (working copy) @@ -4,12 +4,12 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; -import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; @@ -293,6 +293,7 @@ public DocumentData(StoredDocument document) { storedFields = document.getFields(); + Collections.sort(storedFields, new StorableFieldsComparator()); } public boolean isEmpty() { @@ -374,4 +375,13 @@ } } + + private class StorableFieldsComparator implements Comparator { + + @Override + public int compare(StorableField f0, StorableField f1) { + return f0.stringValue().compareTo(f1.stringValue()); + } + + } }