diff --git a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java index 1554b81..713ee6f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsFormat.java @@ -5,6 +5,7 @@ import java.util.Set; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -29,7 +30,7 @@ import org.apache.lucene.store.IOContext; * Controls the format of stored fields */ public abstract class StoredFieldsFormat { - public abstract StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException; + public abstract StoredFieldsReader fieldsReader(SegmentReadState state) throws IOException; public abstract StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException; public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsFormat.java index a5449f9..638076d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsFormat.java @@ -22,6 +22,7 @@ import java.util.Set; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -29,7 +30,7 @@ import org.apache.lucene.store.IOContext; * Controls the format of term vectors */ public abstract class TermVectorsFormat { - public abstract TermVectorsReader vectorsReader(Directory directory, SegmentInfo 
segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException; + public abstract TermVectorsReader vectorsReader(SegmentReadState state) throws IOException; public abstract TermVectorsWriter vectorsWriter(Directory directory, String segment, IOContext context) throws IOException; public abstract void files(SegmentInfo info, Set files) throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java index b7054ba..405bb92 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java @@ -165,6 +165,6 @@ class Lucene3xSegmentInfosReader extends SegmentInfosReader { return new SegmentInfo(dir, version, name, docCount, delGen, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, normGen, isCompoundFile, - delCount, hasProx, codec, diagnostics, hasVectors); + delCount, hasProx, codec, diagnostics, hasVectors, null); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java index 20a005c..65fd58a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java @@ -23,8 +23,8 @@ import java.util.Set; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsWriter; -import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -33,9 +33,8 @@ import 
org.apache.lucene.store.IOContext; class Lucene3xStoredFieldsFormat extends StoredFieldsFormat { @Override - public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, - FieldInfos fn, IOContext context) throws IOException { - return new Lucene3xStoredFieldsReader(directory, si, fn, context); + public StoredFieldsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene3xStoredFieldsReader(state.dir, state.segmentInfo, state.fieldInfos, state.context); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java index 58a5a7c..8bf31a6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xTermVectorsFormat.java @@ -23,8 +23,8 @@ import java.util.Set; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.codecs.TermVectorsWriter; -import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -38,8 +38,8 @@ import org.apache.lucene.store.IOContext; class Lucene3xTermVectorsFormat extends TermVectorsFormat { @Override - public TermVectorsReader vectorsReader(Directory directory,SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException { - return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context); + public TermVectorsReader vectorsReader(SegmentReadState state) throws IOException { + return new Lucene3xTermVectorsReader(state.dir, state.segmentInfo, state.fieldInfos, state.context); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosReader.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosReader.java index 7070ff7..0839253 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosReader.java @@ -86,9 +86,11 @@ public class Lucene40SegmentInfosReader extends SegmentInfosReader { final Codec codec = Codec.forName(input.readString()); final Map diagnostics = input.readStringStringMap(); final int hasVectors = input.readByte(); + final boolean hasOverlay = input.readByte() == SegmentInfo.YES; + final String ovlName = hasOverlay ? input.readString() : null; return new SegmentInfo(dir, version, name, docCount, delGen, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, normGen, isCompoundFile, - delCount, hasProx, codec, diagnostics, hasVectors); + delCount, hasProx, codec, diagnostics, hasVectors, ovlName); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosWriter.java index 670eae6..04717c2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfosWriter.java @@ -99,6 +99,10 @@ public class Lucene40SegmentInfosWriter extends SegmentInfosWriter { output.writeString(si.getCodec().getName()); output.writeStringStringMap(si.getDiagnostics()); output.writeByte((byte) (si.getHasVectorsInternal())); + output.writeByte((byte) (si.getOverlayName() != null ? 
SegmentInfo.YES : SegmentInfo.NO)); + if (si.getOverlayName() != null) { + output.writeString(si.getOverlayName()); + } } protected IndexOutput createOutput(Directory dir, String segmentFileName, IOContext context) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java index 240d16d..8b6129a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java @@ -23,9 +23,9 @@ import java.util.Set; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsWriter; -import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.DataOutput; // javadocs +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -80,9 +80,8 @@ import org.apache.lucene.store.IOContext; public class Lucene40StoredFieldsFormat extends StoredFieldsFormat { @Override - public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, - FieldInfos fn, IOContext context) throws IOException { - return new Lucene40StoredFieldsReader(directory, si, fn, context); + public StoredFieldsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene40StoredFieldsReader(state.dir, state.segmentInfo, state.fieldInfos, state.context); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java index b7fc812..4437556 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java @@ -23,9 +23,9 @@ import java.util.Set; import org.apache.lucene.codecs.TermVectorsFormat; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.codecs.TermVectorsWriter; -import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.DataOutput; // javadocs +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -103,8 +103,8 @@ import org.apache.lucene.store.IOContext; public class Lucene40TermVectorsFormat extends TermVectorsFormat { @Override - public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException { - return new Lucene40TermVectorsReader(directory, segmentInfo, fieldInfos, context); + public TermVectorsReader vectorsReader(SegmentReadState state) throws IOException { + return new Lucene40TermVectorsReader(state.dir, state.segmentInfo, state.fieldInfos, state.context); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfosReader.java index 590cf6a..acff167 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfosReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfosReader.java @@ -168,7 +168,7 @@ public class SimpleTextSegmentInfosReader extends SegmentInfosReader { return new SegmentInfo(directory, version, name, docCount, delGen, dsOffset, dsSegment, dsCompoundFile, normGen, isCompoundFile, - delCount, hasProx, codec, diagnostics, hasVectors); + delCount, hasProx, codec, diagnostics, hasVectors, null); } private String readString(int offset, BytesRef scratch) { diff --git 
a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java index 05cc7e7..6585f1e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java @@ -23,8 +23,8 @@ import java.util.Set; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsWriter; -import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -37,8 +37,8 @@ import org.apache.lucene.store.IOContext; public class SimpleTextStoredFieldsFormat extends StoredFieldsFormat { @Override - public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException { - return new SimpleTextStoredFieldsReader(directory, si, fn, context); + public StoredFieldsReader fieldsReader(SegmentReadState state) throws IOException { + return new SimpleTextStoredFieldsReader(state.dir, state.segmentInfo, state.fieldInfos, state.context); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java index 8ec0a86..a96e74e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java @@ -25,6 +25,7 @@ import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.codecs.TermVectorsWriter; import org.apache.lucene.index.FieldInfos; import 
org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -37,8 +38,8 @@ import org.apache.lucene.store.IOContext; public class SimpleTextTermVectorsFormat extends TermVectorsFormat { @Override - public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException { - return new SimpleTextTermVectorsReader(directory, segmentInfo, context); + public TermVectorsReader vectorsReader(SegmentReadState state) throws IOException { + return new SimpleTextTermVectorsReader(state.dir, state.segmentInfo, state.context); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocValuesFormat.java new file mode 100644 index 0000000..9cb25ca --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocValuesFormat.java @@ -0,0 +1,41 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.PerDocConsumer; +import org.apache.lucene.codecs.PerDocProducer; +import org.apache.lucene.index.PerDocWriteState; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; + +public class StackedDocValuesFormat extends DocValuesFormat { + + DocValuesFormat original; + + public StackedDocValuesFormat(DocValuesFormat original) { + this.original = original; + } + + @Override + public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException { + return original.docsConsumer(state); + } + + @Override + public PerDocProducer docsProducer(SegmentReadState state) throws IOException { + if (state.segmentInfo.getHasStacked()) { + return new StackedPerDocProducer(original.docsProducer(state), state); + } 
else { + return original.docsProducer(state); + } + } + + @Override + public void files(SegmentInfo info, Set files) throws IOException { + original.files(info, files); + // XXX add overlay? or should've been done already at the other codec's level? + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocsAndPositionsEnum.java new file mode 100644 index 0000000..dd4cb6c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocsAndPositionsEnum.java @@ -0,0 +1,131 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +public class StackedDocsAndPositionsEnum extends DocsAndPositionsEnum { + private String field; + private StackedMap map; + private BytesRef term; + private DocsAndPositionsEnum main; + private DocsAndPositionsEnum stacked = null, currentStacked = null; + private TermsEnum stackedTerms; + private boolean needsOffsets; + private Bits liveDocs; + private int currentMID = -1; + private int currentSID = -1; + private int currentFreq; + + public StackedDocsAndPositionsEnum(String field, StackedMap map, BytesRef term, + DocsAndPositionsEnum main, + TermsEnum stackedTerms, boolean needsOffsets, Bits liveDocs) { + this.field = field; + this.map = map; + this.term = term; + this.main = main; + this.stackedTerms = stackedTerms; + this.needsOffsets = needsOffsets; + this.liveDocs = liveDocs; + } + + @Override + public int nextPosition() throws IOException { + if (currentStacked != null) { + return currentStacked.nextPosition(); + } else { + return main.nextPosition(); + } + } + + @Override + public int startOffset() throws IOException { + if (currentStacked != null) { + return currentStacked.startOffset(); + } else { + 
return main.startOffset(); + } + } + + @Override + public int endOffset() throws IOException { + if (currentStacked != null) { + return currentStacked.endOffset(); + } else { + return main.endOffset(); + } + } + + @Override + public BytesRef getPayload() throws IOException { + if (currentStacked != null) { + return currentStacked.getPayload(); + } else { + return main.getPayload(); + } + } + + @Override + public boolean hasPayload() { + if (currentStacked != null) { + return currentStacked.hasPayload(); + } else { + return main.hasPayload(); + } + } + + @Override + public int freq() { + return currentFreq; + } + + @Override + public int docID() { + return Math.min(currentMID, currentSID); + } + + @Override + public int nextDoc() throws IOException { + if (currentMID == -1 || currentMID < currentSID) { // init + currentMID = main.nextDoc(); + } + if (currentSID == -1 || currentSID < currentMID) { // init + currentSID = map.advanceDocsEnum(field, currentSID); + } + return Math.min(currentMID, currentSID); + } + + @Override + public int advance(int target) throws IOException { + if (currentMID == -1 || currentMID < target) { + currentMID = main.advance(target); + } + if (currentSID == -1 || currentSID < target) { + currentSID = map.advanceDocsEnum(field, target); + } + // prepare freq + if (currentSID <= currentMID) { + // !!!!!!!!! MAJOR COST !!!!!!!! 
+ if (stackedTerms.seekExact(term, true)) { // found this term + stacked = stackedTerms.docsAndPositions(liveDocs, stacked, needsOffsets); + // remap; + int newID = map.getStackedDocsEnumId(field, currentSID); + stacked.advance(newID); + currentFreq = stacked.freq(); + currentStacked = stacked; + } else { // term not found + currentSID = currentMID; + currentFreq = main.freq(); + currentStacked = null; + } + } else { + currentFreq = main.freq(); + currentStacked = null; + } + return Math.min(currentMID, currentSID); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocsEnum.java new file mode 100644 index 0000000..7b0c648 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedDocsEnum.java @@ -0,0 +1,82 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +public class StackedDocsEnum extends DocsEnum { + private String field; + private StackedMap map; + private BytesRef term; + private DocsEnum main; + private TermsEnum stackedTerms; + private boolean needsFreqs; + private int currentMID = -1; + private int currentSID = -1; + private int currentFreq; + private DocsEnum stacked; + + public StackedDocsEnum(String field, StackedMap map, BytesRef term, DocsEnum main, TermsEnum stackedTerms, + boolean needsFreqs, Bits liveDocs) { + this.field = field; + this.map = map; + this.term = term; + this.main = main; + this.stackedTerms = stackedTerms; + this.needsFreqs = needsFreqs; + stacked = null; + } + + @Override + public int freq() { + return currentFreq; + } + + @Override + public int docID() { + return Math.min(currentMID, currentSID); + } + + @Override + public int nextDoc() throws IOException { + if (currentMID == -1 || currentMID < currentSID) { 
// init + currentMID = main.nextDoc(); + } + if (currentSID == -1 || currentSID < currentMID) { // init + currentSID = map.advanceDocsEnum(field, currentSID); + } + return Math.min(currentMID, currentSID); + } + + @Override + public int advance(int target) throws IOException { + if (currentMID == -1 || currentMID < target) { + currentMID = main.advance(target); + } + if (currentSID == -1 || currentSID < target) { + currentSID = map.advanceDocsEnum(field, target); + } + if (needsFreqs) { // prepare freq + if (currentSID <= currentMID) { + // !!!!!!!!! MAJOR COST !!!!!!!! + if (stackedTerms.seekExact(term, true)) { // found this term in updates + stacked = stackedTerms.docs(null, stacked, needsFreqs); + // remap; + int newID = map.getStackedDocsEnumId(field, currentSID); + stacked.advance(newID); + currentFreq = stacked.freq(); + } else { // term not found - use main + currentSID = currentMID; + currentFreq = main.freq(); + } + } else { + currentFreq = main.freq(); + } + } + return Math.min(currentMID, currentSID); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedFieldsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedFieldsProducer.java new file mode 100644 index 0000000..ecc5c39 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedFieldsProducer.java @@ -0,0 +1,107 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; +import java.util.Iterator; +import java.util.TreeSet; + +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.Terms; +// NOTE(review): dropped stray "import org.apache.poi.openxml4j.exceptions.InvalidOperationException;" — Apache POI is not a lucene-core dependency and the type was never referenced + +public class StackedFieldsProducer extends FieldsProducer { + FieldsProducer mainProducer; + FieldsProducer stackedProducer; + StackedMap map; + + public
StackedFieldsProducer(FieldsProducer mainProducer, FieldsProducer stackedProducer, StackedMap map) { + this.mainProducer = mainProducer; + this.stackedProducer = stackedProducer; + this.map = map; + } + + @Override + public void close() throws IOException { + if (mainProducer != null) { + mainProducer.close(); + } + stackedProducer.close(); + } + + @Override + public FieldsEnum iterator() throws IOException { + if (mainProducer == null) { + return stackedProducer.iterator(); + } + return new StackedFieldsEnum(); + } + + class StackedFieldsEnum extends FieldsEnum { + FieldsEnum main, stacked; + TreeSet allFields = new TreeSet(); + Iterator it = null; + String curField = null; + + StackedFieldsEnum() throws IOException { + main = mainProducer.iterator(); + stacked = stackedProducer.iterator(); + String fld; + while ((fld = main.next()) != null) { + allFields.add(fld); + } + while ((fld = stacked.next()) != null) { + allFields.add(fld); + } + } + + @Override + public String next() throws IOException { + if (it == null) { + it = allFields.iterator(); + } + if (it.hasNext()) { + curField = it.next(); + return curField; + } else { + return null; + } + } + + @Override + public Terms terms() throws IOException { + if (curField == null) { + throw new IOException("next() has to be called first"); + } + return StackedFieldsProducer.this.terms(curField); + } + } + + @Override + public Terms terms(String field) throws IOException { + Terms main = null; + if (mainProducer != null) { + main = mainProducer.terms(field); // fix: result was computed but never assigned, so main-segment terms were always discarded + } + Terms stacked = stackedProducer.terms(field); + if (stacked == null && main == null) { + return null; + } + if (stacked == null) { + return main; + } else { + return new StackedTerms(field, main, stacked, map); + } + } + + // XXX wacky, but what can be the impact?
+ @Override + public int size() throws IOException { + if (mainProducer != null) { + return mainProducer.size(); + } else { + return stackedProducer.size(); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedMap.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedMap.java new file mode 100644 index 0000000..59fe4ef --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedMap.java @@ -0,0 +1,49 @@ +package org.apache.lucene.codecs.stacked; + +import java.util.Map; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.SegmentReadState; + +public class StackedMap { + + public static StackedMap read(SegmentReadState state) { + StackedMap map = new StackedMap(); + return map; + } + + /** + * Return a map of field names and stacked document ids that + * contain updated stored field values + * @param oid original id + * @return map where keys are field names and values are stacked document ids where + * updated values can be found + */ + public Map getStackedStoredIds(int oid) { + return null; + } + + /** + * Return a stacked document id that updates an inverted field for the + * original id + * @param field field name + * @param oid original doc id + * @return -1 if there is no such document, or the stacked document id. + */ + public int getStackedDocsEnumId(String field, int oid) { + // XXX + return -1; + } + + /** + * Find the original id after the current id of a document with + * updated postings. 
+ * @param field field name + * @param currentId current original id + * @return next original id of a document with updated postings + */ + public int advanceDocsEnum(String field, int currentId) { + // XXX + return DocsEnum.NO_MORE_DOCS; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedPerDocProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedPerDocProducer.java new file mode 100644 index 0000000..8459db7 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedPerDocProducer.java @@ -0,0 +1,30 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; + +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.PerDocProducer; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.SegmentReadState; + +public class StackedPerDocProducer extends PerDocProducer { + PerDocProducer original; + + public StackedPerDocProducer(PerDocProducer original, SegmentReadState state) { + this.original = original; + } + + @Override + public void close() throws IOException { + if (original != null) { + original.close(); + } + } + + @Override + public DocValues docValues(String field) throws IOException { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedPostingsFormat.java new file mode 100644 index 0000000..fea7e4d --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedPostingsFormat.java @@ -0,0 +1,52 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.FieldsProducer; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; +import 
org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IOContext; + +public class StackedPostingsFormat extends PostingsFormat { + + public static final String OVERLAY_EXT = "pov"; + + private PostingsFormat original = new Lucene40PostingsFormat(); + + protected StackedPostingsFormat(String name, PostingsFormat postingsFormat) { + super(name); if (postingsFormat != null) { this.original = postingsFormat; } // fix: the postingsFormat argument was silently ignored, leaving the hard-coded Lucene40 default + } + + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) + throws IOException { + return original.fieldsConsumer(state); + } + + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) + throws IOException { + FieldsProducer producer = original.fieldsProducer(state); + if (producer == null) { + return null; + } + if (state.segmentInfo.getHasStacked()) { + return new StackedFieldsProducer(producer, state); // FIXME: does not compile — StackedFieldsProducer's only constructor is (FieldsProducer main, FieldsProducer stacked, StackedMap map); the overlay producer and StackedMap for this call are not constructed anywhere in this patch + } else { + return producer; + } + } + + @Override + public void files(SegmentInfo segmentInfo, String segmentSuffix, + Set files) throws IOException { + original.files(segmentInfo, segmentSuffix, files); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedSegmentInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedSegmentInfosFormat.java new file mode 100644 index 0000000..a61b43f --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedSegmentInfosFormat.java @@ -0,0 +1,25 @@ +package org.apache.lucene.codecs.stacked; + +import org.apache.lucene.codecs.SegmentInfosFormat; +import org.apache.lucene.codecs.SegmentInfosReader; +import org.apache.lucene.codecs.SegmentInfosWriter; + +public class StackedSegmentInfosFormat extends SegmentInfosFormat { + + public StackedSegmentInfosFormat(SegmentInfosFormat segmentInfosFormat) { + // TODO Auto-generated constructor stub + } + + @Override + public SegmentInfosReader getSegmentInfosReader() { + //
TODO Auto-generated method stub + return null; + } + + @Override + public SegmentInfosWriter getSegmentInfosWriter() { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedStoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedStoredFieldsFormat.java new file mode 100644 index 0000000..74a9e86 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedStoredFieldsFormat.java @@ -0,0 +1,40 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +public class StackedStoredFieldsFormat extends StoredFieldsFormat { + + public StackedStoredFieldsFormat(StoredFieldsFormat storedFieldsFormat) { + // TODO Auto-generated constructor stub + } + + @Override + public StoredFieldsReader fieldsReader(SegmentReadState state) throws IOException { + // TODO Auto-generated method stub + return null; + } + + @Override + public StoredFieldsWriter fieldsWriter(Directory directory, String segment, + IOContext context) throws IOException { + // TODO Auto-generated method stub + return null; + } + + @Override + public void files(SegmentInfo info, Set files) throws IOException { + // TODO Auto-generated method stub + + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedStoredFieldsReader.java new file mode 100644 index 0000000..d972277 --- /dev/null +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedStoredFieldsReader.java @@ -0,0 +1,138 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.StoredFieldVisitor; + +public class StackedStoredFieldsReader extends StoredFieldsReader { + private StackedMap map; + private StoredFieldsReader main, stacked; + + public StackedStoredFieldsReader(StackedMap map, StoredFieldsReader main, StoredFieldsReader stacked) { + this.map = map; + this.main = main; + this.stacked = stacked; + } + + @Override + public void close() throws IOException { + main.close(); + stacked.close(); + } + + @Override + public void visitDocument(int n, StoredFieldVisitor visitor) + throws CorruptIndexException, IOException { + Map stackedIds = map.getStackedStoredIds(n); + if (stackedIds != null) { + VisitorWrapper wrapper = new VisitorWrapper(visitor); + // first collect unchanged fields from the main + wrapper.excludeFields(stackedIds.keySet()); + main.visitDocument(n, wrapper); + // now collect fields from stacked updates + wrapper.excludeFields(Collections.emptySet()); + // !!!!!!!!!!! MAJOR COST !!!!!!!!!! + for (Entry e : stackedIds.entrySet()) { + wrapper.includeFields(e.getKey()); + stacked.visitDocument(e.getValue(), wrapper); + } + } else { + main.visitDocument(n, visitor); + } + } + + private static class VisitorWrapper extends StoredFieldVisitor { + Set includeFields = new HashSet(); + Set excludeFields = new HashSet(); + StoredFieldVisitor wrapped; + + public VisitorWrapper(StoredFieldVisitor wrapped) { + this.wrapped = wrapped; + } + + public void includeFields(String... 
fields) { + this.includeFields.clear(); + this.includeFields.addAll(Arrays.asList(fields)); + } + + public void excludeFields(Collection fields) { + this.excludeFields.clear(); + this.excludeFields.addAll(fields); + } + + @Override + public Status needsField(FieldInfo fieldInfo) throws IOException { + if (includeFields.isEmpty()) { + if (excludeFields.isEmpty()) { + return wrapped.needsField(fieldInfo); + } else { + if (!excludeFields.contains(fieldInfo.name)) { + return wrapped.needsField(fieldInfo); + } else { + return Status.NO; + } + } + } else { + if (includeFields.contains(fieldInfo.name)) { + if (excludeFields.contains(fieldInfo.name)) { + return Status.NO; + } else { + return wrapped.needsField(fieldInfo); + } + } else { + return Status.NO; + } + } + } + + @Override + public void binaryField(FieldInfo fieldInfo, byte[] value, int offset, + int length) throws IOException { + wrapped.binaryField(fieldInfo, value, offset, length); + } + + @Override + public void stringField(FieldInfo fieldInfo, String value) + throws IOException { + wrapped.stringField(fieldInfo, value); + } + + @Override + public void intField(FieldInfo fieldInfo, int value) throws IOException { + wrapped.intField(fieldInfo, value); + } + + @Override + public void longField(FieldInfo fieldInfo, long value) throws IOException { + wrapped.longField(fieldInfo, value); + } + + @Override + public void floatField(FieldInfo fieldInfo, float value) throws IOException { + wrapped.floatField(fieldInfo, value); + } + + @Override + public void doubleField(FieldInfo fieldInfo, double value) + throws IOException { + wrapped.doubleField(fieldInfo, value); + } + + } + + @Override + public StoredFieldsReader clone() { + return new StackedStoredFieldsReader(map, main.clone(), stacked.clone()); + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedTermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedTermVectorsFormat.java new file mode 100644 index 
0000000..7c649a9 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedTermVectorsFormat.java @@ -0,0 +1,41 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.codecs.TermVectorsWriter; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +public class StackedTermVectorsFormat extends TermVectorsFormat { + + public StackedTermVectorsFormat(TermVectorsFormat termVectorsFormat) { + // TODO Auto-generated constructor stub + } + + @Override + public TermVectorsReader vectorsReader(SegmentReadState state) + throws IOException { + // TODO Auto-generated method stub + return null; + } + + @Override + public TermVectorsWriter vectorsWriter(Directory directory, String segment, + IOContext context) throws IOException { + // TODO Auto-generated method stub + return null; + } + + @Override + public void files(SegmentInfo info, Set files) throws IOException { + // TODO Auto-generated method stub + + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedTerms.java b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedTerms.java new file mode 100644 index 0000000..1343c53 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/stacked/StackedTerms.java @@ -0,0 +1,256 @@ +package org.apache.lucene.codecs.stacked; + +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +/** + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class StackedTerms extends Terms { + Terms main, stacked; + StackedMap map; + Comparator cmp; + String field; + + StackedTerms(String field, Terms mainTerms, Terms stackedTerms, StackedMap map) throws IOException { + this.main = mainTerms; + this.stacked = stackedTerms; + this.field = field; + assert this.stacked != null; + this.map = map; + if (main != null) { + cmp = main.getComparator(); + } else { + cmp = stacked.getComparator(); + } + } + + @Override + public TermsEnum iterator(TermsEnum reuse) throws IOException { + return new StackedTermsEnum(reuse); + } + + // Leapfrog enumerator. Iterates (or seeks) to the smallest next term from + // either the main or the stacked enum, and continues to enumerate from + // that enum until it reaches the other enum's term. + // When terms are equal (i.e. there are updated for a term) then a combined + // Doc*Enum is returned. 
+ class StackedTermsEnum extends TermsEnum { + TermsEnum mte = null, ste = null; + BytesRef lastMTerm = null; + BytesRef lastSTerm = null; + boolean inited = false; + boolean needsSNext = false, needsMNext = false; + int m_lessThan_s = 1; + + StackedTermsEnum(TermsEnum reuse) throws IOException { + if (main != null) { + if (reuse instanceof StackedTermsEnum) { + mte = main.iterator(((StackedTermsEnum)reuse).mte); + } else { + mte = main.iterator(reuse); + } + } + ste = stacked.iterator(null); + } + + @Override + public BytesRef next() throws IOException { + if (!inited) { + lastSTerm = ste.next(); + if (mte != null) { + lastMTerm = mte.next(); + } + inited = true; + } else { + if (m_lessThan_s < 0) { + if (mte != null) lastMTerm = mte.next(); + } else if (m_lessThan_s > 0) { + lastSTerm = ste.next(); + } else { + lastSTerm = ste.next(); + if (mte != null) lastMTerm = mte.next(); + } + } + if (lastMTerm != null && lastSTerm != null) { + m_lessThan_s = cmp.compare(lastMTerm, lastSTerm); + if (m_lessThan_s == 0) { // postings for the same term + return lastMTerm; // return whichever + } else if (m_lessThan_s < 0) { // postings from m go now + return lastMTerm; + } else { // postings from s go now + return lastSTerm; + } + } else { + if (lastMTerm != null) { + return lastMTerm; + } + if (lastSTerm != null) { + return lastSTerm; + } + return null; + } + } + + @Override + public Comparator getComparator() { + return cmp; + } + + @Override + public SeekStatus seekCeil(BytesRef text, boolean useCache) + throws IOException { + SeekStatus sStatus = ste.seekCeil(text, useCache); + lastSTerm = ste.term(); + SeekStatus mStatus = SeekStatus.END; + if (mte != null) { + mStatus = mte.seekCeil(text, useCache); + lastMTerm = mte.term(); + } + if (sStatus == SeekStatus.FOUND || mStatus == SeekStatus.FOUND) { + return SeekStatus.FOUND; + } else if (mStatus == SeekStatus.NOT_FOUND || sStatus == SeekStatus.NOT_FOUND) { + return SeekStatus.NOT_FOUND; + } else { + return SeekStatus.END; + 
} + } + + // XXX + @Override + public void seekExact(long ord) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public BytesRef term() throws IOException { + if (m_lessThan_s == 0) { + return lastMTerm; + } else if (m_lessThan_s > 0) { + return lastSTerm; + } else { + return lastMTerm; + } + } + + // XXX + @Override + public long ord() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int docFreq() throws IOException { + if (m_lessThan_s == 0) { + return mte.docFreq(); // XXX doesn't consider stacked docFreq + } else if (m_lessThan_s > 0) { + return ste.docFreq(); + } else { + return mte.docFreq(); + } + } + + @Override + public long totalTermFreq() throws IOException { + if (m_lessThan_s == 0) { + return mte.totalTermFreq(); // XXX doesn't consider stacked docFreq + } else if (m_lessThan_s > 0) { + return ste.totalTermFreq(); + } else { + return mte.totalTermFreq(); + } + } + + @Override + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) + throws IOException { + if (m_lessThan_s == 0) { // merge old and new postings + return new StackedDocsEnum(field, map, lastSTerm, mte.docs(liveDocs, null, needsFreqs), ste, needsFreqs, liveDocs); + } else if (m_lessThan_s > 0) { // return postings for stacked, remapping + return new StackedDocsEnum(field, map, lastSTerm, null, ste, needsFreqs, liveDocs); + } else { // return original postings + return mte.docs(liveDocs, reuse, needsFreqs); + } + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, + DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + if (m_lessThan_s == 0) { // merge old and new postings + return new StackedDocsAndPositionsEnum(field, map, lastSTerm, mte.docsAndPositions(liveDocs, null, needsOffsets), ste, needsOffsets, liveDocs); + } else if (m_lessThan_s > 0) { // return postings for stacked, remapping + return new StackedDocsAndPositionsEnum(field, map, 
lastSTerm, null, ste, needsOffsets, liveDocs); + } else { // return original postings + return mte.docsAndPositions(liveDocs, reuse, needsOffsets); + } + } + + } + + @Override + public Comparator getComparator() throws IOException { + return cmp; + } + + // XXX stats? + @Override + public long size() throws IOException { + if (main != null) { + return main.size(); + } else { + return stacked.size(); + } + } + + // XXX stats? + @Override + public long getSumTotalTermFreq() throws IOException { + if (main != null) { + return main.getSumTotalTermFreq(); + } else { + return stacked.getSumTotalTermFreq(); + } + } + + // XXX stats? + @Override + public long getSumDocFreq() throws IOException { + if (main != null) { + return main.getSumDocFreq(); + } else { + return stacked.getSumDocFreq(); + } + } + + // XXX stats? + @Override + public int getDocCount() throws IOException { + if (main != null) { + return main.getDocCount(); + } else { + return stacked.getDocCount(); + } + } + +} diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 9c15987..3f82e9f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -105,20 +105,39 @@ final class SegmentCoreReaders { final PostingsFormat format = codec.postingsFormat(); final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si, fieldInfos, context, termsIndexDivisor); // Ask codec for its Fields - fields = format.fieldsProducer(segmentReadState); - assert fields != null; + FieldsProducer mainFields = format.fieldsProducer(segmentReadState); + assert mainFields != null; // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda jaky to assume the codec handles the case of no norms file at all gracefully?! 
- norms = codec.normsFormat().docsProducer(segmentReadState); - perDocProducer = codec.docValuesFormat().docsProducer(segmentReadState); + PerDocProducer mainNorms = codec.normsFormat().docsProducer(segmentReadState); + PerDocProducer mainPerDocProducer = codec.docValuesFormat().docsProducer(segmentReadState); - fieldsReaderOrig = si.getCodec().storedFieldsFormat().fieldsReader(cfsDir, si, fieldInfos, context); + StoredFieldsReader mainFieldsReaderOrig = si.getCodec().storedFieldsFormat().fieldsReader(segmentReadState); + TermVectorsReader mainTermVectorsReaderOrig = null; if (si.getHasVectors()) { // open term vector files only as needed - termVectorsReaderOrig = si.getCodec().termVectorsFormat().vectorsReader(cfsDir, si, fieldInfos, context); + mainTermVectorsReaderOrig = si.getCodec().termVectorsFormat().vectorsReader(segmentReadState); } else { - termVectorsReaderOrig = null; + mainTermVectorsReaderOrig = null; + } + + if (si.getHasStacked()) { + si.loadStackedMap(cfsDir); + } + if (si.getStackedMap() != null) { // open stacked reader and wrap format readers + SegmentReadState stacked = new SegmentReadState(cfsDir, stackedSi, stackedFieldInfos, context, termsIndexDivisor); + fields = mainFields; + norms = mainNorms; + perDocProducer = mainPerDocProducer; + fieldsReaderOrig = mainFieldsReaderOrig; + termVectorsReaderOrig = mainTermVectorsReaderOrig; + } else { + fields = mainFields; + norms = mainNorms; + perDocProducer = mainPerDocProducer; + fieldsReaderOrig = mainFieldsReaderOrig; + termVectorsReaderOrig = mainTermVectorsReaderOrig; } success = true; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java index fa3a041..4c0d680 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java @@ -29,6 +29,7 @@ import java.util.Set; import org.apache.lucene.codecs.Codec; import 
org.apache.lucene.codecs.FieldInfosReader; +import org.apache.lucene.codecs.stacked.StackedMap; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -91,6 +92,8 @@ public final class SegmentInfo implements Cloneable { //TODO: remove when we don't have to support old indexes anymore that had this field private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with positional information + private String stackedName; // name of the stacked segment with updates for this segment + private StackedMap stackedMap; // map of our id-s to the stacked id-s private FieldInfos fieldInfos; @@ -154,6 +157,7 @@ public final class SegmentInfo implements Cloneable { isCompoundFile = src.isCompoundFile; delCount = src.delCount; codec = src.codec; + stackedName = src.stackedName; } void setDiagnostics(Map diagnostics) { @@ -163,6 +167,10 @@ public final class SegmentInfo implements Cloneable { public Map getDiagnostics() { return diagnostics; } + + public StackedMap getStackedMap() { + return stackedMap; + } /** * Construct a new complete SegmentInfo instance from input. 
@@ -171,7 +179,7 @@ public final class SegmentInfo implements Cloneable { */ public SegmentInfo(Directory dir, String version, String name, int docCount, long delGen, int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, Map normGen, boolean isCompoundFile, - int delCount, int hasProx, Codec codec, Map diagnostics, int hasVectors) { + int delCount, int hasProx, Codec codec, Map diagnostics, int hasVectors, String ovlName) { this.dir = dir; this.version = version; this.name = name; @@ -187,6 +195,7 @@ public final class SegmentInfo implements Cloneable { this.codec = codec; this.diagnostics = diagnostics; this.hasVectors = hasVectors; + this.stackedName = ovlName; } synchronized void loadFieldInfos(Directory dir, boolean checkCompoundFile) throws IOException { @@ -206,6 +215,10 @@ public final class SegmentInfo implements Cloneable { } } } + + synchronized void loadStackedMap(Directory dir) throws IOException { + + } /** * Returns total size in bytes of all of files used by this segment @@ -228,6 +241,10 @@ public final class SegmentInfo implements Cloneable { return fieldInfos; } + public String getOverlayName() { + return stackedName; + } + public boolean hasDeletions() { // Cases: // @@ -270,6 +287,8 @@ public final class SegmentInfo implements Cloneable { si.version = version; si.hasProx = hasProx; si.hasVectors = hasVectors; + si.stackedName = stackedName; + si.stackedMap = stackedMap; return si; } @@ -390,6 +409,10 @@ public final class SegmentInfo implements Cloneable { public boolean getHasProx() throws IOException { return hasProx == CHECK_FIELDINFO ? getFieldInfos().hasProx() : hasProx == YES; } + + public boolean getHasStacked() { + return stackedName != null; + } /** Can only be called once. 
*/ public void setCodec(Codec codec) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java b/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java index 1b21528..8caa265 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java @@ -35,6 +35,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -47,6 +48,7 @@ public class TestTermVectorsReader extends LuceneTestCase { private String[] testTerms = {"this", "is", "a", "test"}; private int[][] positions = new int[testTerms.length][]; private Directory dir; + private SegmentReadState state; private SegmentInfo seg; private FieldInfos fieldInfos = new FieldInfos(new FieldInfos.FieldNumberBiMap()); private static int TERM_FREQ = 3; @@ -129,6 +131,7 @@ public class TestTermVectorsReader extends LuceneTestCase { writer.close(); fieldInfos = seg.getFieldInfos(); //new FieldInfos(dir, IndexFileNames.segmentFileName(seg.name, "", IndexFileNames.FIELD_INFOS_EXTENSION)); + state = new SegmentReadState(dir, seg, fieldInfos, newIOContext(random()), 1); } @Override @@ -200,7 +203,7 @@ public class TestTermVectorsReader extends LuceneTestCase { } public void testReader() throws IOException { - TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg, fieldInfos, newIOContext(random())); + TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(state); for (int j = 0; j < 5; j++) { Terms vector = reader.get(j).terms(testFields[0]); assertNotNull(vector); @@ -219,7 +222,7 @@ public class TestTermVectorsReader extends 
LuceneTestCase { } public void testDocsEnum() throws IOException { - TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg, fieldInfos, newIOContext(random())); + TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(state); for (int j = 0; j < 5; j++) { Terms vector = reader.get(j).terms(testFields[0]); assertNotNull(vector); @@ -246,7 +249,7 @@ public class TestTermVectorsReader extends LuceneTestCase { } public void testPositionReader() throws IOException { - TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg, fieldInfos, newIOContext(random())); + TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(state); BytesRef[] terms; Terms vector = reader.get(0).terms(testFields[0]); assertNotNull(vector); @@ -301,7 +304,7 @@ public class TestTermVectorsReader extends LuceneTestCase { } public void testOffsetReader() throws IOException { - TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg, fieldInfos, newIOContext(random())); + TermVectorsReader reader = Codec.getDefault().termVectorsFormat().vectorsReader(state); Terms vector = reader.get(0).terms(testFields[0]); assertNotNull(vector); TermsEnum termsEnum = vector.iterator(null); @@ -343,7 +346,7 @@ public class TestTermVectorsReader extends LuceneTestCase { public void testBadParams() throws IOException { TermVectorsReader reader = null; try { - reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg, fieldInfos, newIOContext(random())); + reader = Codec.getDefault().termVectorsFormat().vectorsReader(state); //Bad document number, good field number reader.get(50); fail(); @@ -352,7 +355,7 @@ public class TestTermVectorsReader extends LuceneTestCase { } finally { reader.close(); } - reader = Codec.getDefault().termVectorsFormat().vectorsReader(dir, seg, fieldInfos, newIOContext(random())); + reader = 
Codec.getDefault().termVectorsFormat().vectorsReader(state); //good document number, bad field Terms vector = reader.get(0).terms("f50"); assertNull(vector); diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsFormat.java index 4f0a750..5e9c32d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWTermVectorsFormat.java @@ -23,6 +23,7 @@ import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.codecs.TermVectorsWriter; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.LuceneTestCase; @@ -35,8 +36,8 @@ class PreFlexRWTermVectorsFormat extends Lucene3xTermVectorsFormat { } @Override - public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException { - return new Lucene3xTermVectorsReader(directory, segmentInfo, fieldInfos, context) { + public TermVectorsReader vectorsReader(SegmentReadState state) throws IOException { + return new Lucene3xTermVectorsReader(state.dir, state.segmentInfo, state.fieldInfos, state.context) { @Override protected boolean sortTermsByUnicode() { // We carefully peek into stack track above us: if