Separate norm files are created (when adequate) for both compound and non compound segments.
-
+
Term Vectors
Term Vector support is an optional on a field by
@@ -2529,7 +2525,7 @@
-
+
Deleted Documents
The .del file is
optional, and only exists when a segment contains deletions.
@@ -2593,7 +2589,7 @@
-
+
Limitations
Index: lucene/src/site/src/documentation/content/xdocs/fileformats.xml
===================================================================
--- lucene/src/site/src/documentation/content/xdocs/fileformats.xml (revision 1228924)
+++ lucene/src/site/src/documentation/content/xdocs/fileformats.xml (working copy)
@@ -1216,8 +1216,6 @@
bit is one for fields that have term vectors stored, and zero for fields
without term vectors.
-
If the third lowest-order bit is set (0x04), term positions are stored with the term vectors.
-
If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors.
If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field.
If the sixth lowest-order bit is set (0x20), payloads are stored for the indexed field.
If the seventh lowest-order bit is set (0x40), term frequencies and positions omitted for the indexed field.
Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
===================================================================
--- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1228924)
+++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy)
@@ -22,7 +22,6 @@
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collection;
-import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
@@ -35,6 +34,8 @@
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
@@ -43,12 +44,11 @@
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.TermVectorMapper;
-import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
-import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.RAMDirectory; // for javadocs
import org.apache.lucene.util.ArrayUtil;
@@ -192,6 +192,8 @@
private static final long serialVersionUID = 2782195016849084649L;
private static final boolean DEBUG = false;
+
+ private final FieldInfos fieldInfos;
/**
* Sorts term entries into ascending order; also works for
@@ -227,6 +229,7 @@
*/
private MemoryIndex(boolean storeOffsets) {
this.stride = storeOffsets ? 3 : 1;
+ fieldInfos = new FieldInfos(); // one FieldInfos per MemoryIndex; field names are registered later through fieldInfos.add(fieldName, true, true)
}
/**
@@ -345,6 +348,8 @@
int numTokens = 0;
int numOverlapTokens = 0;
int pos = -1;
+
+ fieldInfos.add(fieldName, true, true);
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
@@ -389,7 +394,7 @@
}
}
}
-
+
/**
* Creates and returns a searcher that can be used to execute arbitrary
* Lucene queries and to collect the resulting query results as hits.
@@ -750,6 +755,11 @@
}
@Override
+ public FieldInfos getFieldInfos() { // reports the field metadata tracked for this in-memory index
+ return fieldInfos; // the instance created in the MemoryIndex constructor, returned as-is (no copy)
+ }
+
+ @Override
public int docFreq(Term term) {
Info info = getInfo(term.field());
int freq = 0;
@@ -1188,22 +1198,6 @@
protected void doClose() {
if (DEBUG) System.err.println("MemoryIndexReader.doClose");
}
-
- // lucene >= 1.9 (remove this method for lucene-1.4.3)
- @Override
- public Collection getFieldNames(FieldOption fieldOption) {
- if (DEBUG) System.err.println("MemoryIndexReader.getFieldNamesOption");
- if (fieldOption == FieldOption.UNINDEXED)
- return Collections.emptySet();
- if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR)
- return Collections.emptySet();
- if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET && stride == 1)
- return Collections.emptySet();
- if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && stride == 1)
- return Collections.emptySet();
-
- return Collections.unmodifiableSet(fields.keySet());
- }
}
Index: lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
===================================================================
--- lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (revision 1228924)
+++ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (working copy)
@@ -16,19 +16,20 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.Reader;
+import java.util.*;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
-import java.io.IOException;
-import java.io.Reader;
-import java.util.*;
-
/**
* An {@link Analyzer} used primarily at query time to wrap another analyzer and provide a layer of protection
* which prevents very common words from being passed into queries.
@@ -97,7 +98,7 @@
Analyzer delegate,
IndexReader indexReader,
int maxDocFreq) throws IOException {
- this(matchVersion, delegate, indexReader, indexReader.getFieldNames(IndexReader.FieldOption.INDEXED), maxDocFreq);
+ this(matchVersion, delegate, indexReader, ReaderUtil.getIndexedFields(indexReader), maxDocFreq);
}
/**
@@ -117,7 +118,7 @@
Analyzer delegate,
IndexReader indexReader,
float maxPercentDocs) throws IOException {
- this(matchVersion, delegate, indexReader, indexReader.getFieldNames(IndexReader.FieldOption.INDEXED), maxPercentDocs);
+ this(matchVersion, delegate, indexReader, ReaderUtil.getIndexedFields(indexReader), maxPercentDocs);
}
/**
@@ -214,7 +215,7 @@
@Deprecated
public int addStopWords(IndexReader reader, int maxDocFreq) throws IOException {
int numStopWords = 0;
- Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
+ Collection fieldNames = ReaderUtil.getIndexedFields(reader);
for (Iterator iter = fieldNames.iterator(); iter.hasNext();) {
String fieldName = iter.next();
numStopWords += addStopWords(reader, fieldName, maxDocFreq);
@@ -237,7 +238,7 @@
@Deprecated
public int addStopWords(IndexReader reader, float maxPercentDocs) throws IOException {
int numStopWords = 0;
- Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
+ Collection fieldNames = ReaderUtil.getIndexedFields(reader);
for (Iterator iter = fieldNames.iterator(); iter.hasNext();) {
String fieldName = iter.next();
numStopWords += addStopWords(reader, fieldName, maxPercentDocs);
Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
===================================================================
--- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (revision 1228924)
+++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (working copy)
@@ -29,6 +29,8 @@
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Payload;
@@ -41,6 +43,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ReaderUtil;
/**
* Asserts equality of content and behaviour of two index readers.
@@ -387,18 +390,11 @@
assertEquals(aprioriReader.numDocs(), testReader.numDocs());
- // assert field options
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED), testReader.getFieldNames(IndexReader.FieldOption.INDEXED));
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR));
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR));
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), testReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS));
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR));
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET));
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION));
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET));
- assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.UNINDEXED), testReader.getFieldNames(IndexReader.FieldOption.UNINDEXED));
+ final FieldInfos aprioriFieldInfos = ReaderUtil.getMergedFieldInfos(aprioriReader);
- for (Object field : aprioriReader.getFieldNames(IndexReader.FieldOption.ALL)) {
+ for (FieldInfo aprioriFieldInfo : aprioriFieldInfos) {
+
+ Object field = aprioriFieldInfo.name;
// test norms as used by normal use
Index: lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestSerialization.java
===================================================================
--- lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestSerialization.java (revision 1228924)
+++ lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestSerialization.java (working copy)
@@ -26,7 +26,9 @@
import org.apache.lucene.document.Field;
import java.io.ByteArrayOutputStream;
+import java.io.ByteArrayInputStream;
import java.io.ObjectOutputStream;
+import java.io.ObjectInputStream;
public class TestSerialization extends LuceneTestCase {
@@ -49,8 +51,13 @@
oos.writeObject(ii);
oos.close();
baos.close();
+
+ final byte[] bytes = baos.toByteArray();
+ ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
+ ObjectInputStream ois = new ObjectInputStream(bais);
+ InstantiatedIndex ii2 = (InstantiatedIndex) ois.readObject();
+ assertNotNull(ii2.getFieldInfos());
dir.close();
-
}
}
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (revision 1228924)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (working copy)
@@ -18,15 +18,11 @@
import java.io.IOException;
import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
@@ -39,7 +35,11 @@
* sync with the latest commit to the store!
*
* Consider using InstantiatedIndex as if it was immutable.
+ *
+ * @deprecated contrib/instantiated will be removed in 4.0;
+ * you can use the memory codec to hold all postings in RAM
*/
+@Deprecated
public class InstantiatedIndexReader extends IndexReader {
private final InstantiatedIndex index;
@@ -67,6 +67,11 @@
}
@Override
+ public FieldInfos getFieldInfos() { // field metadata comes from the backing InstantiatedIndex, not from this reader
+ return index.getFieldInfos(); // pure delegation; reflects whatever the index last built
+ }
+
+ @Override
public Directory directory() {
throw new UnsupportedOperationException();
}
@@ -203,39 +208,6 @@
// todo perhaps release all associated instances?
}
- @Override
- public Collection getFieldNames(FieldOption fieldOption) {
- Set fieldSet = new HashSet();
- for (FieldSetting fi : index.getFieldSettings().values()) {
- if (fieldOption == IndexReader.FieldOption.ALL) {
- fieldSet.add(fi.fieldName);
- } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
- fieldSet.add(fi.fieldName);
- } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
- fieldSet.add(fi.fieldName);
- } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) {
- fieldSet.add(fi.fieldName);
- } else if (fi.indexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
- fieldSet.add(fi.fieldName);
- } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false
- && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
- fieldSet.add(fi.fieldName);
- } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
- fieldSet.add(fi.fieldName);
- } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false
- && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
- fieldSet.add(fi.fieldName);
- } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false
- && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
- fieldSet.add(fi.fieldName);
- } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
- && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
- fieldSet.add(fi.fieldName);
- }
- }
- return fieldSet;
- }
-
/**
* Return the {@link org.apache.lucene.document.Document} at the nth
* position.
@@ -249,7 +221,7 @@
* This can also be seen as a feature for live changes of stored values,
* but be careful! Adding a field with an name unknown to the index
* or to a field with previously no stored values will make
- * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
+ * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldInfos()}
* out of sync, causing problems for instance when merging the
* instantiated index to another index.
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (revision 1228924)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (working copy)
@@ -60,7 +60,11 @@
* Consider using InstantiatedIndex as if it was immutable.
*
* @see org.apache.lucene.index.IndexWriter
+ *
+ * @deprecated contrib/instantiated will be removed in 4.0;
+ * you can use the memory codec to hold all postings in RAM
*/
+@Deprecated
public class InstantiatedIndexWriter implements Closeable {
private PrintStream infoStream = null;
@@ -382,6 +386,9 @@
for (FieldSetting fieldSetting : fieldSettingsByFieldName.values()) {
index.getFieldSettings().merge(fieldSetting);
}
+
+ index.rebuildFieldInfos();
+
// set term index
if (orderedTermsDirty) {
// todo optimize, only update from start position
Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
===================================================================
--- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (revision 1228924)
+++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (working copy)
@@ -20,7 +20,6 @@
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -29,12 +28,15 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.ReaderUtil;
/**
* Represented as a coupled graph of class instances, this
@@ -51,7 +53,11 @@
* at the same time as a searcher is reading from it.
*
* Consider using InstantiatedIndex as if it was immutable.
+ *
+ * @deprecated contrib/instantiated will be removed in 4.0;
+ * you can use the memory codec to hold all postings in RAM
*/
+@Deprecated
public class InstantiatedIndex
implements Serializable,Closeable {
@@ -69,6 +75,7 @@
private Map normsByFieldNameAndDocumentNumber;
private FieldSettings fieldSettings;
+ private transient FieldInfos fieldInfos;
/**
* Creates an empty instantiated index for you to fill with data using an {@link org.apache.lucene.store.instantiated.InstantiatedIndexWriter}.
@@ -84,8 +91,28 @@
orderedTerms = new InstantiatedTerm[0];
documentsByNumber = new InstantiatedDocument[0];
normsByFieldNameAndDocumentNumber = new HashMap();
+ rebuildFieldInfos();
}
+ private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { // Java-serialization hook: runs when an InstantiatedIndex is deserialized
+ in.defaultReadObject(); // restores the serialized fields (including fieldSettings) before anything reads them
+ rebuildFieldInfos(); // fieldInfos is declared transient, so it is absent from the stream and must be recomputed
+ }
+
+ void rebuildFieldInfos() { // recomputes fieldInfos from scratch out of the current fieldSettings
+ fieldInfos = new FieldInfos(); // always a fresh instance; the previous one is discarded, not updated in place
+ for(FieldSetting fieldSetting : fieldSettings.values()) {
+ fieldInfos.add(fieldSetting.fieldName,
+ fieldSetting.indexed,
+ fieldSetting.storeTermVector, // the offset/position term-vector settings are not forwarded to FieldInfos
+ false, fieldSetting.storePayloads, // NOTE(review): the literal false is presumably omitNorms -- confirm against FieldInfos.add
+ FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); // same index options passed for every field, regardless of its setting
+ }
+ }
+
+ public FieldInfos getFieldInfos() { // accessor for the field metadata derived from fieldSettings
+ return fieldInfos; // whatever the most recent rebuildFieldInfos() call produced; returned without copying
+ }
/**
* Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
@@ -113,63 +140,18 @@
//throw new IOException("Source index has more than one segment.");
}
-
initialize();
- Collection allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL);
-
- // load field options
-
- Collection indexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED);
- for (String name : indexedNames) {
- FieldSetting setting = fieldSettings.get(name, true);
- setting.indexed = true;
- }
- Collection indexedNoVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR);
- for (String name : indexedNoVecNames) {
- FieldSetting setting = fieldSettings.get(name, true);
- setting.storeTermVector = false;
- setting.indexed = true;
- }
- Collection indexedVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
- for (String name : indexedVecNames) {
- FieldSetting setting = fieldSettings.get(name, true);
- setting.storeTermVector = true;
- setting.indexed = true;
- }
- Collection payloadNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS);
- for (String name : payloadNames) {
- FieldSetting setting = fieldSettings.get(name, true);
- setting.storePayloads = true;
- }
- Collection termVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR);
- for (String name : termVecNames) {
- FieldSetting setting = fieldSettings.get(name, true);
- setting.storeTermVector = true;
- }
- Collection termVecOffsetNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
- for (String name : termVecOffsetNames) {
- FieldSetting setting = fieldSettings.get(name, true);
+ // load field infos
+ for(FieldInfo fieldInfo : ReaderUtil.getMergedFieldInfos(sourceIndexReader)) {
+ FieldSetting setting = fieldSettings.get(fieldInfo.name, true);
+ setting.indexed = fieldInfo.isIndexed;
+ setting.storeTermVector = fieldInfo.storeTermVector;
setting.storeOffsetWithTermVector = true;
- }
- Collection termVecPosNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
- for (String name : termVecPosNames) {
- FieldSetting setting = fieldSettings.get(name, true);
setting.storePositionWithTermVector = true;
+ setting.storePayloads = fieldInfo.storePayloads;
}
- Collection termVecPosOffNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
- for (String name : termVecPosOffNames) {
- FieldSetting setting = fieldSettings.get(name, true);
- setting.storeOffsetWithTermVector = true;
- setting.storePositionWithTermVector = true;
- }
- Collection unindexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.UNINDEXED);
- for (String name : unindexedNames) {
- FieldSetting setting = fieldSettings.get(name, true);
- setting.indexed = false;
- }
-
documentsByNumber = new InstantiatedDocument[sourceIndexReader.maxDoc()];
if (sourceIndexReader.hasDeletions()) {
@@ -204,17 +186,17 @@
}
}
-
-
// create norms
- for (String fieldName : allFieldNames) {
+ for (FieldInfo fieldInfo : ReaderUtil.getMergedFieldInfos(sourceIndexReader)) {
+ String fieldName = fieldInfo.name;
if (fields == null || fields.contains(fieldName)) {
getNormsByFieldNameAndDocumentNumber().put(fieldName, sourceIndexReader.norms(fieldName));
}
}
// create terms
- for (String fieldName : allFieldNames) {
+ for (FieldInfo fieldInfo : ReaderUtil.getMergedFieldInfos(sourceIndexReader)) {
+ String fieldName = fieldInfo.name;
if (fields == null || fields.contains(fieldName)) {
getTermsByFieldAndText().put(fieldName, new HashMap(5000));
}
@@ -269,15 +251,13 @@
continue; // deleted
}
for (Fieldable field : document.getDocument().getFields()) {
- if (field.isTermVectorStored() && field.isStoreOffsetWithTermVector()) {
- TermPositionVector termPositionVector = (TermPositionVector) sourceIndexReader.getTermFreqVector(document.getDocumentNumber(), field.name());
- if (termPositionVector != null) {
- for (int i = 0; i < termPositionVector.getTerms().length; i++) {
- String token = termPositionVector.getTerms()[i];
- InstantiatedTerm term = findTerm(field.name(), token);
- InstantiatedTermDocumentInformation termDocumentInformation = term.getAssociatedDocument(document.getDocumentNumber());
- termDocumentInformation.setTermOffsets(termPositionVector.getOffsets(i));
- }
+ TermPositionVector termPositionVector = (TermPositionVector) sourceIndexReader.getTermFreqVector(document.getDocumentNumber(), field.name());
+ if (termPositionVector != null) {
+ for (int i = 0; i < termPositionVector.getTerms().length; i++) {
+ String token = termPositionVector.getTerms()[i];
+ InstantiatedTerm term = findTerm(field.name(), token);
+ InstantiatedTermDocumentInformation termDocumentInformation = term.getAssociatedDocument(document.getDocumentNumber());
+ termDocumentInformation.setTermOffsets(termPositionVector.getOffsets(i));
}
}
}
Index: lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSorter.java
===================================================================
--- lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSorter.java (revision 1228924)
+++ lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSorter.java (working copy)
@@ -26,6 +26,7 @@
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.store.*;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
/** Sort an index by document importance factor. Higher scoring documents are
@@ -195,6 +196,11 @@
}
@Override
+ public FieldInfos getFieldInfos() { // field metadata is taken from the reader held in `in`
+ return ReaderUtil.getMergedFieldInfos(in); // recomputed through ReaderUtil on every call; nothing is cached here
+ }
+
+ @Override
public Document document(int n, FieldSelector fieldSelector)
throws CorruptIndexException, IOException {
return super.document(newToOld[n], fieldSelector);
Index: lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
===================================================================
--- lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (revision 1228924)
+++ lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (working copy)
@@ -19,7 +19,6 @@
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
-import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
@@ -41,14 +40,11 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DefaultSimilarity;
-import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
/**
@@ -621,8 +617,7 @@
public Query like(int docNum) throws IOException {
if (fieldNames == null) {
// gather list of valid fields from lucene
- Collection fields = ir
- .getFieldNames(IndexReader.FieldOption.INDEXED);
+ Collection fields = ReaderUtil.getIndexedFields(ir);
fieldNames = fields.toArray(new String[fields.size()]);
}
@@ -638,8 +633,7 @@
public Query like(File f) throws IOException {
if (fieldNames == null) {
// gather list of valid fields from lucene
- Collection fields = ir
- .getFieldNames(IndexReader.FieldOption.INDEXED);
+ Collection fields = ReaderUtil.getIndexedFields(ir);
fieldNames = fields.toArray(new String[fields.size()]);
}
Index: lucene/backwards/src/test/org/apache/lucene/index/TestSegmentMerger.java
===================================================================
--- lucene/backwards/src/test/org/apache/lucene/index/TestSegmentMerger.java (revision 1228924)
+++ lucene/backwards/src/test/org/apache/lucene/index/TestSegmentMerger.java (working copy)
@@ -28,7 +28,6 @@
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import java.io.IOException;
-import java.util.Collection;
public class TestSegmentMerger extends LuceneTestCase {
//The variables for the new merged segment
@@ -102,11 +101,6 @@
assertTrue(termDocs != null);
assertTrue(termDocs.next() == true);
- Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
- assertTrue(stored != null);
- //System.out.println("stored size: " + stored.size());
- assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3);
-
TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(vector != null);
String [] terms = vector.getTerms();
Index: lucene/backwards/src/test/org/apache/lucene/index/TestIndexReader.java
===================================================================
--- lucene/backwards/src/test/org/apache/lucene/index/TestIndexReader.java (revision 1228924)
+++ lucene/backwards/src/test/org/apache/lucene/index/TestIndexReader.java (working copy)
@@ -38,7 +38,6 @@
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.SetBasedFieldSelector;
-import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.AlreadyClosedException;
@@ -152,13 +151,6 @@
writer.close();
// set up reader
- IndexReader reader = IndexReader.open(d, false);
- Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
- assertTrue(fieldNames.contains("keyword"));
- assertTrue(fieldNames.contains("text"));
- assertTrue(fieldNames.contains("unindexed"));
- assertTrue(fieldNames.contains("unstored"));
- reader.close();
// add more documents
writer = new IndexWriter(
d,
@@ -197,62 +189,6 @@
}
writer.close();
- // verify fields again
- reader = IndexReader.open(d, false);
- fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
- assertEquals(13, fieldNames.size()); // the following fields
- assertTrue(fieldNames.contains("keyword"));
- assertTrue(fieldNames.contains("text"));
- assertTrue(fieldNames.contains("unindexed"));
- assertTrue(fieldNames.contains("unstored"));
- assertTrue(fieldNames.contains("keyword2"));
- assertTrue(fieldNames.contains("text2"));
- assertTrue(fieldNames.contains("unindexed2"));
- assertTrue(fieldNames.contains("unstored2"));
- assertTrue(fieldNames.contains("tvnot"));
- assertTrue(fieldNames.contains("termvector"));
- assertTrue(fieldNames.contains("tvposition"));
- assertTrue(fieldNames.contains("tvoffset"));
- assertTrue(fieldNames.contains("tvpositionoffset"));
-
- // verify that only indexed fields were returned
- fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
- assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields
- assertTrue(fieldNames.contains("keyword"));
- assertTrue(fieldNames.contains("text"));
- assertTrue(fieldNames.contains("unstored"));
- assertTrue(fieldNames.contains("keyword2"));
- assertTrue(fieldNames.contains("text2"));
- assertTrue(fieldNames.contains("unstored2"));
- assertTrue(fieldNames.contains("tvnot"));
- assertTrue(fieldNames.contains("termvector"));
- assertTrue(fieldNames.contains("tvposition"));
- assertTrue(fieldNames.contains("tvoffset"));
- assertTrue(fieldNames.contains("tvpositionoffset"));
-
- // verify that only unindexed fields were returned
- fieldNames = reader.getFieldNames(IndexReader.FieldOption.UNINDEXED);
- assertEquals(2, fieldNames.size()); // the following fields
- assertTrue(fieldNames.contains("unindexed"));
- assertTrue(fieldNames.contains("unindexed2"));
-
- // verify index term vector fields
- fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR);
- assertEquals(1, fieldNames.size()); // 1 field has term vector only
- assertTrue(fieldNames.contains("termvector"));
-
- fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
- assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors
- assertTrue(fieldNames.contains("tvposition"));
-
- fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
- assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors
- assertTrue(fieldNames.contains("tvoffset"));
-
- fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
- assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors
- assertTrue(fieldNames.contains("tvpositionoffset"));
- reader.close();
d.close();
}
@@ -825,35 +761,6 @@
assertEquals("Single segment test differs.", index1.getSequentialSubReaders().length == 1, index2.getSequentialSubReaders().length == 1);
}
- // check field names
- Collection fields1 = index1.getFieldNames(FieldOption.ALL);
- Collection fields2 = index1.getFieldNames(FieldOption.ALL);
- assertEquals("IndexReaders have different numbers of fields.", fields1.size(), fields2.size());
- Iterator it1 = fields1.iterator();
- Iterator it2 = fields1.iterator();
- while (it1.hasNext()) {
- assertEquals("Different field names.", it1.next(), it2.next());
- }
-
- // check norms
- it1 = fields1.iterator();
- while (it1.hasNext()) {
- String curField = it1.next();
- byte[] norms1 = index1.norms(curField);
- byte[] norms2 = index2.norms(curField);
- if (norms1 != null && norms2 != null)
- {
- assertEquals(norms1.length, norms2.length);
- for (int i = 0; i < norms1.length; i++) {
- assertEquals("Norm different for doc " + i + " and field '" + curField + "'.", norms1[i], norms2[i]);
- }
- }
- else
- {
- assertSame(norms1, norms2);
- }
- }
-
// check deletions
for (int i = 0; i < index1.maxDoc(); i++) {
assertEquals("Doc " + i + " only deleted in one index.", index1.isDeleted(i), index2.isDeleted(i));
Index: lucene/backwards/src/test/org/apache/lucene/index/TestFieldsReader.java
===================================================================
--- lucene/backwards/src/test/org/apache/lucene/index/TestFieldsReader.java (revision 1228924)
+++ lucene/backwards/src/test/org/apache/lucene/index/TestFieldsReader.java (working copy)
@@ -91,24 +91,18 @@
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == true);
- assertTrue(field.isStoreOffsetWithTermVector() == true);
- assertTrue(field.isStorePositionWithTermVector() == true);
assertTrue(field.getOmitNorms() == false);
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == false);
- assertTrue(field.isStoreOffsetWithTermVector() == false);
- assertTrue(field.isStorePositionWithTermVector() == false);
assertTrue(field.getOmitNorms() == true);
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
field = doc.getField(DocHelper.NO_TF_KEY);
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == false);
- assertTrue(field.isStoreOffsetWithTermVector() == false);
- assertTrue(field.isStorePositionWithTermVector() == false);
assertTrue(field.getOmitNorms() == false);
assertTrue(field.getIndexOptions() == IndexOptions.DOCS_ONLY);
reader.close();
Index: lucene/backwards/src/test/org/apache/lucene/index/TestParallelReader.java
===================================================================
--- lucene/backwards/src/test/org/apache/lucene/index/TestParallelReader.java (revision 1228924)
+++ lucene/backwards/src/test/org/apache/lucene/index/TestParallelReader.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
import java.util.Arrays;
-import java.util.Collection;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -76,23 +75,6 @@
queryTest(bq1);
}
- public void testFieldNames() throws Exception {
- Directory dir1 = getDir1(random);
- Directory dir2 = getDir2(random);
- ParallelReader pr = new ParallelReader();
- pr.add(IndexReader.open(dir1, false));
- pr.add(IndexReader.open(dir2, false));
- Collection fieldNames = pr.getFieldNames(IndexReader.FieldOption.ALL);
- assertEquals(4, fieldNames.size());
- assertTrue(fieldNames.contains("f1"));
- assertTrue(fieldNames.contains("f2"));
- assertTrue(fieldNames.contains("f3"));
- assertTrue(fieldNames.contains("f4"));
- pr.close();
- dir1.close();
- dir2.close();
- }
-
public void testDocument() throws IOException {
Directory dir1 = getDir1(random);
Directory dir2 = getDir2(random);
Index: lucene/backwards/src/test/org/apache/lucene/index/TestSegmentReader.java
===================================================================
--- lucene/backwards/src/test/org/apache/lucene/index/TestSegmentReader.java (revision 1228924)
+++ lucene/backwards/src/test/org/apache/lucene/index/TestSegmentReader.java (working copy)
@@ -18,8 +18,6 @@
*/
import java.io.IOException;
-import java.util.Collection;
-import java.util.Iterator;
import java.util.List;
import org.apache.lucene.util.LuceneTestCase;
@@ -87,36 +85,6 @@
deleteReader.close();
}
- public void testGetFieldNameVariations() {
- Collection result = reader.getFieldNames(IndexReader.FieldOption.ALL);
- assertTrue(result != null);
- assertTrue(result.size() == DocHelper.all.size());
- for (Iterator iter = result.iterator(); iter.hasNext();) {
- String s = iter.next();
- //System.out.println("Name: " + s);
- assertTrue(DocHelper.nameValues.containsKey(s) == true || s.equals(""));
- }
- result = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
- assertTrue(result != null);
- assertTrue(result.size() == DocHelper.indexed.size());
- for (Iterator iter = result.iterator(); iter.hasNext();) {
- String s = iter.next();
- assertTrue(DocHelper.indexed.containsKey(s) == true || s.equals(""));
- }
-
- result = reader.getFieldNames(IndexReader.FieldOption.UNINDEXED);
- assertTrue(result != null);
- assertTrue(result.size() == DocHelper.unindexed.size());
- //Get all indexed fields that are storing term vectors
- result = reader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
- assertTrue(result != null);
- assertTrue(result.size() == DocHelper.termvector.size());
-
- result = reader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR);
- assertTrue(result != null);
- assertTrue(result.size() == DocHelper.notermvector.size());
- }
-
public void testTerms() throws IOException {
TermEnum terms = reader.terms();
assertTrue(terms != null);